Merge pull request #1443 from eikek/file-backends

File backends
mergify[bot] 2022-03-13 14:44:32 +00:00 committed by GitHub
commit 7c85605687
165 changed files with 4044 additions and 1944 deletions

View File

@@ -519,6 +519,29 @@ val pubsubNaive = project
   )
   .dependsOn(common, pubsubApi, store % "compile->compile;test->test")

+val schedulerApi = project
+  .in(file("modules/scheduler/api"))
+  .disablePlugins(RevolverPlugin)
+  .settings(sharedSettings)
+  .withTestSettingsDependsOn(loggingScribe)
+  .settings(
+    name := "docspell-scheduler-api",
+    libraryDependencies ++=
+      Dependencies.fs2Core ++
+        Dependencies.circeCore
+  )
+  .dependsOn(loggingApi, common, pubsubApi)
+
+val schedulerImpl = project
+  .in(file("modules/scheduler/impl"))
+  .disablePlugins(RevolverPlugin)
+  .settings(sharedSettings)
+  .withTestSettingsDependsOn(loggingScribe)
+  .settings(
+    name := "docspell-scheduler-impl"
+  )
+  .dependsOn(store, schedulerApi, notificationApi, pubsubApi)
+
 val extract = project
   .in(file("modules/extract"))
   .disablePlugins(RevolverPlugin)
@@ -641,7 +664,16 @@ val backend = project
       Dependencies.http4sClient ++
       Dependencies.emil
   )
-  .dependsOn(store, notificationApi, joexapi, ftsclient, totp, pubsubApi, loggingApi)
+  .dependsOn(
+    store,
+    notificationApi,
+    joexapi,
+    ftsclient,
+    totp,
+    pubsubApi,
+    loggingApi,
+    schedulerApi
+  )

 val oidc = project
   .in(file("modules/oidc"))
@@ -732,7 +764,8 @@ val joex = project
     restapi,
     ftssolr,
     pubsubNaive,
-    notificationImpl
+    notificationImpl,
+    schedulerImpl
   )

 val restserver = project
@@ -804,7 +837,8 @@ val restserver = project
     ftssolr,
     oidc,
     pubsubNaive,
-    notificationImpl
+    notificationImpl,
+    schedulerImpl
   )

 // --- Website Documentation
@@ -902,7 +936,9 @@ val root = project
     pubsubApi,
     pubsubNaive,
     notificationApi,
-    notificationImpl
+    notificationImpl,
+    schedulerApi,
+    schedulerImpl
   )

 // --- Helpers

View File

@@ -10,14 +10,13 @@ import cats.effect._

 import docspell.backend.auth.Login
 import docspell.backend.fulltext.CreateIndex
-import docspell.backend.msg.JobQueuePublish
 import docspell.backend.ops._
 import docspell.backend.signup.OSignup
 import docspell.ftsclient.FtsClient
 import docspell.notification.api.{EventExchange, NotificationModule}
 import docspell.pubsub.api.PubSubT
+import docspell.scheduler.JobStoreModule
 import docspell.store.Store
-import docspell.store.usertask.UserTaskStore
 import docspell.totp.Totp
 import emil.Emil
@@ -50,6 +49,7 @@ trait BackendApp[F[_]] {
   def events: EventExchange[F]
   def notification: ONotification[F]
   def bookmarks: OQueryBookmarks[F]
+  def fileRepository: OFileRepository[F]
 }

 object BackendApp {
@@ -59,29 +59,43 @@ object BackendApp {
       javaEmil: Emil[F],
       ftsClient: FtsClient[F],
       pubSubT: PubSubT[F],
+      schedulerModule: JobStoreModule[F],
       notificationMod: NotificationModule[F]
   ): Resource[F, BackendApp[F]] =
     for {
-      utStore <- UserTaskStore(store)
-      queue <- JobQueuePublish(store, pubSubT, notificationMod)
      totpImpl <- OTotp(store, Totp.default)
      loginImpl <- Login[F](store, Totp.default)
      signupImpl <- OSignup[F](store)
      joexImpl <- OJoex(pubSubT)
-      collImpl <- OCollective[F](store, utStore, queue, joexImpl)
+      collImpl <- OCollective[F](
+        store,
+        schedulerModule.userTasks,
+        schedulerModule.jobs,
+        joexImpl
+      )
      sourceImpl <- OSource[F](store)
      tagImpl <- OTag[F](store)
      equipImpl <- OEquipment[F](store)
      orgImpl <- OOrganization(store)
-      uploadImpl <- OUpload(store, queue, joexImpl)
+      uploadImpl <- OUpload(store, schedulerModule.jobs, joexImpl)
      nodeImpl <- ONode(store)
      jobImpl <- OJob(store, joexImpl, pubSubT)
      createIndex <- CreateIndex.resource(ftsClient, store)
-      itemImpl <- OItem(store, ftsClient, createIndex, queue, joexImpl)
+      itemImpl <- OItem(store, ftsClient, createIndex, schedulerModule.jobs, joexImpl)
      itemSearchImpl <- OItemSearch(store)
-      fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
+      fulltextImpl <- OFulltext(
+        itemSearchImpl,
+        ftsClient,
+        store,
+        schedulerModule.jobs,
+        joexImpl
+      )
      mailImpl <- OMail(store, javaEmil)
-      userTaskImpl <- OUserTask(utStore, store, queue, joexImpl)
+      userTaskImpl <- OUserTask(
+        schedulerModule.userTasks,
+        store,
+        joexImpl
+      )
      folderImpl <- OFolder(store)
      customFieldsImpl <- OCustomFields(store)
      simpleSearchImpl = OSimpleSearch(fulltextImpl, itemSearchImpl)
@@ -91,6 +105,7 @@ object BackendApp {
      )
      notifyImpl <- ONotification(store, notificationMod)
      bookmarksImpl <- OQueryBookmarks(store)
+      fileRepoImpl <- OFileRepository(store, schedulerModule.jobs, joexImpl)
    } yield new BackendApp[F] {
      val pubSub = pubSubT
      val login = loginImpl
@@ -118,5 +133,6 @@ object BackendApp {
      val events = notificationMod
      val notification = notifyImpl
      val bookmarks = bookmarksImpl
+      val fileRepository = fileRepoImpl
    }
 }

View File

@@ -6,9 +6,13 @@

 package docspell.backend

+import cats.data.{Validated, ValidatedNec}
+import cats.implicits._
+
 import docspell.backend.signup.{Config => SignupConfig}
 import docspell.common._
 import docspell.store.JdbcConfig
+import docspell.store.file.FileRepositoryConfig
 import emil.javamail.Settings
@@ -21,10 +25,45 @@ case class Config(

   def mailSettings: Settings =
     Settings.defaultSettings.copy(debug = mailDebug)
 }

 object Config {
-  case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType])
+  case class Files(
+      chunkSize: Int,
+      validMimeTypes: Seq[MimeType],
+      defaultStore: Ident,
+      stores: Map[Ident, FileStoreConfig]
+  ) {
+    val enabledStores: Map[Ident, FileStoreConfig] =
+      stores.view.filter(_._2.enabled).toMap
+
+    def defaultStoreConfig: FileStoreConfig =
+      enabledStores(defaultStore)
+
+    def defaultFileRepositoryConfig: FileRepositoryConfig =
+      FileRepositoryConfig.fromFileStoreConfig(chunkSize, defaultStoreConfig)
+
+    def getFileRepositoryConfig(id: Ident): Option[FileRepositoryConfig] =
+      stores.get(id).map(FileRepositoryConfig.fromFileStoreConfig(chunkSize, _))
+
+    def validate: ValidatedNec[String, Files] = {
+      val storesEmpty =
+        if (enabledStores.isEmpty)
+          Validated.invalidNec(
+            "No file stores defined! Make sure at least one enabled store is present."
+          )
+        else Validated.validNec(())
+
+      val defaultStorePresent =
+        enabledStores.get(defaultStore) match {
+          case Some(_) => Validated.validNec(())
+          case None =>
+            Validated.invalidNec(s"Default file store not present: ${defaultStore.id}")
+        }
+
+      (storesEmpty |+| defaultStorePresent).map(_ => this)
+    }
+  }
 }
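
Worth noting: because `validate` combines the two checks with `|+|` on `ValidatedNec`, a bad config reports all problems at once instead of failing fast. A minimal sketch of that behavior (store ids and chunk size are illustrative, not taken from this PR):

import docspell.backend.Config
import docspell.common._

// A Files section whose only store is disabled: both checks fail,
// and both messages are accumulated in the NonEmptyChain.
val files = Config.Files(
  chunkSize = 512 * 1024, // illustrative value
  validMimeTypes = Seq.empty,
  defaultStore = Ident.unsafe("minio"),
  stores = Map(
    Ident.unsafe("minio") ->
      FileStoreConfig.S3(enabled = false, "http://localhost:9000", "user", "pass", "docspell")
  )
)

files.validate
// Invalid(Chain(
//   "No file stores defined! Make sure at least one enabled store is present.",
//   "Default file store not present: minio"))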

View File

@@ -12,80 +12,88 @@ import cats.implicits._

 import docspell.backend.MailAddressCodec
 import docspell.common._
 import docspell.notification.api.PeriodicQueryArgs
-import docspell.store.records.RJob
+import docspell.scheduler.Job

 object JobFactory extends MailAddressCodec {
-  def periodicQuery[F[_]: Sync](args: PeriodicQueryArgs, submitter: AccountId): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      job = RJob.newJob(
-        id,
-        PeriodicQueryArgs.taskName,
-        submitter.collective,
-        args,
-        s"Running periodic query, notify via ${args.channels.map(_.channelType)}",
-        now,
-        submitter.user,
-        Priority.Low,
-        None
-      )
-    } yield job
+  def integrityCheck[F[_]: Sync](
+      args: FileIntegrityCheckArgs,
+      submitter: AccountId = DocspellSystem.account
+  ): F[Job[FileIntegrityCheckArgs]] =
+    Job.createNew(
+      FileIntegrityCheckArgs.taskName,
+      submitter.collective,
+      args,
+      s"Check integrity of files",
+      submitter.user,
+      Priority.High,
+      Some(FileIntegrityCheckArgs.taskName)
+    )
+
+  def fileCopy[F[_]: Sync](
+      args: FileCopyTaskArgs,
+      submitter: AccountId = DocspellSystem.account
+  ): F[Job[FileCopyTaskArgs]] =
+    Job.createNew(
+      FileCopyTaskArgs.taskName,
+      submitter.collective,
+      args,
+      "Copying all files",
+      submitter.user,
+      Priority.High,
+      Some(FileCopyTaskArgs.taskName)
+    )
+
+  def periodicQuery[F[_]: Sync](
+      args: PeriodicQueryArgs,
+      submitter: AccountId
+  ): F[Job[PeriodicQueryArgs]] =
+    Job.createNew(
+      PeriodicQueryArgs.taskName,
+      submitter.collective,
+      args,
+      s"Running periodic query, notify via ${args.channels.map(_.channelType)}",
+      submitter.user,
+      Priority.Low,
+      None
+    )

   def makePageCount[F[_]: Sync](
       args: MakePageCountArgs,
       account: Option[AccountId]
-  ): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      job = RJob.newJob(
-        id,
-        MakePageCountArgs.taskName,
-        account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
-        args,
-        s"Find page-count metadata for ${args.attachment.id}",
-        now,
-        account.map(_.user).getOrElse(DocspellSystem.user),
-        Priority.Low,
-        Some(MakePageCountArgs.taskName / args.attachment)
-      )
-    } yield job
+  ): F[Job[MakePageCountArgs]] =
+    Job.createNew(
+      MakePageCountArgs.taskName,
+      account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
+      args,
+      s"Find page-count metadata for ${args.attachment.id}",
+      account.map(_.user).getOrElse(DocspellSystem.user),
+      Priority.Low,
+      Some(MakePageCountArgs.taskName / args.attachment)
+    )

   def makePreview[F[_]: Sync](
       args: MakePreviewArgs,
       account: Option[AccountId]
-  ): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      job = RJob.newJob(
-        id,
-        MakePreviewArgs.taskName,
-        account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
-        args,
-        s"Generate preview image",
-        now,
-        account.map(_.user).getOrElse(DocspellSystem.user),
-        Priority.Low,
-        Some(MakePreviewArgs.taskName / args.attachment)
-      )
-    } yield job
+  ): F[Job[MakePreviewArgs]] =
+    Job.createNew(
+      MakePreviewArgs.taskName,
+      account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
+      args,
+      s"Generate preview image",
+      account.map(_.user).getOrElse(DocspellSystem.user),
+      Priority.Low,
+      Some(MakePreviewArgs.taskName / args.attachment)
+    )

   def allPreviews[F[_]: Sync](
       args: AllPreviewsArgs,
       submitter: Option[Ident]
-  ): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-    } yield RJob.newJob(
-      id,
+  ): F[Job[AllPreviewsArgs]] =
+    Job.createNew(
       AllPreviewsArgs.taskName,
       args.collective.getOrElse(DocspellSystem.taskGroup),
       args,
       "Create preview images",
-      now,
       submitter.getOrElse(DocspellSystem.user),
       Priority.Low,
       Some(DocspellSystem.allPreviewTaskTracker)
@@ -95,127 +103,91 @@ object JobFactory extends MailAddressCodec {
       collective: Option[Ident],
       submitter: Option[Ident],
       prio: Priority
-  ): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      job = RJob.newJob(
-        id,
-        ConvertAllPdfArgs.taskName,
-        collective.getOrElse(DocspellSystem.taskGroup),
-        ConvertAllPdfArgs(collective),
-        s"Convert all pdfs not yet converted",
-        now,
-        submitter.getOrElse(DocspellSystem.user),
-        prio,
-        collective
-          .map(c => c / ConvertAllPdfArgs.taskName)
-          .orElse(ConvertAllPdfArgs.taskName.some)
-      )
-    } yield job
+  ): F[Job[ConvertAllPdfArgs]] =
+    Job.createNew(
+      ConvertAllPdfArgs.taskName,
+      collective.getOrElse(DocspellSystem.taskGroup),
+      ConvertAllPdfArgs(collective),
+      s"Convert all pdfs not yet converted",
+      submitter.getOrElse(DocspellSystem.user),
+      prio,
+      collective
+        .map(c => c / ConvertAllPdfArgs.taskName)
+        .orElse(ConvertAllPdfArgs.taskName.some)
+    )

   def reprocessItem[F[_]: Sync](
       args: ReProcessItemArgs,
       account: AccountId,
       prio: Priority
-  ): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      job = RJob.newJob(
-        id,
-        ReProcessItemArgs.taskName,
-        account.collective,
-        args,
-        s"Re-process files of item ${args.itemId.id}",
-        now,
-        account.user,
-        prio,
-        Some(ReProcessItemArgs.taskName / args.itemId)
-      )
-    } yield job
+  ): F[Job[ReProcessItemArgs]] =
+    Job.createNew(
+      ReProcessItemArgs.taskName,
+      account.collective,
+      args,
+      s"Re-process files of item ${args.itemId.id}",
+      account.user,
+      prio,
+      Some(ReProcessItemArgs.taskName / args.itemId)
+    )

   def processItem[F[_]: Sync](
       args: ProcessItemArgs,
       account: AccountId,
       prio: Priority,
       tracker: Option[Ident]
-  ): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      job = RJob.newJob(
-        id,
-        ProcessItemArgs.taskName,
-        account.collective,
-        args,
-        args.makeSubject,
-        now,
-        account.user,
-        prio,
-        tracker
-      )
-    } yield job
+  ): F[Job[ProcessItemArgs]] =
+    Job.createNew(
+      ProcessItemArgs.taskName,
+      account.collective,
+      args,
+      args.makeSubject,
+      account.user,
+      prio,
+      tracker
+    )

   def processItems[F[_]: Sync](
       args: Vector[ProcessItemArgs],
       account: AccountId,
       prio: Priority,
       tracker: Option[Ident]
-  ): F[Vector[RJob]] = {
-    def create(now: Timestamp, arg: ProcessItemArgs): F[RJob] =
-      Ident
-        .randomId[F]
-        .map(id =>
-          RJob.newJob(
-            id,
-            ProcessItemArgs.taskName,
-            account.collective,
-            arg,
-            arg.makeSubject,
-            now,
-            account.user,
-            prio,
-            tracker
-          )
-        )
+  ): F[Vector[Job[ProcessItemArgs]]] = {
+    def create(arg: ProcessItemArgs): F[Job[ProcessItemArgs]] =
+      Job.createNew(
+        ProcessItemArgs.taskName,
+        account.collective,
+        arg,
+        arg.makeSubject,
+        account.user,
+        prio,
+        tracker
+      )

-    for {
-      now <- Timestamp.current[F]
-      jobs <- args.traverse(a => create(now, a))
-    } yield jobs
+    args.traverse(create)
   }

-  def reIndexAll[F[_]: Sync]: F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-    } yield RJob.newJob(
-      id,
+  def reIndexAll[F[_]: Sync]: F[Job[ReIndexTaskArgs]] =
+    Job.createNew(
       ReIndexTaskArgs.taskName,
       DocspellSystem.taskGroup,
       ReIndexTaskArgs(None),
-      s"Recreate full-text index",
-      now,
+      "Recreate full-text index",
       DocspellSystem.taskGroup,
       Priority.Low,
       Some(DocspellSystem.migrationTaskTracker)
     )

-  def reIndex[F[_]: Sync](account: AccountId): F[RJob] =
-    for {
-      id <- Ident.randomId[F]
-      now <- Timestamp.current[F]
-      args = ReIndexTaskArgs(Some(account.collective))
-    } yield RJob.newJob(
-      id,
+  def reIndex[F[_]: Sync](account: AccountId): F[Job[ReIndexTaskArgs]] = {
+    val args = ReIndexTaskArgs(Some(account.collective))
+    Job.createNew(
      ReIndexTaskArgs.taskName,
      account.collective,
      args,
-      s"Recreate full-text index",
-      now,
+      "Recreate full-text index",
      account.user,
      Priority.Low,
      Some(ReIndexTaskArgs.tracker(args))
    )
+  }
 }
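
All factories now return a typed `Job[A]` from `Job.createNew` (which generates the id and timestamp that `RJob.newJob` took explicitly), and callers encode the job before handing it to the job store. A usage sketch, assuming a `JobStore[IO]` obtained from the new scheduler module:

import cats.effect.IO
import docspell.backend.JobFactory
import docspell.common._
import docspell.common.FileCopyTaskArgs.Selection
import docspell.scheduler.JobStore

// Create a typed file-copy job and submit its encoded form; this mirrors
// what OFileRepository.cloneFileRepository does further down in this PR.
def submitFileCopy(jobStore: JobStore[IO]): IO[Boolean] =
  for {
    job <- JobFactory.fileCopy[IO](FileCopyTaskArgs(None, Selection.All))
    isNew <- jobStore.insertIfNew(job.encode) // false if the tracked job is already queued
  } yield isNew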

View File

@@ -8,16 +8,19 @@ package docspell.backend.msg

 import cats.data.NonEmptyList

-import docspell.pubsub.api.{Topic, TypedTopic}
+import docspell.pubsub.api.TypedTopic
+import docspell.scheduler.msg._

 /** All topics used in Docspell. */
 object Topics {

-  /** A generic notification to the job executors to look for new work. */
-  val jobsNotify: TypedTopic[Unit] =
-    TypedTopic[Unit](Topic("jobs-notify"))
-
   /** A list of all topics. It is required to list every topic in use here! */
   val all: NonEmptyList[TypedTopic[_]] =
-    NonEmptyList.of(JobDone.topic, CancelJob.topic, jobsNotify, JobSubmitted.topic)
+    NonEmptyList.of(
+      JobDone.topic,
+      CancelJob.topic,
+      JobsNotify(),
+      JobSubmitted.topic,
+      PeriodicTaskNotify()
+    )
 }

View File

@@ -14,11 +14,11 @@ import docspell.backend.JobFactory
 import docspell.backend.PasswordCrypt
 import docspell.backend.ops.OCollective._
 import docspell.common._
+import docspell.scheduler.JobStore
+import docspell.scheduler.usertask.{UserTask, UserTaskScope, UserTaskStore}
 import docspell.store.UpdateResult
 import docspell.store.queries.{QCollective, QUser}
-import docspell.store.queue.JobQueue
 import docspell.store.records._
-import docspell.store.usertask.{UserTask, UserTaskScope, UserTaskStore}
 import docspell.store.{AddResult, Store}

 import com.github.eikek.calev._
@@ -133,7 +133,7 @@ object OCollective {
   def apply[F[_]: Async](
       store: Store[F],
       uts: UserTaskStore[F],
-      queue: JobQueue[F],
+      jobStore: JobStore[F],
       joex: OJoex[F]
   ): Resource[F, OCollective[F]] =
     Resource.pure[F, OCollective[F]](new OCollective[F] {
@@ -196,32 +196,32 @@ object OCollective {
       for {
        id <- Ident.randomId[F]
        args = LearnClassifierArgs(collective)
-        ut <- UserTask(
+        ut = UserTask(
          id,
          LearnClassifierArgs.taskName,
          true,
          CalEvent(WeekdayComponent.All, DateEvent.All, TimeEvent.All),
          None,
          args
-        ).encode.toPeriodicTask(UserTaskScope(collective), args.makeSubject.some)
-        job <- ut.toJob
-        _ <- queue.insert(job)
+        )
+        _ <- uts
+          .updateOneTask(UserTaskScope(collective), args.makeSubject.some, ut)
        _ <- joex.notifyAllNodes
      } yield ()

     def startEmptyTrash(args: EmptyTrashArgs): F[Unit] =
       for {
        id <- Ident.randomId[F]
-        ut <- UserTask(
+        ut = UserTask(
          id,
          EmptyTrashArgs.taskName,
          true,
          CalEvent(WeekdayComponent.All, DateEvent.All, TimeEvent.All),
          None,
          args
-        ).encode.toPeriodicTask(UserTaskScope(args.collective), args.makeSubject.some)
-        job <- ut.toJob
-        _ <- queue.insert(job)
+        )
+        _ <- uts
+          .updateOneTask(UserTaskScope(args.collective), args.makeSubject.some, ut)
        _ <- joex.notifyAllNodes
      } yield ()
@@ -321,7 +321,7 @@ object OCollective {
          AllPreviewsArgs(Some(account.collective), storeMode),
          Some(account.user)
        )
-        _ <- queue.insertIfNew(job)
+        _ <- jobStore.insertIfNew(job.encode)
        _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
      } yield UpdateResult.success

View File

@ -0,0 +1,100 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.backend.ops
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import docspell.backend.JobFactory
import docspell.backend.ops.OFileRepository.IntegrityResult
import docspell.common._
import docspell.scheduler.{Job, JobStore}
import docspell.store.Store
import scodec.bits.ByteVector
trait OFileRepository[F[_]] {
/** Inserts the job or return None if such a job already is running. */
def cloneFileRepository(
args: FileCopyTaskArgs,
notifyJoex: Boolean
): F[Option[Job[FileCopyTaskArgs]]]
def checkIntegrityAll(
part: FileKeyPart,
notifyJoex: Boolean
): F[Option[Job[FileIntegrityCheckArgs]]]
def checkIntegrity(key: FileKey, hash: Option[ByteVector]): F[Option[IntegrityResult]]
}
object OFileRepository {
case class IntegrityResult(ok: Boolean, key: FileKey)
def apply[F[_]: Async](
store: Store[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Resource[F, OFileRepository[F]] =
Resource.pure(new OFileRepository[F] {
private[this] val logger = docspell.logging.getLogger[F]
def cloneFileRepository(
args: FileCopyTaskArgs,
notifyJoex: Boolean
): F[Option[Job[FileCopyTaskArgs]]] =
for {
job <- JobFactory.fileCopy(args)
flag <- jobStore.insertIfNew(job.encode)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield Option.when(flag)(job)
def checkIntegrityAll(
part: FileKeyPart,
notifyJoex: Boolean
): F[Option[Job[FileIntegrityCheckArgs]]] =
for {
job <- JobFactory.integrityCheck(FileIntegrityCheckArgs(part))
flag <- jobStore.insertIfNew(job.encode)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield Option.when(flag)(job)
def checkIntegrity(
key: FileKey,
hash: Option[ByteVector]
): F[Option[IntegrityResult]] =
(for {
_ <- OptionT.liftF(
logger.debugWith(s"Checking file $key")(_.data("fileKey", key))
)
expectedHash <-
hash.fold(OptionT(store.fileRepo.findMeta(key)).map(_.checksum))(h =>
OptionT.pure[F](h)
)
actualHash <-
OptionT.liftF(
logger.debugWith(s"Calculating new hash for $key")(
_.data("fileKey", key)
) *>
store.fileRepo
.getBytes(key)
.through(fs2.hash.sha256)
.compile
.foldChunks(ByteVector.empty)(_ ++ _.toByteVector)
)
res = IntegrityResult(expectedHash == actualHash, key)
_ <- OptionT.liftF {
if (res.ok) logger.debug(s"File hashes match for $key")
else logger.warnWith(s"File hashes differ for: $key")(_.data("fileKey", key))
}
} yield res).value
})
}
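
The integrity check recomputes the checksum by piping the stored bytes through fs2's sha256 and folding the chunks into a ByteVector. Standalone, that computation is just:

import cats.effect.IO
import fs2.Stream
import scodec.bits.ByteVector

// Same hashing as in checkIntegrity above: stream the bytes through the
// sha256 pipe and concatenate the resulting chunks into one ByteVector.
def sha256(bytes: Stream[IO, Byte]): IO[ByteVector] =
  bytes
    .through(fs2.hash.sha256)
    .compile
    .foldChunks(ByteVector.empty)(_ ++ _.toByteVector)

// e.g. sha256(Stream.emits("hello".getBytes).covary[IO])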

View File

@@ -17,8 +17,8 @@ import docspell.common._
 import docspell.ftsclient._
 import docspell.query.ItemQuery._
 import docspell.query.ItemQueryDsl._
+import docspell.scheduler.JobStore
 import docspell.store.queries.{QFolder, QItem, SelectedItem}
-import docspell.store.queue.JobQueue
 import docspell.store.records.RJob
 import docspell.store.{Store, qb}
@@ -81,7 +81,7 @@ object OFulltext {
       itemSearch: OItemSearch[F],
       fts: FtsClient[F],
       store: Store[F],
-      queue: JobQueue[F],
+      jobStore: JobStore[F],
       joex: OJoex[F]
   ): Resource[F, OFulltext[F]] =
     Resource.pure[F, OFulltext[F]](new OFulltext[F] {
@@ -90,7 +90,7 @@ object OFulltext {
       for {
        _ <- logger.info(s"Re-index all.")
        job <- JobFactory.reIndexAll[F]
-        _ <- queue.insertIfNew(job) *> joex.notifyAllNodes
+        _ <- jobStore.insertIfNew(job.encode) *> joex.notifyAllNodes
      } yield ()

     def reindexCollective(account: AccountId): F[Unit] =
@@ -102,7 +102,7 @@ object OFulltext {
        job <- JobFactory.reIndex(account)
        _ <-
          if (exist.isDefined) ().pure[F]
-          else queue.insertIfNew(job) *> joex.notifyAllNodes
+          else jobStore.insertIfNew(job.encode) *> joex.notifyAllNodes
      } yield ()

     def findIndexOnly(maxNoteLen: Int)(
@@ -324,9 +324,7 @@ object OFulltext {
     def apply[A](implicit ev: ItemId[A]): ItemId[A] = ev

     def from[A](f: A => Ident): ItemId[A] =
-      new ItemId[A] {
-        def itemId(a: A) = f(a)
-      }
+      (a: A) => f(a)

     implicit val listItemId: ItemId[ListItem] =
       ItemId.from(_.id)

View File

@@ -18,8 +18,8 @@ import docspell.common._
 import docspell.ftsclient.FtsClient
 import docspell.logging.Logger
 import docspell.notification.api.Event
+import docspell.scheduler.JobStore
 import docspell.store.queries.{QAttachment, QItem, QMoveAttachment}
-import docspell.store.queue.JobQueue
 import docspell.store.records._
 import docspell.store.{AddResult, Store, UpdateResult}
@@ -228,7 +228,7 @@ object OItem {
       store: Store[F],
       fts: FtsClient[F],
       createIndex: CreateIndex[F],
-      queue: JobQueue[F],
+      jobStore: JobStore[F],
       joex: OJoex[F]
   ): Resource[F, OItem[F]] =
     for {
@@ -288,7 +288,7 @@ object OItem {
          )
          ev = Event.TagsChanged.partial(
            itemIds,
-            added.toList.flatten.map(_.id).toList,
+            added.toList.flatten.map(_.id),
            Nil
          )
        } yield AttachedEvent(UpdateResult.success)(ev))
@@ -763,7 +763,7 @@ object OItem {
        job <- OptionT.liftF(
          JobFactory.reprocessItem[F](args, account, Priority.Low)
        )
-        _ <- OptionT.liftF(queue.insertIfNew(job))
+        _ <- OptionT.liftF(jobStore.insertIfNew(job.encode))
        _ <- OptionT.liftF(if (notifyJoex) joex.notifyAllNodes else ().pure[F])
      } yield UpdateResult.success).getOrElse(UpdateResult.notFound)
@@ -777,7 +777,8 @@ object OItem {
        jobs <- items
          .map(item => ReProcessItemArgs(item, Nil))
          .traverse(arg => JobFactory.reprocessItem[F](arg, account, Priority.Low))
-        _ <- queue.insertAllIfNew(jobs)
+          .map(_.map(_.encode))
+        _ <- jobStore.insertAllIfNew(jobs)
        _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
      } yield items.size)
@@ -788,7 +789,7 @@ object OItem {
      ): F[UpdateResult] =
        for {
          job <- JobFactory.convertAllPdfs[F](collective, submitter, Priority.Low)
-          _ <- queue.insertIfNew(job)
+          _ <- jobStore.insertIfNew(job.encode)
          _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
        } yield UpdateResult.success
@@ -799,7 +800,7 @@ object OItem {
      ): F[UpdateResult] =
        for {
          job <- JobFactory.makePreview[F](args, account.some)
-          _ <- queue.insertIfNew(job)
+          _ <- jobStore.insertIfNew(job.encode)
          _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
        } yield UpdateResult.success
@@ -809,7 +810,7 @@ object OItem {
      ): F[UpdateResult] =
        for {
          job <- JobFactory.allPreviews[F](AllPreviewsArgs(None, storeMode), None)
-          _ <- queue.insertIfNew(job)
+          _ <- jobStore.insertIfNew(job.encode)
          _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
        } yield UpdateResult.success

View File

@@ -10,10 +10,10 @@ import cats.data.OptionT
 import cats.effect._
 import cats.implicits._

-import docspell.backend.msg.JobDone
 import docspell.backend.ops.OJob.{CollectiveQueueState, JobCancelResult}
 import docspell.common._
 import docspell.pubsub.api.PubSubT
+import docspell.scheduler.msg.JobDone
 import docspell.store.Store
 import docspell.store.UpdateResult
 import docspell.store.queries.QJob

View File

@@ -10,14 +10,16 @@ import cats.Applicative
 import cats.effect._
 import cats.implicits._

-import docspell.backend.msg.{CancelJob, Topics}
 import docspell.common.Ident
 import docspell.pubsub.api.PubSubT
+import docspell.scheduler.msg.{CancelJob, JobsNotify, PeriodicTaskNotify}

 trait OJoex[F[_]] {

   def notifyAllNodes: F[Unit]

+  def notifyPeriodicTasks: F[Unit]
+
   def cancelJob(job: Ident, worker: Ident): F[Unit]
 }
@@ -26,7 +28,10 @@ object OJoex {
     Resource.pure[F, OJoex[F]](new OJoex[F] {

       def notifyAllNodes: F[Unit] =
-        pubSub.publish1IgnoreErrors(Topics.jobsNotify, ()).as(())
+        pubSub.publish1IgnoreErrors(JobsNotify(), ()).void
+
+      def notifyPeriodicTasks: F[Unit] =
+        pubSub.publish1IgnoreErrors(PeriodicTaskNotify(), ()).void

       def cancelJob(job: Ident, worker: Ident): F[Unit] =
         pubSub.publish1IgnoreErrors(CancelJob.topic, CancelJob(job, worker)).as(())
View File

@@ -14,8 +14,8 @@ import fs2.Stream

 import docspell.backend.JobFactory
 import docspell.common._
+import docspell.scheduler.{Job, JobStore}
 import docspell.store.Store
-import docspell.store.queue.JobQueue
 import docspell.store.records._

 trait OUpload[F[_]] {
@@ -108,7 +108,7 @@ object OUpload {
   def apply[F[_]: Sync](
       store: Store[F],
-      queue: JobQueue[F],
+      jobStore: JobStore[F],
       joex: OJoex[F]
   ): Resource[F, OUpload[F]] =
     Resource.pure[F, OUpload[F]](new OUpload[F] {
@@ -187,10 +187,10 @@ object OUpload {
       private def submitJobs(
           notifyJoex: Boolean
-      )(jobs: Vector[RJob]): F[OUpload.UploadResult] =
+      )(jobs: Vector[Job[String]]): F[OUpload.UploadResult] =
        for {
          _ <- logger.debug(s"Storing jobs: $jobs")
-          _ <- queue.insertAll(jobs)
+          _ <- jobStore.insertAll(jobs)
          _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
        } yield UploadResult.Success
@@ -244,7 +244,9 @@ object OUpload {
          account: AccountId,
          prio: Priority,
          tracker: Option[Ident]
-      ): F[Vector[RJob]] =
-        JobFactory.processItems[F](args, account, prio, tracker)
+      ): F[Vector[Job[String]]] =
+        JobFactory
+          .processItems[F](args, account, prio, tracker)
+          .map(_.map(_.encode))
    })
 }

View File

@@ -13,10 +13,9 @@ import fs2.Stream

 import docspell.common._
 import docspell.notification.api.{ChannelRef, PeriodicDueItemsArgs, PeriodicQueryArgs}
+import docspell.scheduler.usertask.{UserTask, UserTaskScope, UserTaskStore}
 import docspell.store.Store
-import docspell.store.queue.JobQueue
 import docspell.store.records.RNotificationChannel
-import docspell.store.usertask._

 import io.circe.Encoder
@@ -86,7 +85,6 @@ object OUserTask {
   def apply[F[_]: Async](
       taskStore: UserTaskStore[F],
       store: Store[F],
-      queue: JobQueue[F],
       joex: OJoex[F]
   ): Resource[F, OUserTask[F]] =
     Resource.pure[F, OUserTask[F]](new OUserTask[F] {
@@ -95,9 +93,7 @@ object OUserTask {
          implicit E: Encoder[A]
      ): F[Unit] =
        for {
-          ptask <- task.encode.toPeriodicTask(scope, subject)
-          job <- ptask.toJob
-          _ <- queue.insert(job)
+          _ <- taskStore.executeNow(scope, subject, task)
          _ <- joex.notifyAllNodes
        } yield ()
@@ -124,7 +120,7 @@ object OUserTask {
      ): F[Unit] =
        for {
          _ <- taskStore.updateTask[ScanMailboxArgs](scope, subject, task)
-          _ <- joex.notifyAllNodes
+          _ <- joex.notifyPeriodicTasks
        } yield ()

     def getNotifyDueItems(
@@ -153,7 +149,7 @@ object OUserTask {
      ): F[Unit] =
        for {
          _ <- taskStore.updateTask[PeriodicDueItemsArgs](scope, subject, task)
-          _ <- joex.notifyAllNodes
+          _ <- joex.notifyPeriodicTasks
        } yield ()

     def getPeriodicQuery(scope: UserTaskScope): Stream[F, UserTask[PeriodicQueryArgs]] =
@@ -180,7 +176,7 @@ object OUserTask {
      ): F[Unit] =
        for {
          _ <- taskStore.updateTask[PeriodicQueryArgs](scope, subject, task)
-          _ <- joex.notifyAllNodes
+          _ <- joex.notifyPeriodicTasks
        } yield ()

     // When retrieving arguments containing channel references, we must update

View File

@@ -14,7 +14,8 @@ case class Banner(
     configFile: Option[String],
     appId: Ident,
     baseUrl: LenientUri,
-    ftsUrl: Option[LenientUri]
+    ftsUrl: Option[LenientUri],
+    fileStoreConfig: FileStoreConfig
 ) {

   private val banner =
@@ -36,6 +37,7 @@ case class Banner(
       s"Database: ${jdbcUrl.asString}",
       s"Fts: ${ftsUrl.map(_.asString).getOrElse("-")}",
       s"Config: ${configFile.getOrElse("")}",
+      s"FileRepo: ${fileStoreConfig}",
       ""
     )

View File

@@ -10,6 +10,8 @@ object DocspellSystem {

   val user = Ident.unsafe("docspell-system")
   val taskGroup = user
+  val account: AccountId = AccountId(taskGroup, user)
+
   val migrationTaskTracker = Ident.unsafe("full-text-index-tracker")
   val allPreviewTaskTracker = Ident.unsafe("generate-all-previews")
   val allPageCountTaskTracker = Ident.unsafe("all-page-count-tracker")

View File

@ -0,0 +1,56 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import cats.data.NonEmptyList
import docspell.common.FileCopyTaskArgs.Selection
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.syntax._
import io.circe.{Decoder, Encoder}
/** This is the input to the `FileCopyTask`. The task copies all files from on
* FileRepository to one ore more target repositories.
*
* If no `from` is given, the default file repository is used. For targets, a list of ids
* can be specified that must match a configured file store in the config file. When
* selecting "all", it means all enabled stores.
*/
final case class FileCopyTaskArgs(from: Option[Ident], to: Selection)
object FileCopyTaskArgs {
val taskName = Ident.unsafe("copy-file-repositories")
sealed trait Selection
object Selection {
case object All extends Selection
case class Stores(ids: NonEmptyList[Ident]) extends Selection
implicit val jsonEncoder: Encoder[Selection] =
Encoder.instance {
case All => "!all".asJson
case Stores(ids) => ids.toList.asJson
}
implicit val jsonDecoder: Decoder[Selection] =
Decoder.instance { cursor =>
cursor.value.asString match {
case Some(s) if s.equalsIgnoreCase("!all") => Right(All)
case _ => cursor.value.as[NonEmptyList[Ident]].map(Stores.apply)
}
}
}
implicit val jsonDecoder: Decoder[FileCopyTaskArgs] =
deriveDecoder
implicit val jsonEncoder: Encoder[FileCopyTaskArgs] =
deriveEncoder
}
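
Given these codecs, the two selection forms serialize roughly as sketched below (the ids are illustrative; the derived encoder writes `from` as null when absent):

import cats.data.NonEmptyList
import docspell.common._
import docspell.common.FileCopyTaskArgs.Selection
import io.circe.syntax._

// Copy from the default repository into every enabled store:
FileCopyTaskArgs(None, Selection.All).asJson.noSpaces
// {"from":null,"to":"!all"}

// Copy into two specific stores configured in the config file:
FileCopyTaskArgs(
  None,
  Selection.Stores(NonEmptyList.of(Ident.unsafe("minio"), Ident.unsafe("filesystem")))
).asJson.noSpaces
// {"from":null,"to":["minio","filesystem"]}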

View File

@ -0,0 +1,22 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
final case class FileIntegrityCheckArgs(pattern: FileKeyPart) {}
object FileIntegrityCheckArgs {
val taskName: Ident = Ident.unsafe("all-file-integrity-check")
implicit val jsonDecoder: Decoder[FileIntegrityCheckArgs] =
deriveDecoder
implicit val jsonEncoder: Encoder[FileIntegrityCheckArgs] =
deriveEncoder
}

View File

@@ -9,7 +9,10 @@ package docspell.common
 import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
 import io.circe.{Decoder, Encoder}

-case class FileKey(collective: Ident, category: FileCategory, id: Ident)
+final case class FileKey(collective: Ident, category: FileCategory, id: Ident) {
+  override def toString =
+    s"${collective.id}/${category.id.id}/${id.id}"
+}

 object FileKey {

View File

@ -0,0 +1,53 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import cats.implicits._
import io.circe.syntax._
import io.circe.{Decoder, DecodingFailure, Encoder}
sealed trait FileKeyPart {}
object FileKeyPart {
case object Empty extends FileKeyPart
final case class Collective(collective: Ident) extends FileKeyPart
final case class Category(collective: Ident, category: FileCategory) extends FileKeyPart
final case class Key(key: FileKey) extends FileKeyPart
implicit val jsonEncoder: Encoder[FileKeyPart] =
Encoder.instance {
case Empty => ().asJson
case Collective(cid) =>
Map("collective" -> cid.asJson).asJson
case Category(cid, cat) =>
Map("collective" -> cid.asJson, "category" -> cat.asJson).asJson
case Key(key) =>
key.asJson
}
implicit val jsonDecoder: Decoder[FileKeyPart] =
Decoder.instance { cursor =>
for {
cid <- cursor.getOrElse[Option[Ident]]("collective")(None)
cat <- cursor.getOrElse[Option[FileCategory]]("category")(None)
emptyObj = cursor.keys.exists(_.isEmpty)
c3 = cursor.as[FileKey].map(Key).toOption
c2 = (cid, cat).mapN(Category)
c1 = cid.map(Collective)
c0 = Option.when(emptyObj)(Empty)
c = c3.orElse(c2).orElse(c1).orElse(c0)
res <- c.toRight(DecodingFailure("", cursor.history))
} yield res
}
}
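
The decoder tries the most specific shape first: a full FileKey, then collective plus category, then collective alone, then the empty object. The corresponding JSON forms, roughly (the collective id is illustrative):

import docspell.common._
import io.circe.syntax._

// Unit encodes to an empty JSON object, which is exactly what the decoder's
// emptyObj check (an object cursor with zero keys) recognizes.
FileKeyPart.Empty.asJson.noSpaces // {}

FileKeyPart.Collective(Ident.unsafe("acme")).asJson.noSpaces
// {"collective":"acme"}

// Category adds the category field; Key serializes the FileKey itself:
def categoryJson(cat: FileCategory): String =
  FileKeyPart.Category(Ident.unsafe("acme"), cat).asJson.noSpaces
// {"collective":"acme","category":<cat as json>}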

View File

@ -0,0 +1,39 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import fs2.io.file.Path
sealed trait FileStoreConfig {
def enabled: Boolean
def storeType: FileStoreType
}
object FileStoreConfig {
case class DefaultDatabase(enabled: Boolean) extends FileStoreConfig {
val storeType = FileStoreType.DefaultDatabase
}
case class FileSystem(
enabled: Boolean,
directory: Path
) extends FileStoreConfig {
val storeType = FileStoreType.FileSystem
}
case class S3(
enabled: Boolean,
endpoint: String,
accessKey: String,
secretKey: String,
bucket: String
) extends FileStoreConfig {
val storeType = FileStoreType.S3
override def toString =
s"S3(enabled=$enabled, endpoint=$endpoint, bucket=$bucket, accessKey=$accessKey, secretKey=***)"
}
}

View File

@ -0,0 +1,32 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import cats.data.NonEmptyList
sealed trait FileStoreType { self: Product =>
def name: String =
productPrefix.toLowerCase
}
object FileStoreType {
case object DefaultDatabase extends FileStoreType
case object S3 extends FileStoreType
case object FileSystem extends FileStoreType
val all: NonEmptyList[FileStoreType] =
NonEmptyList.of(DefaultDatabase, S3, FileSystem)
def fromString(str: String): Either[String, FileStoreType] =
all
.find(_.name.equalsIgnoreCase(str))
.toRight(s"Invalid file store type: $str")
def unsafeFromString(str: String): FileStoreType =
fromString(str).fold(sys.error, identity)
}
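
The lookup is case-insensitive over the lowercased `productPrefix`, so for example:

FileStoreType.fromString("FileSystem") // Right(FileStoreType.FileSystem)
FileStoreType.fromString("s3")         // Right(FileStoreType.S3)
FileStoreType.fromString("postgres")   // Left("Invalid file store type: postgres")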

View File

@@ -6,10 +6,8 @@

 package docspell.common.syntax

-import cats.implicits._
-
 import io.circe.Decoder
-import io.circe.parser._
+import io.circe.parser

 trait StringSyntax {
   implicit class EvenMoreStringOps(s: String) {
@@ -18,9 +16,8 @@ trait StringSyntax {
     def parseJsonAs[A](implicit d: Decoder[A]): Either[Throwable, A] =
       for {
-        json <- parse(s).leftMap(_.underlying)
-        value <- json.as[A]
-      } yield value
+        json <- parser.decode[A](s)
+      } yield json
   }
 }
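
The simplification relies on `io.circe.parser.decode`, which parses and decodes in one step. A quick usage sketch (the `Payload` type and the aggregate syntax import are illustrative assumptions; the extension method comes from having `StringSyntax` in scope):

import docspell.common.syntax.all._ // assumption: the usual aggregate syntax import
import io.circe.Decoder
import io.circe.generic.semiauto.deriveDecoder

// hypothetical payload type, for illustration only
case class Payload(name: String, count: Int)
object Payload {
  implicit val decoder: Decoder[Payload] = deriveDecoder
}

"""{"name":"a","count":1}""".parseJsonAs[Payload] // Right(Payload("a",1))
"""{"name":"a"}""".parseJsonAs[Payload]           // Left(DecodingFailure at .count)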

View File

@@ -18,9 +18,18 @@ import docspell.logging.{Level, LogConfig}

 import com.github.eikek.calev.CalEvent
 import pureconfig.ConfigReader
 import pureconfig.error.{CannotConvert, FailureReason}
+import pureconfig.generic.{CoproductHint, FieldCoproductHint}
 import scodec.bits.ByteVector

 object Implicits {
+  // FieldCoproductHint would derive the value "s-3" from the class name S3;
+  // override it so the config can simply say "s3".
+  implicit val fileStoreCoproductHint: CoproductHint[FileStoreConfig] =
+    new FieldCoproductHint[FileStoreConfig]("type") {
+      override def fieldValue(name: String) =
+        if (name.equalsIgnoreCase("S3")) "s3"
+        else super.fieldValue(name)
+    }
+
   implicit val accountIdReader: ConfigReader[AccountId] =
     ConfigReader[String].emap(reason(AccountId.parse))
@@ -42,6 +51,9 @@ object Implicits {
   implicit val identReader: ConfigReader[Ident] =
     ConfigReader[String].emap(reason(Ident.fromString))

+  implicit def identMapReader[B: ConfigReader]: ConfigReader[Map[Ident, B]] =
+    pureconfig.configurable.genericMapReader[Ident, B](reason(Ident.fromString))
+
   implicit val byteVectorReader: ConfigReader[ByteVector] =
     ConfigReader[String].emap(reason { str =>
       if (str.startsWith("hex:"))
@@ -70,6 +82,9 @@ object Implicits {
   implicit val logLevelReader: ConfigReader[Level] =
     ConfigReader[String].emap(reason(Level.fromString))

+  implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
+    ConfigReader[String].emap(reason(FileStoreType.fromString))
+
   def reason[A: ClassTag](
       f: String => Either[String, A]
   ): String => Either[FailureReason, A] =

View File

@@ -194,6 +194,11 @@ docspell.joex {
       # How often the node must be unreachable, before it is removed.
       min-not-found = 2
     }
+
+    # Checks all files against their checksum
+    integrity-check {
+      enabled = true
+    }
   }

   # A periodic task to check for new releases of docspell. It can
@@ -646,6 +651,41 @@ Docpell Update Check
     # restrict file types that should be handed over to processing.
     # By default all files are allowed.
     valid-mime-types = [ ]
+
+    # The id of an enabled store from the `stores` array that should
+    # be used.
+    #
+    # IMPORTANT NOTE: All nodes must have the exact same file store
+    # configuration!
+    default-store = "database"
+
+    # A list of possible file stores. Each entry must have a unique
+    # id. The `type` is one of: default-database, file-system, s3.
+    #
+    # The enabled property currently serves to define target stores
+    # for the "copy files" task. All stores with enabled=false are
+    # removed from the list. The `default-store` must be enabled.
+    stores = {
+      database =
+        { enabled = true
+          type = "default-database"
+        }
+
+      filesystem =
+        { enabled = false
+          type = "file-system"
+          directory = "/some/directory"
+        }
+
+      minio =
+        { enabled = false
+          type = "s3"
+          endpoint = "http://localhost:9000"
+          access-key = "username"
+          secret-key = "password"
+          bucket = "docspell"
+        }
+    }
   }

   # Configuration of the full-text search engine.
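
Loosely, pureconfig turns the `files` section above into the `Config.Files` value introduced earlier in this PR; a sketch of the resulting structure (pureconfig wiring omitted, values taken from the sample config):

import docspell.common._
import fs2.io.file.Path

// roughly what the HOCON above materializes to
val stores = Map(
  Ident.unsafe("database") -> FileStoreConfig.DefaultDatabase(enabled = true),
  Ident.unsafe("filesystem") ->
    FileStoreConfig.FileSystem(enabled = false, Path("/some/directory")),
  Ident.unsafe("minio") ->
    FileStoreConfig.S3(false, "http://localhost:9000", "username", "password", "docspell")
)
// defaultStore = Ident.unsafe("database"); only enabled stores may be the default,
// which is what Config.Files.validate (backend Config, above) enforces.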

View File

@@ -19,10 +19,10 @@ import docspell.ftssolr.SolrConfig
 import docspell.joex.analysis.RegexNerFile
 import docspell.joex.hk.HouseKeepingConfig
 import docspell.joex.routes.InternalHeader
-import docspell.joex.scheduler.{PeriodicSchedulerConfig, SchedulerConfig}
 import docspell.joex.updatecheck.UpdateCheckConfig
 import docspell.logging.LogConfig
 import docspell.pubsub.naive.PubSubConfig
+import docspell.scheduler.{PeriodicSchedulerConfig, SchedulerConfig}
 import docspell.store.JdbcConfig

 case class Config(

View File

@@ -10,7 +10,7 @@ import cats.effect.Async

 import docspell.config.Implicits._
 import docspell.config.{ConfigFactory, Validation}
-import docspell.joex.scheduler.CountingScheme
+import docspell.scheduler.CountingScheme

 import emil.MailAddress
 import emil.javamail.syntax._
@@ -19,6 +19,7 @@ import pureconfig.generic.auto._
 import yamusca.imports._

 object ConfigFile {
+  // IntelliJ is wrong, this is required
   import Implicits._

   def loadConfig[F[_]: Async](args: List[String]): F[Config] = {
@@ -51,6 +52,7 @@ object ConfigFile {
       Validation.failWhen(
         cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
         "No subject given for enabled update check!"
-      )
+      ),
+      Validation(cfg => cfg.files.validate.map(_ => cfg))
     )
 }

View File

@@ -7,7 +7,7 @@
 package docspell.joex

 import docspell.common.Ident
-import docspell.joex.scheduler.{PeriodicScheduler, Scheduler}
+import docspell.scheduler.{PeriodicScheduler, Scheduler}
 import docspell.store.records.RJobLog

 trait JoexApp[F[_]] {

View File

@ -10,37 +10,23 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.concurrent.SignallingRef import fs2.concurrent.SignallingRef
import docspell.analysis.TextAnalyser
import docspell.backend.MailAddressCodec import docspell.backend.MailAddressCodec
import docspell.backend.fulltext.CreateIndex
import docspell.backend.msg.{CancelJob, JobQueuePublish, Topics}
import docspell.backend.ops._ import docspell.backend.ops._
import docspell.common._ import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.analysis.RegexNerFile
import docspell.joex.emptytrash._ import docspell.joex.emptytrash._
import docspell.joex.fts.{MigrationTask, ReIndexTask} import docspell.joex.fts.MigrationTask
import docspell.joex.hk._ import docspell.joex.hk._
import docspell.joex.learn.LearnClassifierTask
import docspell.joex.notify._
import docspell.joex.pagecount._ import docspell.joex.pagecount._
import docspell.joex.pdfconv.ConvertAllPdfTask
import docspell.joex.pdfconv.PdfConvTask
import docspell.joex.preview._ import docspell.joex.preview._
import docspell.joex.process.ItemHandler
import docspell.joex.process.ReProcessItem
import docspell.joex.scanmailbox._
import docspell.joex.scheduler._
import docspell.joex.updatecheck._ import docspell.joex.updatecheck._
import docspell.notification.api.NotificationModule import docspell.notification.api.NotificationModule
import docspell.notification.impl.NotificationModuleImpl import docspell.notification.impl.NotificationModuleImpl
import docspell.pubsub.api.{PubSub, PubSubT} import docspell.pubsub.api.{PubSub, PubSubT}
import docspell.scheduler._
import docspell.scheduler.impl.{JobStoreModuleBuilder, SchedulerModuleBuilder}
import docspell.scheduler.usertask.{UserTaskScope, UserTaskStore}
import docspell.store.Store import docspell.store.Store
import docspell.store.queue._
import docspell.store.records.{REmptyTrashSetting, RJobLog} import docspell.store.records.{REmptyTrashSetting, RJobLog}
import docspell.store.usertask.UserTaskScope
import docspell.store.usertask.UserTaskStore
import emil.javamail._ import emil.javamail._
import org.http4s.client.Client import org.http4s.client.Client
@ -48,9 +34,8 @@ import org.http4s.client.Client
final class JoexAppImpl[F[_]: Async]( final class JoexAppImpl[F[_]: Async](
cfg: Config, cfg: Config,
store: Store[F], store: Store[F],
queue: JobQueue[F], uts: UserTaskStore[F],
pubSubT: PubSubT[F], jobStore: JobStore[F],
pstore: PeriodicTaskStore[F],
termSignal: SignallingRef[F, Boolean], termSignal: SignallingRef[F, Boolean],
notificationMod: NotificationModule[F], notificationMod: NotificationModule[F],
val scheduler: Scheduler[F], val scheduler: Scheduler[F],
@ -67,20 +52,11 @@ final class JoexAppImpl[F[_]: Async](
_ <- Async[F].start(eventConsume) _ <- Async[F].start(eventConsume)
_ <- scheduler.periodicAwake _ <- scheduler.periodicAwake
_ <- periodicScheduler.periodicAwake _ <- periodicScheduler.periodicAwake
_ <- subscriptions _ <- scheduler.startSubscriptions
_ <- periodicScheduler.startSubscriptions
} yield () } yield ()
} }
def subscriptions =
for {
_ <- Async[F].start(pubSubT.subscribeSink(Topics.jobsNotify) { _ =>
scheduler.notifyChange
})
_ <- Async[F].start(pubSubT.subscribeSink(CancelJob.topic) { msg =>
scheduler.requestCancel(msg.body.jobId).as(())
})
} yield ()
def findLogs(jobId: Ident): F[Vector[RJobLog]] = def findLogs(jobId: Ident): F[Vector[RJobLog]] =
store.transact(RJobLog.findLogs(jobId)) store.transact(RJobLog.findLogs(jobId))
@ -90,32 +66,30 @@ final class JoexAppImpl[F[_]: Async](
private def scheduleBackgroundTasks: F[Unit] = private def scheduleBackgroundTasks: F[Unit] =
HouseKeepingTask HouseKeepingTask
.periodicTask[F](cfg.houseKeeping.schedule) .periodicTask[F](cfg.houseKeeping.schedule)
.flatMap(pstore.insert) *> .flatMap(t => uts.updateTask(UserTaskScope.system, t.summary, t)) *>
scheduleEmptyTrashTasks *> scheduleEmptyTrashTasks *>
UpdateCheckTask UpdateCheckTask
.periodicTask(cfg.updateCheck) .periodicTask(cfg.updateCheck)
.flatMap(pstore.insert) *> .flatMap(t => uts.updateTask(UserTaskScope.system, t.summary, t)) *>
MigrationTask.job.flatMap(queue.insertIfNew) *> MigrationTask.job.flatMap(jobStore.insertIfNew) *>
AllPreviewsTask AllPreviewsTask
.job(MakePreviewArgs.StoreMode.WhenMissing, None) .job(MakePreviewArgs.StoreMode.WhenMissing, None)
.flatMap(queue.insertIfNew) *> .flatMap(jobStore.insertIfNew) *>
AllPageCountTask.job.flatMap(queue.insertIfNew).as(()) AllPageCountTask.job.flatMap(jobStore.insertIfNew).void
private def scheduleEmptyTrashTasks: F[Unit] = private def scheduleEmptyTrashTasks: F[Unit] =
store store
.transact( .transact(
REmptyTrashSetting.findForAllCollectives(OCollective.EmptyTrash.default, 50) REmptyTrashSetting.findForAllCollectives(OCollective.EmptyTrash.default, 50)
) )
.evalMap(es => .evalMap { es =>
UserTaskStore(store).use { uts => val args = EmptyTrashArgs(es.cid, es.minAge)
val args = EmptyTrashArgs(es.cid, es.minAge) uts.updateOneTask(
uts.updateOneTask( UserTaskScope(args.collective),
UserTaskScope(args.collective), args.makeSubject.some,
args.makeSubject.some, EmptyTrashTask.userTask(args, es.schedule)
EmptyTrashTask.userTask(args, es.schedule) )
) }
}
)
.compile .compile
.drain .drain
@ -131,179 +105,45 @@ object JoexAppImpl extends MailAddressCodec {
pubSub: PubSub[F] pubSub: PubSub[F]
): Resource[F, JoexApp[F]] = ): Resource[F, JoexApp[F]] =
for { for {
pstore <- PeriodicTaskStore.create(store) joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
joexLogger = docspell.logging.getLogger[F](s"joex-${cfg.appId.id}")
pubSubT = PubSubT(pubSub, joexLogger) pubSubT = PubSubT(pubSub, joexLogger)
javaEmil = javaEmil =
JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug)) JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
notificationMod <- Resource.eval( notificationMod <- Resource.eval(
NotificationModuleImpl[F](store, javaEmil, httpClient, 200) NotificationModuleImpl[F](store, javaEmil, httpClient, 200)
) )
queue <- JobQueuePublish(store, pubSubT, notificationMod)
joex <- OJoex(pubSubT) jobStoreModule = JobStoreModuleBuilder(store)
upload <- OUpload(store, queue, joex) .withPubsub(pubSubT)
fts <- createFtsClient(cfg)(httpClient)
createIndex <- CreateIndex.resource(fts, store)
itemOps <- OItem(store, fts, createIndex, queue, joex)
itemSearchOps <- OItemSearch(store)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
updateCheck <- UpdateCheck.resource(httpClient)
notification <- ONotification(store, notificationMod)
sch <- SchedulerBuilder(cfg.scheduler, store)
.withQueue(queue)
.withPubSub(pubSubT)
.withEventSink(notificationMod) .withEventSink(notificationMod)
.withTask( .build
JobTask.json(
ProcessItemArgs.taskName, tasks <- JoexTasks.resource(
ItemHandler.newItem[F](cfg, itemOps, fts, analyser, regexNer), cfg,
ItemHandler.onCancel[F] jobStoreModule,
) httpClient,
) pubSubT,
.withTask( notificationMod,
JobTask.json( javaEmil
ReProcessItemArgs.taskName,
ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer),
ReProcessItem.onCancel[F]
)
)
.withTask(
JobTask.json(
ScanMailboxArgs.taskName,
ScanMailboxTask[F](cfg.userTasks.scanMailbox, javaEmil, upload, joex),
ScanMailboxTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, fts, createIndex),
MigrationTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, fts, createIndex),
ReIndexTask.onCancel[F]
)
)
.withTask(
JobTask.json(
HouseKeepingTask.taskName,
HouseKeepingTask[F](cfg),
HouseKeepingTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PdfConvTask.taskName,
PdfConvTask[F](cfg),
PdfConvTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ConvertAllPdfArgs.taskName,
ConvertAllPdfTask[F](queue, joex),
ConvertAllPdfTask.onCancel[F]
)
)
.withTask(
JobTask.json(
LearnClassifierArgs.taskName,
LearnClassifierTask[F](cfg.textAnalysis, analyser),
LearnClassifierTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePreviewArgs.taskName,
MakePreviewTask[F](cfg.extraction.preview),
MakePreviewTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPreviewsArgs.taskName,
AllPreviewsTask[F](queue, joex),
AllPreviewsTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePageCountArgs.taskName,
MakePageCountTask[F](),
MakePageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPageCountTask.taskName,
AllPageCountTask[F](queue, joex),
AllPageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
EmptyTrashArgs.taskName,
EmptyTrashTask[F](itemOps, itemSearchOps),
EmptyTrashTask.onCancel[F]
)
)
.withTask(
JobTask.json(
UpdateCheckTask.taskName,
UpdateCheckTask[F](
cfg.updateCheck,
cfg.sendMail,
javaEmil,
updateCheck,
ThisVersion.default
),
UpdateCheckTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicQueryTask.taskName,
PeriodicQueryTask[F](notification),
PeriodicQueryTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicDueItemsTask.taskName,
PeriodicDueItemsTask[F](notification),
PeriodicDueItemsTask.onCancel[F]
)
)
.resource
psch <- PeriodicScheduler.create(
cfg.periodicScheduler,
sch,
queue,
pstore,
joex
) )
schedulerModule <- SchedulerModuleBuilder(jobStoreModule)
.withSchedulerConfig(cfg.scheduler)
.withPeriodicSchedulerConfig(cfg.periodicScheduler)
.withTaskRegistry(tasks.get)
.resource
app = new JoexAppImpl( app = new JoexAppImpl(
cfg, cfg,
store, store,
queue, jobStoreModule.userTasks,
pubSubT, jobStoreModule.jobs,
pstore,
termSignal, termSignal,
notificationMod, notificationMod,
sch, schedulerModule.scheduler,
psch schedulerModule.periodicScheduler
) )
appR <- Resource.make(app.init.map(_ => app))(_.initShutdown) appR <- Resource.make(app.init.map(_ => app))(_.initShutdown)
} yield appR } yield appR
private def createFtsClient[F[_]: Async](
cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
} }
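
The staged wiring above is the heart of this refactoring: JobStoreModuleBuilder assembles job storage plus pubsub and event sink, and SchedulerModuleBuilder layers the scheduler on top of that module. A minimal sketch of the same staged-builder pattern, using hypothetical simplified types in place of the docspell modules:

object BuilderSketch {
  // hypothetical stand-ins for the docspell module types
  final case class JobStoreModule(store: String, pubsub: Option[String])

  final case class JobStoreModuleBuilder(store: String, pubsub: Option[String] = None) {
    def withPubsub(p: String): JobStoreModuleBuilder = copy(pubsub = Some(p))
    def build: JobStoreModule = JobStoreModule(store, pubsub)
  }

  final case class SchedulerModuleBuilder(jobs: JobStoreModule, tasks: List[String] = Nil) {
    def withTaskRegistry(ts: List[String]): SchedulerModuleBuilder = copy(tasks = ts)
    def resource: String = s"scheduler over ${jobs.store} with ${tasks.size} tasks"
  }

  // mirrors the diff: build the job-store module first, then the scheduler on top of it
  val jobStoreModule = JobStoreModuleBuilder("postgres").withPubsub("pubsubT").build
  val scheduler = SchedulerModuleBuilder(jobStoreModule).withTaskRegistry(List("housekeeping")).resource
}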

View File

@ -41,7 +41,7 @@ object JoexServer {
store <- Store.create[F]( store <- Store.create[F](
cfg.jdbc, cfg.jdbc,
cfg.files.chunkSize, cfg.files.defaultFileRepositoryConfig,
pools.connectEC pools.connectEC
) )
settings <- Resource.eval(store.transact(RInternalSetting.create)) settings <- Resource.eval(store.transact(RInternalSetting.create))

View File

@ -0,0 +1,248 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex
import cats.effect.{Async, Resource}
import docspell.analysis.TextAnalyser
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.analysis.RegexNerFile
import docspell.joex.emptytrash.EmptyTrashTask
import docspell.joex.filecopy.{FileCopyTask, FileIntegrityCheckTask}
import docspell.joex.fts.{MigrationTask, ReIndexTask}
import docspell.joex.hk.HouseKeepingTask
import docspell.joex.learn.LearnClassifierTask
import docspell.joex.notify.{PeriodicDueItemsTask, PeriodicQueryTask}
import docspell.joex.pagecount.{AllPageCountTask, MakePageCountTask}
import docspell.joex.pdfconv.{ConvertAllPdfTask, PdfConvTask}
import docspell.joex.preview.{AllPreviewsTask, MakePreviewTask}
import docspell.joex.process.{ItemHandler, ReProcessItem}
import docspell.joex.scanmailbox.ScanMailboxTask
import docspell.joex.updatecheck.{ThisVersion, UpdateCheck, UpdateCheckTask}
import docspell.notification.api.NotificationModule
import docspell.pubsub.api.PubSubT
import docspell.scheduler.impl.JobStoreModuleBuilder
import docspell.scheduler.{JobStoreModule, JobTask, JobTaskRegistry}
import docspell.store.Store
import emil.Emil
import org.http4s.client.Client
final class JoexTasks[F[_]: Async](
cfg: Config,
store: Store[F],
itemOps: OItem[F],
fts: FtsClient[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F],
updateCheck: UpdateCheck[F],
notification: ONotification[F],
fileRepo: OFileRepository[F],
javaEmil: Emil[F],
jobStoreModule: JobStoreModule[F],
upload: OUpload[F],
createIndex: CreateIndex[F],
joex: OJoex[F],
itemSearch: OItemSearch[F]
) {
def get: JobTaskRegistry[F] =
JobTaskRegistry
.empty[F]
.withTask(
JobTask.json(
ProcessItemArgs.taskName,
ItemHandler.newItem[F](cfg, store, itemOps, fts, analyser, regexNer),
ItemHandler.onCancel[F](store)
)
)
.withTask(
JobTask.json(
ReProcessItemArgs.taskName,
ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer, store),
ReProcessItem.onCancel[F]
)
)
.withTask(
JobTask.json(
ScanMailboxArgs.taskName,
ScanMailboxTask[F](cfg.userTasks.scanMailbox, store, javaEmil, upload, joex),
ScanMailboxTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, store, fts, createIndex),
MigrationTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, store, fts, createIndex),
ReIndexTask.onCancel[F]
)
)
.withTask(
JobTask.json(
HouseKeepingTask.taskName,
HouseKeepingTask[F](cfg, store, fileRepo),
HouseKeepingTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PdfConvTask.taskName,
PdfConvTask[F](cfg, store),
PdfConvTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ConvertAllPdfArgs.taskName,
ConvertAllPdfTask[F](jobStoreModule.jobs, joex, store),
ConvertAllPdfTask.onCancel[F]
)
)
.withTask(
JobTask.json(
LearnClassifierArgs.taskName,
LearnClassifierTask[F](cfg.textAnalysis, store, analyser),
LearnClassifierTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePreviewArgs.taskName,
MakePreviewTask[F](cfg.extraction.preview, store),
MakePreviewTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPreviewsArgs.taskName,
AllPreviewsTask[F](jobStoreModule.jobs, joex, store),
AllPreviewsTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePageCountArgs.taskName,
MakePageCountTask[F](store),
MakePageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPageCountTask.taskName,
AllPageCountTask[F](store, jobStoreModule.jobs, joex),
AllPageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
EmptyTrashArgs.taskName,
EmptyTrashTask[F](itemOps, itemSearch),
EmptyTrashTask.onCancel[F]
)
)
.withTask(
JobTask.json(
UpdateCheckTask.taskName,
UpdateCheckTask[F](
cfg.updateCheck,
cfg.sendMail,
store,
javaEmil,
updateCheck,
ThisVersion.default
),
UpdateCheckTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicQueryTask.taskName,
PeriodicQueryTask[F](store, notification),
PeriodicQueryTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicDueItemsTask.taskName,
PeriodicDueItemsTask[F](store, notification),
PeriodicDueItemsTask.onCancel[F]
)
)
.withTask(
JobTask.json(
FileCopyTaskArgs.taskName,
FileCopyTask[F](cfg, store),
FileCopyTask.onCancel[F]
)
)
.withTask(
JobTask.json(
FileIntegrityCheckArgs.taskName,
FileIntegrityCheckTask[F](fileRepo, store),
FileIntegrityCheckTask.onCancel[F]
)
)
}
object JoexTasks {
def resource[F[_]: Async](
cfg: Config,
jobStoreModule: JobStoreModuleBuilder.Module[F],
httpClient: Client[F],
pubSub: PubSubT[F],
notificationModule: NotificationModule[F],
emailService: Emil[F]
): Resource[F, JoexTasks[F]] =
for {
joex <- OJoex(pubSub)
store = jobStoreModule.store
upload <- OUpload(store, jobStoreModule.jobs, joex)
fts <- createFtsClient(cfg)(httpClient)
createIndex <- CreateIndex.resource(fts, store)
itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs, joex)
itemSearchOps <- OItemSearch(store)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
updateCheck <- UpdateCheck.resource(httpClient)
notification <- ONotification(store, notificationModule)
fileRepo <- OFileRepository(store, jobStoreModule.jobs, joex)
} yield new JoexTasks[F](
cfg,
store,
itemOps,
fts,
analyser,
regexNer,
updateCheck,
notification,
fileRepo,
emailService,
jobStoreModule,
upload,
createIndex,
joex,
itemSearchOps
)
private def createFtsClient[F[_]: Async](
cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
}
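
JoexTasks.get folds every task into a JobTaskRegistry, which the scheduler later consults by task name. A small sketch of what such a name-keyed registry amounts to, with a hypothetical simplified JobTask:

object RegistrySketch {
  final case class JobTask(name: String, run: String => Unit) // hypothetical simplification

  final case class JobTaskRegistry(tasks: Map[String, JobTask]) {
    def withTask(t: JobTask): JobTaskRegistry = copy(tasks = tasks + (t.name -> t))
    def find(name: String): Option[JobTask] = tasks.get(name)
  }
  object JobTaskRegistry {
    def empty: JobTaskRegistry = JobTaskRegistry(Map.empty)
  }

  val registry = JobTaskRegistry.empty
    .withTask(JobTask("housekeeping", args => println(s"housekeeping($args)")))
    .withTask(JobTask("file-copy", args => println(s"file-copy($args)")))

  // the scheduler dispatches by the name stored with each queued job
  def dispatch(name: String, args: String): Unit =
    registry.find(name).foreach(_.run(args))
}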

View File

@ -31,7 +31,8 @@ object Main extends IOApp {
Option(System.getProperty("config.file")), Option(System.getProperty("config.file")),
cfg.appId, cfg.appId,
cfg.baseUrl, cfg.baseUrl,
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled) Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
cfg.files.defaultStoreConfig
) )
_ <- logger.info(s"\n${banner.render("***>")}") _ <- logger.info(s"\n${banner.render("***>")}")
_ <- _ <-

View File

@ -12,9 +12,9 @@ import fs2.Stream
import docspell.backend.ops.{OItem, OItemSearch} import docspell.backend.ops.{OItem, OItemSearch}
import docspell.common._ import docspell.common._
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.scheduler.usertask.UserTask
import docspell.store.records.RItem import docspell.store.records.RItem
import docspell.store.usertask.UserTask
import com.github.eikek.calev.CalEvent import com.github.eikek.calev.CalEvent

View File

@ -0,0 +1,144 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.filecopy
import cats.data.NonEmptyList
import cats.effect._
import cats.implicits._
import docspell.common.FileCopyTaskArgs.Selection
import docspell.common.{FileCopyTaskArgs, Ident}
import docspell.joex.Config
import docspell.logging.Logger
import docspell.scheduler.{JobTaskResultEncoder, Task}
import docspell.store.Store
import docspell.store.file.{BinnyUtils, FileRepository, FileRepositoryConfig}
import binny.CopyTool.Counter
import binny.{BinaryId, BinaryStore, CopyTool}
import io.circe.generic.semiauto.deriveCodec
import io.circe.{Codec, Decoder, Encoder}
object FileCopyTask {
type Args = FileCopyTaskArgs
case class CopyResult(success: Boolean, message: String, counter: List[Counter])
object CopyResult {
def noSourceImpl: CopyResult =
CopyResult(false, "No source BinaryStore implementation found!", Nil)
def noTargetImpl: CopyResult =
CopyResult(false, "No target BinaryStore implementation found!", Nil)
def noSourceStore(id: Ident): CopyResult =
CopyResult(
false,
s"No source file repo found with id: ${id.id}. Make sure it is present in the config.",
Nil
)
def noTargetStore: CopyResult =
CopyResult(false, "No target file repositories defined", Nil)
def success(counter: NonEmptyList[Counter]): CopyResult =
CopyResult(true, "Done", counter.toList)
implicit val binaryIdCodec: Codec[BinaryId] =
Codec.from(
Decoder.decodeString.map(BinaryId.apply),
Encoder.encodeString.contramap(_.id)
)
implicit val counterEncoder: Codec[Counter] =
deriveCodec
implicit val jsonCodec: Codec[CopyResult] =
deriveCodec
implicit val jobTaskResultEncoder: JobTaskResultEncoder[CopyResult] =
JobTaskResultEncoder.fromJson[CopyResult].withMessage { result =>
val allGood = result.counter.map(_.success).sum
val failed = result.counter.map(_.failed.size).sum
if (result.success)
s"Successfully copied $allGood files to ${result.counter.size} stores."
else
s"Copying files failed for ${failed} files! ${allGood} were copied successfully."
}
}
def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn(s"Cancelling ${FileCopyTaskArgs.taskName.id} task"))
def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, CopyResult] =
Task { ctx =>
val src = ctx.args.from
.map(id =>
cfg.files.getFileRepositoryConfig(id).toRight(CopyResult.noSourceStore(id))
)
.getOrElse(Right(cfg.files.defaultFileRepositoryConfig))
val targets = ctx.args.to match {
case Selection.All =>
cfg.files.enabledStores.values.toList
.map(FileRepositoryConfig.fromFileStoreConfig(cfg.files.chunkSize, _))
case Selection.Stores(ids) =>
ids.traverse(cfg.files.getFileRepositoryConfig).map(_.toList).getOrElse(Nil)
}
// remove the source from the targets, if present
val data =
for {
srcConfig <- src
trgConfig <- NonEmptyList
.fromList(targets.filter(_ != srcConfig))
.toRight(CopyResult.noTargetStore)
srcRepo = store.createFileRepository(srcConfig, true)
targetRepos = trgConfig.map(store.createFileRepository(_, false))
} yield (srcRepo, targetRepos)
data match {
case Right((from, tos)) =>
ctx.logger.info(s"Start copying all files from $from") *>
copy(ctx.logger, from, tos).flatTap(r =>
if (r.success) ctx.logger.info(s"Copying finished: ${r.counter}")
else ctx.logger.error(s"Copying failed: $r")
)
case Left(res) =>
ctx.logger.error(s"Copying failed: $res") *> res.pure[F]
}
}
def copy[F[_]: Async](
logger: Logger[F],
from: FileRepository[F],
to: NonEmptyList[FileRepository[F]]
): F[CopyResult] =
FileRepository.getDelegate(from) match {
case None =>
CopyResult.noSourceImpl.pure[F]
case Some((src, srcMeta)) =>
to.traverse(FileRepository.getDelegate).map(_.map(_._1)) match {
case None =>
CopyResult.noTargetImpl.pure[F]
case Some(targets) =>
val log = BinnyUtils.LoggerAdapter(logger)
val maxConcurrent = {
val nCores = Runtime.getRuntime.availableProcessors()
if (nCores > 2) nCores / 2 else 1
}
def copyTo(to: BinaryStore[F]) =
CopyTool.copyAll[F](log, src, srcMeta, to, 50, maxConcurrent)
logger.info(s"Start copying ${from.config} -> ${to.map(_.config)}") *>
targets.traverse(copyTo).map(CopyResult.success)
}
}
}
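
The Either-based wiring in apply first resolves the source repository (an explicit id or the default) and then drops the source from the target list, so a store is never copied onto itself. A condensed sketch of that resolution logic, with plain strings standing in for repository configs:

object ResolveSketch {
  def resolve(
      from: Option[String],
      defaultRepo: String,
      known: Map[String, String],
      targets: List[String]
  ): Either[String, (String, List[String])] =
    for {
      src <- from
        .map(id => known.get(id).toRight(s"no source store: $id"))
        .getOrElse(Right(defaultRepo))
      // the source must never appear among the copy targets
      tgts <- Some(targets.filter(_ != src)).filter(_.nonEmpty).toRight("no target stores")
    } yield (src, tgts)

  // resolve(None, "fs", Map("s3" -> "s3"), List("fs", "s3")) == Right(("fs", List("s3")))
}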

View File

@ -0,0 +1,91 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.filecopy
import cats.Monoid
import cats.effect._
import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.backend.ops.OFileRepository.IntegrityResult
import docspell.common.{FileIntegrityCheckArgs, FileKey}
import docspell.scheduler.{JobTaskResultEncoder, Task}
import docspell.store.Store
import docspell.store.records.RFileMeta
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
object FileIntegrityCheckTask {
type Args = FileIntegrityCheckArgs
case class Result(ok: Int, failedKeys: Set[FileKey], notFoundKeys: Set[FileKey]) {
override def toString: String =
s"Result(ok=$ok, failed=${failedKeys.size}, notFound=${notFoundKeys.size}, " +
s"keysFailed=$failedKeys, notFoundKeys=$notFoundKeys)"
}
object Result {
val empty = Result(0, Set.empty, Set.empty)
def notFound(key: FileKey) = Result(0, Set.empty, Set(key))
def from(r: IntegrityResult): Result =
if (r.ok) Result(1, Set.empty, Set.empty) else Result(0, Set(r.key), Set.empty)
implicit val monoid: Monoid[Result] =
Monoid.instance(
empty,
(a, b) =>
Result(
a.ok + b.ok,
a.failedKeys ++ b.failedKeys,
a.notFoundKeys ++ b.notFoundKeys
)
)
implicit val jsonEncoder: Encoder[Result] =
deriveEncoder
implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] =
JobTaskResultEncoder.fromJson[Result].withMessage { result =>
s"Integrity check finished. Ok: ${result.ok}, " +
s"Failed: ${result.failedKeys.size}, Not found: ${result.notFoundKeys.size}"
}
}
def apply[F[_]: Sync](ops: OFileRepository[F], store: Store[F]): Task[F, Args, Result] =
Task { ctx =>
store
.transact(
RFileMeta
.findAll(ctx.args.pattern, 50)
)
.chunks
.evalTap(c => ctx.logger.info(s"Checking next ${c.size} files…"))
.unchunks
.evalMap(meta =>
ops.checkIntegrity(meta.id, meta.checksum.some).flatMap {
case Some(r) =>
Result.from(r).pure[F]
case None =>
ctx.logger
.error(s"File '${meta.id.toString}' not found in file repository")
.as(Result.notFound(meta.id))
}
)
.foldMonoid
.compile
.lastOrError
.flatTap(result =>
ctx.logger
.infoWith(s"File check result: $result")(_.data("integrityCheck", result))
)
}
def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn(s"Cancelling ${FileIntegrityCheckArgs.taskName.id} task"))
}
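
Result is given a Monoid so the stream of per-file checks can be reduced with foldMonoid into a single summary. The same combine on a stripped-down result type, assuming cats is available:

import cats.Monoid
import cats.syntax.all._

object CheckMonoidSketch {
  final case class Check(ok: Int, failed: Set[String]) // hypothetical reduced Result

  implicit val checkMonoid: Monoid[Check] =
    Monoid.instance(Check(0, Set.empty), (a, b) => Check(a.ok + b.ok, a.failed ++ b.failed))

  // folding per-file outcomes into one summary, as foldMonoid does over the stream
  val total = List(Check(1, Set.empty), Check(0, Set("f1")), Check(1, Set.empty)).combineAll
  // total == Check(2, Set("f1"))
}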

View File

@ -9,25 +9,13 @@ package docspell.joex.fts
import docspell.backend.fulltext.CreateIndex import docspell.backend.fulltext.CreateIndex
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.Context
import docspell.logging.Logger import docspell.logging.Logger
import docspell.store.Store import docspell.store.Store
case class FtsContext[F[_]]( final case class FtsContext[F[_]](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
store: Store[F], store: Store[F],
fulltext: CreateIndex[F], fulltext: CreateIndex[F],
fts: FtsClient[F], fts: FtsClient[F],
logger: Logger[F] logger: Logger[F]
) )
object FtsContext {
def apply[F[_]](
cfg: Config.FullTextSearch,
fts: FtsClient[F],
fulltext: CreateIndex[F],
ctx: Context[F, _]
): FtsContext[F] =
FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger)
}

View File

@ -14,8 +14,9 @@ import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.Context
import docspell.logging.Logger import docspell.logging.Logger
import docspell.scheduler.Context
import docspell.store.Store
object FtsWork { object FtsWork {
import syntax._ import syntax._
@ -106,10 +107,11 @@ object FtsWork {
def forContext( def forContext(
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
store: Store[F],
fts: FtsClient[F], fts: FtsClient[F],
fulltext: CreateIndex[F] fulltext: CreateIndex[F]
): Kleisli[F, Context[F, _], Unit] = ): Kleisli[F, Context[F, _], Unit] =
mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx)) mt.local(ctx => FtsContext(cfg, store, fulltext, fts, ctx.logger))
} }
} }
} }
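
forContext now adapts the scheduler's Context into an FtsContext with Kleisli#local instead of going through a dedicated constructor. A minimal sketch of that environment adaptation, with stand-in context types and cats-effect IO:

import cats.data.Kleisli
import cats.effect.IO

object LocalSketch {
  final case class SchedulerCtx(logger: String)          // stand-in for Context[F, _]
  final case class FtsCtx(store: String, logger: String) // stand-in for FtsContext[F]

  val work: Kleisli[IO, FtsCtx, Unit] =
    Kleisli(c => IO.println(s"[${c.logger}] indexing via ${c.store}"))

  // widen the required environment, as mt.local(ctx => FtsContext(cfg, store, fulltext, fts, ctx.logger)) does
  val forContext: Kleisli[IO, SchedulerCtx, Unit] =
    work.local(ctx => FtsCtx("postgres", ctx.logger))
}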

View File

@ -13,14 +13,15 @@ import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.Task import docspell.scheduler.{Job, Task}
import docspell.store.records.RJob import docspell.store.Store
object MigrationTask { object MigrationTask {
val taskName = Ident.unsafe("full-text-index") val taskName = Ident.unsafe("full-text-index")
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
store: Store[F],
fts: FtsClient[F], fts: FtsClient[F],
createIndex: CreateIndex[F] createIndex: CreateIndex[F]
): Task[F, Unit, Unit] = ): Task[F, Unit, Unit] =
@ -30,7 +31,7 @@ object MigrationTask {
Task(ctx => Task(ctx =>
for { for {
migs <- migrationTasks[F](fts) migs <- migrationTasks[F](fts)
res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs) res <- Migration[F](cfg, fts, store, createIndex, ctx.logger).run(migs)
} yield res } yield res
) )
) )
@ -38,21 +39,18 @@ object MigrationTask {
def onCancel[F[_]]: Task[F, Unit, Unit] = def onCancel[F[_]]: Task[F, Unit, Unit] =
Task.log[F, Unit](_.warn("Cancelling full-text-index task")) Task.log[F, Unit](_.warn("Cancelling full-text-index task"))
def job[F[_]: Sync]: F[RJob] = def job[F[_]: Sync]: F[Job[String]] =
for { Job
id <- Ident.randomId[F] .createNew(
now <- Timestamp.current[F] taskName,
} yield RJob.newJob( DocspellSystem.taskGroup,
id, (),
taskName, "Create full-text index",
DocspellSystem.taskGroup, DocspellSystem.taskGroup,
(), Priority.Low,
"Create full-text index", Some(DocspellSystem.migrationTaskTracker)
now, )
DocspellSystem.taskGroup, .map(_.encode)
Priority.Low,
Some(DocspellSystem.migrationTaskTracker)
)
def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] = def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] =
fts.initialize.map(_.map(fm => Migration.from(fm))) fts.initialize.map(_.map(fm => Migration.from(fm)))
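
MigrationTask.job now builds a typed Job and erases its arguments to a JSON string via encode, replacing the hand-rolled RJob.newJob. A sketch of that shape with a hypothetical reduced Job type (circe assumed for the JSON step):

import io.circe.Encoder
import io.circe.syntax._

object JobEncodeSketch {
  // hypothetical reduced Job: encode serializes the typed args for storage
  final case class Job[A](task: String, group: String, args: A) {
    def encode(implicit e: Encoder[A]): Job[String] =
      Job(task, group, args.asJson.noSpaces)
  }

  val typed: Job[Unit] = Job("full-text-index", "docspell-system", ())
  val stored: Job[String] = typed.encode // args become "{}"
}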

View File

@ -7,13 +7,15 @@
package docspell.joex.fts package docspell.joex.fts
import cats.effect._ import cats.effect._
import cats.implicits._
import docspell.backend.fulltext.CreateIndex import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.fts.FtsWork.syntax._ import docspell.joex.fts.FtsWork.syntax._
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
object ReIndexTask { object ReIndexTask {
type Args = ReIndexTaskArgs type Args = ReIndexTaskArgs
@ -23,6 +25,7 @@ object ReIndexTask {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
store: Store[F],
fts: FtsClient[F], fts: FtsClient[F],
fulltext: CreateIndex[F] fulltext: CreateIndex[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
@ -30,7 +33,7 @@ object ReIndexTask {
.log[F, Args](_.info(s"Running full-text re-index now")) .log[F, Args](_.info(s"Running full-text re-index now"))
.flatMap(_ => .flatMap(_ =>
Task(ctx => Task(ctx =>
clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx) clearData[F](ctx.args.collective).forContext(cfg, store, fts, fulltext).run(ctx)
) )
) )
@ -42,7 +45,7 @@ object ReIndexTask {
(collective match { (collective match {
case Some(_) => case Some(_) =>
FtsWork FtsWork
.clearIndex(collective) .clearIndex[F](collective)
.recoverWith( .recoverWith(
FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
) ++ ) ++

View File

@ -10,44 +10,51 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.logging.Logger import docspell.logging.Logger
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
import org.http4s.blaze.client.BlazeClientBuilder import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client import org.http4s.client.Client
object CheckNodesTask { object CheckNodesTask {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: HouseKeepingConfig.CheckNodes cfg: HouseKeepingConfig.CheckNodes,
): Task[F, Unit, Unit] = store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx => Task { ctx =>
if (cfg.enabled) if (cfg.enabled)
for { for {
_ <- ctx.logger.info("Check nodes reachability") _ <- ctx.logger.info("Check nodes reachability")
ec = scala.concurrent.ExecutionContext.global ec = scala.concurrent.ExecutionContext.global
_ <- BlazeClientBuilder[F].withExecutionContext(ec).resource.use { client => _ <- BlazeClientBuilder[F].withExecutionContext(ec).resource.use { client =>
checkNodes(ctx, client) checkNodes(ctx.logger, store, client)
} }
_ <- ctx.logger.info( _ <- ctx.logger.info(
s"Remove nodes not found more than ${cfg.minNotFound} times" s"Remove nodes not found more than ${cfg.minNotFound} times"
) )
n <- removeNodes(ctx, cfg) n <- removeNodes(store, cfg)
_ <- ctx.logger.info(s"Removed $n nodes") _ <- ctx.logger.info(s"Removed $n nodes")
} yield () } yield CleanupResult.of(n)
else else
ctx.logger.info("CheckNodes task is disabled in the configuration") ctx.logger.info("CheckNodes task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
} }
def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] = def checkNodes[F[_]: Async](
ctx.store logger: Logger[F],
store: Store[F],
client: Client[F]
): F[Unit] =
store
.transact(RNode.streamAll) .transact(RNode.streamAll)
.evalMap(node => .evalMap(node =>
checkNode(ctx.logger, client)(node.url) checkNode(logger, client)(node.url)
.flatMap(seen => .flatMap(seen =>
if (seen) ctx.store.transact(RNode.resetNotFound(node.id)) if (seen) store.transact(RNode.resetNotFound(node.id))
else ctx.store.transact(RNode.incrementNotFound(node.id)) else store.transact(RNode.incrementNotFound(node.id))
) )
) )
.compile .compile
@ -67,9 +74,9 @@ object CheckNodesTask {
} }
def removeNodes[F[_]]( def removeNodes[F[_]](
ctx: Context[F, _], store: Store[F],
cfg: HouseKeepingConfig.CheckNodes cfg: HouseKeepingConfig.CheckNodes
): F[Int] = ): F[Int] =
ctx.store.transact(RNode.deleteNotFound(cfg.minNotFound)) store.transact(RNode.deleteNotFound(cfg.minNotFound))
} }
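
The reachability bookkeeping is simple: a successful probe resets a node's not-found counter, a failed one increments it, and nodes past the threshold are dropped. An in-memory sketch of that logic, with a stand-in node model (the comments name the real record methods from the diff):

object NodeCheckSketch {
  final case class Node(id: String, notFound: Int)

  def update(node: Node, reachable: Boolean): Node =
    if (reachable) node.copy(notFound = 0)       // RNode.resetNotFound
    else node.copy(notFound = node.notFound + 1) // RNode.incrementNotFound

  def removeUnreachable(nodes: List[Node], minNotFound: Int): List[Node] =
    nodes.filterNot(_.notFound >= minNotFound)   // RNode.deleteNotFound
}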

View File

@ -10,22 +10,27 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
object CleanupInvitesTask { object CleanupInvitesTask {
def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupInvites): Task[F, Unit, Unit] = def apply[F[_]: Sync](
cfg: HouseKeepingConfig.CleanupInvites,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx => Task { ctx =>
if (cfg.enabled) if (cfg.enabled)
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
ts = now - cfg.olderThan ts = now - cfg.olderThan
_ <- ctx.logger.info(s"Cleanup invitations older than $ts") _ <- ctx.logger.info(s"Cleanup invitations older than $ts")
n <- ctx.store.transact(RInvitation.deleteOlderThan(ts)) n <- store.transact(RInvitation.deleteOlderThan(ts))
_ <- ctx.logger.info(s"Removed $n invitations") _ <- ctx.logger.info(s"Removed $n invitations")
} yield () } yield CleanupResult.of(n)
else else
ctx.logger.info("CleanupInvites task is disabled in the configuration") ctx.logger.info("CleanupInvites task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
} }
} }

View File

@ -11,24 +11,28 @@ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
object CleanupJobsTask { object CleanupJobsTask {
def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupJobs): Task[F, Unit, Unit] = def apply[F[_]: Sync](
cfg: HouseKeepingConfig.CleanupJobs,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx => Task { ctx =>
if (cfg.enabled) if (cfg.enabled)
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
ts = now - cfg.olderThan ts = now - cfg.olderThan
_ <- ctx.logger.info(s"Cleanup jobs older than $ts") _ <- ctx.logger.info(s"Cleanup jobs older than $ts")
n <- deleteDoneJobs(ctx.store, ts, cfg.deleteBatch) n <- deleteDoneJobs(store, ts, cfg.deleteBatch)
_ <- ctx.logger.info(s"Removed $n jobs") _ <- ctx.logger.info(s"Removed $n jobs")
} yield () } yield CleanupResult.of(n)
else else
ctx.logger.info("CleanupJobs task is disabled in the configuration") ctx.logger.info("CleanupJobs task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
} }
def deleteDoneJobs[F[_]: Sync](store: Store[F], ts: Timestamp, batch: Int): F[Int] = def deleteDoneJobs[F[_]: Sync](store: Store[F], ts: Timestamp, batch: Int): F[Int] =

View File

@ -10,22 +10,26 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
object CleanupRememberMeTask { object CleanupRememberMeTask {
def apply[F[_]: Sync](
def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupRememberMe): Task[F, Unit, Unit] = cfg: HouseKeepingConfig.CleanupRememberMe,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx => Task { ctx =>
if (cfg.enabled) if (cfg.enabled)
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
ts = now - cfg.olderThan ts = now - cfg.olderThan
_ <- ctx.logger.info(s"Cleanup remember-me tokens older than $ts") _ <- ctx.logger.info(s"Cleanup remember-me tokens older than $ts")
n <- ctx.store.transact(RRememberMe.deleteOlderThan(ts)) n <- store.transact(RRememberMe.deleteOlderThan(ts))
_ <- ctx.logger.info(s"Removed $n tokens") _ <- ctx.logger.info(s"Removed $n tokens")
} yield () } yield CleanupResult.of(n)
else else
ctx.logger.info("CleanupRememberMe task is disabled in the configuration") ctx.logger.info("CleanupRememberMe task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
} }
} }

View File

@ -0,0 +1,21 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.hk
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
case class CleanupResult(removed: Int, disabled: Boolean) {
def asString = if (disabled) "disabled" else s"$removed"
}
object CleanupResult {
def of(n: Int): CleanupResult = CleanupResult(n, false)
def disabled: CleanupResult = CleanupResult(0, true)
implicit val jsonEncoder: Encoder[CleanupResult] =
deriveEncoder
}
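
With a derived encoder, each cleanup task can report a structured result instead of Unit; the job log then shows either a count or "disabled". A small sketch of the JSON this derivation yields, assuming circe-generic:

import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
import io.circe.syntax._

object CleanupJsonSketch {
  final case class CleanupResult(removed: Int, disabled: Boolean)
  implicit val enc: Encoder[CleanupResult] = deriveEncoder

  val json = CleanupResult(3, disabled = false).asJson.noSpaces
  // json == """{"removed":3,"disabled":false}"""
}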

View File

@ -16,7 +16,8 @@ case class HouseKeepingConfig(
cleanupInvites: CleanupInvites, cleanupInvites: CleanupInvites,
cleanupJobs: CleanupJobs, cleanupJobs: CleanupJobs,
cleanupRememberMe: CleanupRememberMe, cleanupRememberMe: CleanupRememberMe,
checkNodes: CheckNodes checkNodes: CheckNodes,
integrityCheck: IntegrityCheck
) )
object HouseKeepingConfig { object HouseKeepingConfig {
@ -29,4 +30,5 @@ object HouseKeepingConfig {
case class CheckNodes(enabled: Boolean, minNotFound: Int) case class CheckNodes(enabled: Boolean, minNotFound: Int)
case class IntegrityCheck(enabled: Boolean)
} }

View File

@ -9,41 +9,75 @@ package docspell.joex.hk
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.Task import docspell.joex.filecopy.FileIntegrityCheckTask
import docspell.store.records._ import docspell.scheduler.usertask.UserTask
import docspell.store.usertask.UserTaskScope import docspell.scheduler.{JobTaskResultEncoder, Task}
import docspell.store.Store
import com.github.eikek.calev._ import com.github.eikek.calev._
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
object HouseKeepingTask { object HouseKeepingTask {
private val periodicId = Ident.unsafe("docspell-houskeeping") private val periodicId = Ident.unsafe("docspell-houskeeping")
val taskName: Ident = Ident.unsafe("housekeeping") val taskName: Ident = Ident.unsafe("housekeeping")
def apply[F[_]: Async](cfg: Config): Task[F, Unit, Unit] = def apply[F[_]: Async](
cfg: Config,
store: Store[F],
fileRepo: OFileRepository[F]
): Task[F, Unit, Result] = {
val combined =
(
CheckNodesTask(cfg.houseKeeping.checkNodes, store),
CleanupInvitesTask(cfg.houseKeeping.cleanupInvites, store),
CleanupJobsTask(cfg.houseKeeping.cleanupJobs, store),
CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe, store),
IntegrityCheckTask(cfg.houseKeeping.integrityCheck, store, fileRepo)
).mapN(Result.apply)
Task Task
.log[F, Unit](_.info(s"Running house-keeping task now")) .log[F, Unit](_.info(s"Running house-keeping task now"))
.flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites)) .flatMap(_ => combined)
.flatMap(_ => CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe)) }
.flatMap(_ => CleanupJobsTask(cfg.houseKeeping.cleanupJobs))
.flatMap(_ => CheckNodesTask(cfg.houseKeeping.checkNodes))
def onCancel[F[_]]: Task[F, Unit, Unit] = def onCancel[F[_]]: Task[F, Unit, Unit] =
Task.log[F, Unit](_.warn("Cancelling house-keeping task")) Task.log[F, Unit](_.warn("Cancelling house-keeping task"))
def periodicTask[F[_]: Sync](ce: CalEvent): F[RPeriodicTask] = def periodicTask[F[_]: Sync](ce: CalEvent): F[UserTask[Unit]] =
RPeriodicTask UserTask(
.createJson( periodicId,
true, taskName,
UserTaskScope(DocspellSystem.taskGroup), true,
taskName, ce,
(), "Docspell house-keeping".some,
"Docspell house-keeping", ()
Priority.Low, ).pure[F]
ce,
None case class Result(
) checkNodes: CleanupResult,
.map(_.copy(id = periodicId)) cleanupInvites: CleanupResult,
cleanupJobs: CleanupResult,
cleanupRememberMe: CleanupResult,
integrityCheck: FileIntegrityCheckTask.Result
)
object Result {
implicit val jsonEncoder: Encoder[Result] =
deriveEncoder
implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] =
JobTaskResultEncoder.fromJson[Result].withMessage { r =>
s"- Nodes removed: ${r.checkNodes.asString}\n" +
s"- Invites removed: ${r.cleanupInvites.asString}\n" +
s"- Jobs removed: ${r.cleanupJobs.asString}\n" +
s"- RememberMe removed: ${r.cleanupRememberMe.asString}\n" +
s"- Integrity check: ok=${r.integrityCheck.ok}, failed=${r.integrityCheck.failedKeys.size}, notFound=${r.integrityCheck.notFoundKeys.size}"
}
}
} }
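
The five sub-task results are combined applicatively: mapN runs each effect in sequence and feeds the outputs to Result.apply. The same pattern on a simpler Applicative, assuming cats syntax:

import cats.syntax.all._

object MapNSketch {
  final case class Result(a: Int, b: Int, c: Int)

  // mapN collects the three effects' outputs into the constructor,
  // mirroring (CheckNodesTask(...), CleanupInvitesTask(...), ...).mapN(Result.apply)
  val combined: Option[Result] = (Option(1), Option(2), Option(3)).mapN(Result.apply)
  // combined == Some(Result(1, 2, 3))
}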

View File

@ -0,0 +1,34 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.hk
import cats.effect._
import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.common._
import docspell.joex.filecopy.FileIntegrityCheckTask
import docspell.scheduler.Task
import docspell.store.Store
object IntegrityCheckTask {
def apply[F[_]: Sync](
cfg: HouseKeepingConfig.IntegrityCheck,
store: Store[F],
fileRepo: OFileRepository[F]
): Task[F, Unit, FileIntegrityCheckTask.Result] =
Task { ctx =>
if (cfg.enabled)
FileIntegrityCheckTask(fileRepo, store).run(
ctx.map(_ => FileIntegrityCheckArgs(FileKeyPart.Empty))
)
else
ctx.logger.info("Integrity check task is disabled in the configuration") *>
FileIntegrityCheckTask.Result.empty.pure[F]
}
}

View File

@ -14,8 +14,9 @@ import docspell.analysis.TextAnalyser
import docspell.backend.ops.OCollective import docspell.backend.ops.OCollective
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler._
import docspell.logging.Logger import docspell.logging.Logger
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.{RClassifierModel, RClassifierSetting} import docspell.store.records.{RClassifierModel, RClassifierSetting}
object LearnClassifierTask { object LearnClassifierTask {
@ -29,14 +30,16 @@ object LearnClassifierTask {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
store: Store[F],
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
learnTags(cfg, analyser) learnTags(cfg, store, analyser)
.flatMap(_ => learnItemEntities(cfg, analyser)) .flatMap(_ => learnItemEntities(cfg, store, analyser))
.flatMap(_ => Task(_ => Sync[F].delay(System.gc()))) .flatMap(_ => Task(_ => Sync[F].delay(System.gc())))
private def learnItemEntities[F[_]: Async]( private def learnItemEntities[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
store: Store[F],
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
@ -44,6 +47,7 @@ object LearnClassifierTask {
LearnItemEntities LearnItemEntities
.learnAll( .learnAll(
analyser, analyser,
store,
ctx.args.collective, ctx.args.collective,
cfg.classification.itemCount, cfg.classification.itemCount,
cfg.maxLength cfg.maxLength
@ -54,16 +58,17 @@ object LearnClassifierTask {
private def learnTags[F[_]: Async]( private def learnTags[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
store: Store[F],
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
val learnTags = val learnTags =
for { for {
sett <- findActiveSettings[F](ctx, cfg) sett <- findActiveSettings[F](ctx, store, cfg)
maxItems = cfg.classification.itemCountOrWhenLower(sett.itemCount) maxItems = cfg.classification.itemCountOrWhenLower(sett.itemCount)
_ <- OptionT.liftF( _ <- OptionT.liftF(
LearnTags LearnTags
.learnAllTagCategories(analyser)( .learnAllTagCategories(analyser, store)(
ctx.args.collective, ctx.args.collective,
maxItems, maxItems,
cfg.maxLength cfg.maxLength
@ -74,34 +79,38 @@ object LearnClassifierTask {
// learn classifier models from active tag categories // learn classifier models from active tag categories
learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *> learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *>
// delete classifier model files for categories that have been removed // delete classifier model files for categories that have been removed
clearObsoleteTagModels(ctx) *> clearObsoleteTagModels(ctx, store) *>
// when tags are deleted, categories may be removed; fix the JSON array // when tags are deleted, categories may be removed; fix the JSON array
ctx.store store
.transact(RClassifierSetting.fixCategoryList(ctx.args.collective)) .transact(RClassifierSetting.fixCategoryList(ctx.args.collective))
.map(_ => ()) .map(_ => ())
} }
private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = private def clearObsoleteTagModels[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[Unit] =
for { for {
list <- ctx.store.transact( list <- store.transact(
ClassifierName.findOrphanTagModels(ctx.args.collective) ClassifierName.findOrphanTagModels(ctx.args.collective)
) )
_ <- ctx.logger.info( _ <- ctx.logger.info(
s"Found ${list.size} obsolete model files that are deleted now." s"Found ${list.size} obsolete model files that are deleted now."
) )
n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id))) n <- store.transact(RClassifierModel.deleteAll(list.map(_.id)))
_ <- list _ <- list
.map(_.fileId) .map(_.fileId)
.traverse(id => ctx.store.fileRepo.delete(id)) .traverse(id => store.fileRepo.delete(id))
_ <- ctx.logger.debug(s"Deleted $n model files.") _ <- ctx.logger.debug(s"Deleted $n model files.")
} yield () } yield ()
private def findActiveSettings[F[_]: Sync]( private def findActiveSettings[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis cfg: Config.TextAnalysis
): OptionT[F, OCollective.Classifier] = ): OptionT[F, OCollective.Classifier] =
if (cfg.classification.enabled) if (cfg.classification.enabled)
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective))) OptionT(store.transact(RClassifierSetting.findById(ctx.args.collective)))
.filter(_.autoTagEnabled) .filter(_.autoTagEnabled)
.map(OCollective.Classifier.fromRecord) .map(OCollective.Classifier.fromRecord)
else else

View File

@ -14,72 +14,81 @@ import fs2.Stream
import docspell.analysis.TextAnalyser import docspell.analysis.TextAnalyser
import docspell.analysis.classifier.TextClassifier.Data import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._ import docspell.common._
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.store.Store
object LearnItemEntities { object LearnItemEntities {
def learnAll[F[_]: Async, A]( def learnAll[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
store: Store[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
): Task[F, A, Unit] = ): Task[F, A, Unit] =
learnCorrOrg(analyser, collective, maxItems, maxTextLen) learnCorrOrg[F, A](analyser, store, collective, maxItems, maxTextLen)
.flatMap(_ => learnCorrPerson[F, A](analyser, collective, maxItems, maxTextLen)) .flatMap(_ =>
.flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen)) learnCorrPerson[F, A](analyser, store, collective, maxItems, maxTextLen)
.flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen)) )
.flatMap(_ => learnConcPerson(analyser, store, collective, maxItems, maxTextLen))
.flatMap(_ => learnConcEquip(analyser, store, collective, maxItems, maxTextLen))
def learnCorrOrg[F[_]: Async, A]( def learnCorrOrg[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
store: Store[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
): Task[F, A, Unit] = ): Task[F, A, Unit] =
learn(analyser, collective)( learn(store, analyser, collective)(
ClassifierName.correspondentOrg, ClassifierName.correspondentOrg,
ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen) _ => SelectItems.forCorrOrg(store, collective, maxItems, maxTextLen)
) )
def learnCorrPerson[F[_]: Async, A]( def learnCorrPerson[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
store: Store[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
): Task[F, A, Unit] = ): Task[F, A, Unit] =
learn(analyser, collective)( learn(store, analyser, collective)(
ClassifierName.correspondentPerson, ClassifierName.correspondentPerson,
ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen) _ => SelectItems.forCorrPerson(store, collective, maxItems, maxTextLen)
) )
def learnConcPerson[F[_]: Async, A]( def learnConcPerson[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
store: Store[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
): Task[F, A, Unit] = ): Task[F, A, Unit] =
learn(analyser, collective)( learn(store, analyser, collective)(
ClassifierName.concernedPerson, ClassifierName.concernedPerson,
ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen) _ => SelectItems.forConcPerson(store, collective, maxItems, maxTextLen)
) )
def learnConcEquip[F[_]: Async, A]( def learnConcEquip[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
store: Store[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
): Task[F, A, Unit] = ): Task[F, A, Unit] =
learn(analyser, collective)( learn(store, analyser, collective)(
ClassifierName.concernedEquip, ClassifierName.concernedEquip,
ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen) _ => SelectItems.forConcEquip(store, collective, maxItems, maxTextLen)
) )
private def learn[F[_]: Async, A]( private def learn[F[_]: Async, A](
store: Store[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident collective: Ident
)(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] = )(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =
Task { ctx => Task { ctx =>
ctx.logger.info(s"Learn classifier ${cname.name}") *> ctx.logger.info(s"Learn classifier ${cname.name}") *>
analyser.classifier.trainClassifier(ctx.logger, data(ctx))( analyser.classifier.trainClassifier(ctx.logger, data(ctx))(
Kleisli(StoreClassifierModel.handleModel(ctx, collective, cname)) Kleisli(StoreClassifierModel.handleModel(store, ctx.logger, collective, cname))
) )
} }
} }

View File

@ -12,13 +12,15 @@ import cats.implicits._
import docspell.analysis.TextAnalyser import docspell.analysis.TextAnalyser
import docspell.common._ import docspell.common._
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RClassifierSetting import docspell.store.records.RClassifierSetting
object LearnTags { object LearnTags {
def learnTagCategory[F[_]: Async, A]( def learnTagCategory[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
store: Store[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
@ -26,12 +28,14 @@ object LearnTags {
category: String category: String
): Task[F, A, Unit] = ): Task[F, A, Unit] =
Task { ctx => Task { ctx =>
val data = SelectItems.forCategory(ctx, collective)(maxItems, category, maxTextLen) val data =
SelectItems.forCategory(store, collective)(maxItems, category, maxTextLen)
ctx.logger.info(s"Learn classifier for tag category: $category") *> ctx.logger.info(s"Learn classifier for tag category: $category") *>
analyser.classifier.trainClassifier(ctx.logger, data)( analyser.classifier.trainClassifier(ctx.logger, data)(
Kleisli( Kleisli(
StoreClassifierModel.handleModel( StoreClassifierModel.handleModel(
ctx, store,
ctx.logger,
collective, collective,
ClassifierName.tagCategory(category) ClassifierName.tagCategory(category)
) )
@ -39,15 +43,15 @@ object LearnTags {
) )
} }
def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])( def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F], store: Store[F])(
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int
): Task[F, A, Unit] = ): Task[F, A, Unit] =
Task { ctx => Task { ctx =>
for { for {
cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective)) cats <- store.transact(RClassifierSetting.getActiveCategories(collective))
task = learnTagCategory[F, A](analyser, collective, maxItems, maxTextLen) _ task = learnTagCategory[F, A](analyser, store, collective, maxItems, maxTextLen) _
_ <- cats.map(task).traverse(_.run(ctx)) _ <- cats.map(task).traverse(_.run(ctx))
} yield () } yield ()
} }
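
learnAllTagCategories builds one partially applied task per category: the trailing underscore eta-expands the remaining parameter list. A minimal sketch of the same technique with a hypothetical reduced signature:

object EtaSketch {
  // everything fixed up front except the category, as in the diff's two parameter lists
  def learnTagCategory(maxItems: Int, maxTextLen: Int)(category: String): String =
    s"train model for '$category' (items=$maxItems, len=$maxTextLen)"

  val task: String => String = learnTagCategory(200, 5000) _
  val runs = List("invoice", "receipt").map(task)
}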

View File

@ -10,7 +10,6 @@ import fs2.{Pipe, Stream}
import docspell.analysis.classifier.TextClassifier.Data import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Context
import docspell.store.Store import docspell.store.Store
import docspell.store.qb.Batch import docspell.store.qb.Batch
import docspell.store.queries.{QItem, TextAndTag} import docspell.store.queries.{QItem, TextAndTag}
@ -21,16 +20,7 @@ object SelectItems {
val pageSep = LearnClassifierTask.pageSep val pageSep = LearnClassifierTask.pageSep
val noClass = LearnClassifierTask.noClass val noClass = LearnClassifierTask.noClass
def forCategory[F[_]](ctx: Context[F, _], collective: Ident)( def forCategory[F[_]](store: Store[F], collective: Ident)(
maxItems: Int,
category: String,
maxTextLen: Int
): Stream[F, Data] =
forCategory(ctx.store, collective, maxItems, category, maxTextLen)
def forCategory[F[_]](
store: Store[F],
collective: Ident,
maxItems: Int, maxItems: Int,
category: String, category: String,
maxTextLen: Int maxTextLen: Int

View File

@ -12,7 +12,6 @@ import fs2.io.file.Files
import docspell.analysis.classifier.ClassifierModel import docspell.analysis.classifier.ClassifierModel
import docspell.common._ import docspell.common._
import docspell.joex.scheduler._
import docspell.logging.Logger import docspell.logging.Logger
import docspell.store.Store import docspell.store.Store
import docspell.store.records.RClassifierModel import docspell.store.records.RClassifierModel
@ -20,21 +19,12 @@ import docspell.store.records.RClassifierModel
object StoreClassifierModel { object StoreClassifierModel {
def handleModel[F[_]: Async]( def handleModel[F[_]: Async](
ctx: Context[F, _], store: Store[F],
logger: Logger[F],
collective: Ident, collective: Ident,
modelName: ClassifierName modelName: ClassifierName
)( )(
trainedModel: ClassifierModel trainedModel: ClassifierModel
): F[Unit] =
handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel)
def handleModel[F[_]: Async](
store: Store[F],
logger: Logger[F]
)(
collective: Ident,
modelName: ClassifierName,
trainedModel: ClassifierModel
): F[Unit] = ): F[Unit] =
for { for {
oldFile <- store.transact( oldFile <- store.transact(

View File

@ -70,7 +70,7 @@ object ReadMail {
HtmlBodyViewConfig.default.copy( HtmlBodyViewConfig.default.copy(
textToHtml = MarkdownBody.makeHtml(markdownCfg) textToHtml = MarkdownBody.makeHtml(markdownCfg)
) )
).map(makeHtmlBinary[F] _).map(b => Some(b)) ).map(makeHtmlBinary[F]).map(b => Some(b))
} }
for { for {

View File

@ -12,14 +12,15 @@ import cats.implicits._
import docspell.backend.ops.ONotification import docspell.backend.ops.ONotification
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.notification.api.EventContext import docspell.notification.api.EventContext
import docspell.notification.api.NotificationChannel import docspell.notification.api.NotificationChannel
import docspell.notification.api.PeriodicDueItemsArgs import docspell.notification.api.PeriodicDueItemsArgs
import docspell.query.Date import docspell.query.Date
import docspell.query.ItemQuery._ import docspell.query.ItemQuery._
import docspell.query.ItemQueryDsl._ import docspell.query.ItemQueryDsl._
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.qb.Batch import docspell.store.qb.Batch
import docspell.store.queries.ListItem import docspell.store.queries.ListItem
import docspell.store.queries.{QItem, Query} import docspell.store.queries.{QItem, Query}
@ -32,11 +33,14 @@ object PeriodicDueItemsTask {
def onCancel[F[_]]: Task[F, Args, Unit] = def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn(s"Cancelling ${taskName.id} task")) Task.log(_.warn(s"Cancelling ${taskName.id} task"))
def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] = def apply[F[_]: Sync](
store: Store[F],
notificationOps: ONotification[F]
): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
val limit = 7 val limit = 7
Timestamp.current[F].flatMap { now => Timestamp.current[F].flatMap { now =>
withItems(ctx, limit, now) { items => withItems(ctx, store, limit, now) { items =>
withEventContext(ctx, items, limit, now) { eventCtx => withEventContext(ctx, items, limit, now) { eventCtx =>
withChannel(ctx, notificationOps) { channels => withChannel(ctx, notificationOps) { channels =>
notificationOps.sendMessage(ctx.logger, eventCtx, channels) notificationOps.sendMessage(ctx.logger, eventCtx, channels)
@ -51,7 +55,12 @@ object PeriodicDueItemsTask {
): F[Unit] = ): F[Unit] =
TaskOperations.withChannel(ctx.logger, ctx.args.channels, ctx.args.account, ops)(cont) TaskOperations.withChannel(ctx.logger, ctx.args.channels, ctx.args.account, ops)(cont)
def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)( def withItems[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
limit: Int,
now: Timestamp
)(
cont: Vector[ListItem] => F[Unit] cont: Vector[ListItem] => F[Unit]
): F[Unit] = { ): F[Unit] = {
val rightDate = Date((now + Duration.days(ctx.args.remindDays.toLong)).toMillis) val rightDate = Date((now + Duration.days(ctx.args.remindDays.toLong)).toMillis)
@ -77,7 +86,7 @@ object PeriodicDueItemsTask {
for { for {
res <- res <-
ctx.store store
.transact( .transact(
QItem QItem
.findItems(q, now.toUtcDate, 0, Batch.limit(limit)) .findItems(q, now.toUtcDate, 0, Batch.limit(limit))
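
withItems and withChannel follow a continuation style: the helper acquires its data and hands it to cont rather than returning it, keeping query and error handling in one place. A reduced sketch with stand-in types:

import cats.effect.IO

object ContinuationSketch {
  def withItems(limit: Int)(cont: Vector[String] => IO[Unit]): IO[Unit] = {
    val items = Vector.tabulate(limit)(i => s"item-$i") // stand-in for the QItem.findItems query
    cont(items)
  }

  val program: IO[Unit] =
    withItems(7)(items => IO.println(s"notify about ${items.size} due items"))
}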

View File

@ -13,8 +13,6 @@ import cats.implicits._
import docspell.backend.ops.ONotification import docspell.backend.ops.ONotification
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.notification.api.EventContext import docspell.notification.api.EventContext
import docspell.notification.api.NotificationChannel import docspell.notification.api.NotificationChannel
import docspell.notification.api.PeriodicQueryArgs import docspell.notification.api.PeriodicQueryArgs
@ -22,6 +20,9 @@ import docspell.query.ItemQuery
import docspell.query.ItemQuery.Expr import docspell.query.ItemQuery.Expr
import docspell.query.ItemQuery.Expr.AndExpr import docspell.query.ItemQuery.Expr.AndExpr
import docspell.query.ItemQueryParser import docspell.query.ItemQueryParser
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.qb.Batch import docspell.store.qb.Batch
import docspell.store.queries.ListItem import docspell.store.queries.ListItem
import docspell.store.queries.{QItem, Query} import docspell.store.queries.{QItem, Query}
@ -36,11 +37,14 @@ object PeriodicQueryTask {
def onCancel[F[_]]: Task[F, Args, Unit] = def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn(s"Cancelling ${taskName.id} task")) Task.log(_.warn(s"Cancelling ${taskName.id} task"))
def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] = def apply[F[_]: Sync](
store: Store[F],
notificationOps: ONotification[F]
): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
val limit = 7 val limit = 7
Timestamp.current[F].flatMap { now => Timestamp.current[F].flatMap { now =>
withItems(ctx, limit, now) { items => withItems(ctx, store, limit, now) { items =>
withEventContext(ctx, items, limit, now) { eventCtx => withEventContext(ctx, items, limit, now) { eventCtx =>
withChannel(ctx, notificationOps) { channels => withChannel(ctx, notificationOps) { channels =>
notificationOps.sendMessage(ctx.logger, eventCtx, channels) notificationOps.sendMessage(ctx.logger, eventCtx, channels)
@ -58,9 +62,11 @@ object PeriodicQueryTask {
private def queryString(q: ItemQuery.Expr) = private def queryString(q: ItemQuery.Expr) =
ItemQueryParser.asString(q) ItemQueryParser.asString(q)
def withQuery[F[_]: Sync](ctx: Context[F, Args])(cont: Query => F[Unit]): F[Unit] = { def withQuery[F[_]: Sync](ctx: Context[F, Args], store: Store[F])(
cont: Query => F[Unit]
): F[Unit] = {
def fromBookmark(id: String) = def fromBookmark(id: String) =
ctx.store store
.transact(RQueryBookmark.findByNameOrId(ctx.args.account, id)) .transact(RQueryBookmark.findByNameOrId(ctx.args.account, id))
.map(_.map(_.query)) .map(_.map(_.query))
.flatTap(q => .flatTap(q =>
@ -68,7 +74,7 @@ object PeriodicQueryTask {
) )
def fromShare(id: String) = def fromShare(id: String) =
ctx.store store
.transact(RShare.findOneByCollective(ctx.args.account.collective, Some(true), id)) .transact(RShare.findOneByCollective(ctx.args.account.collective, Some(true), id))
.map(_.map(_.query)) .map(_.map(_.query))
.flatTap(q => .flatTap(q =>
@ -120,11 +126,16 @@ object PeriodicQueryTask {
} }
} }
def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)( def withItems[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
limit: Int,
now: Timestamp
)(
cont: Vector[ListItem] => F[Unit] cont: Vector[ListItem] => F[Unit]
): F[Unit] = ): F[Unit] =
withQuery(ctx) { query => withQuery(ctx, store) { query =>
val items = ctx.store val items = store
.transact(QItem.findItems(query, now.toUtcDate, 0, Batch.limit(limit))) .transact(QItem.findItems(query, now.toUtcDate, 0, Batch.limit(limit)))
.compile .compile
.to(Vector) .to(Vector)

View File

@ -13,22 +13,24 @@ import fs2.{Chunk, Stream}
import docspell.backend.JobFactory import docspell.backend.JobFactory
import docspell.backend.ops.OJoex import docspell.backend.ops.OJoex
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Context import docspell.scheduler._
import docspell.joex.scheduler.Task import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records.RJob
object AllPageCountTask { object AllPageCountTask {
val taskName = Ident.unsafe("all-page-count") val taskName = Ident.unsafe("all-page-count")
type Args = Unit type Args = Unit
def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = def apply[F[_]: Sync](
store: Store[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
_ <- ctx.logger.info("Generating previews for attachments") _ <- ctx.logger.info("Generating previews for attachments")
n <- submitConversionJobs(ctx, queue) n <- submitConversionJobs(ctx, store, jobStore)
_ <- ctx.logger.info(s"Submitted $n jobs") _ <- ctx.logger.info(s"Submitted $n jobs")
_ <- joex.notifyAllNodes _ <- joex.notifyAllNodes
} yield () } yield ()
@ -39,14 +41,15 @@ object AllPageCountTask {
def submitConversionJobs[F[_]: Sync]( def submitConversionJobs[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
queue: JobQueue[F] store: Store[F],
jobStore: JobStore[F]
): F[Int] = ): F[Int] =
ctx.store store
.transact(findAttachments) .transact(findAttachments)
.chunks .chunks
.flatMap(createJobs[F]) .flatMap(createJobs[F])
.chunks .chunks
.evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) .evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
.compile .compile
.foldMonoid .foldMonoid
@ -54,28 +57,25 @@ object AllPageCountTask {
private def findAttachments[F[_]] = private def findAttachments[F[_]] =
RAttachment.findAllWithoutPageCount(50) RAttachment.findAllWithoutPageCount(50)
private def createJobs[F[_]: Sync](ras: Chunk[RAttachment]): Stream[F, RJob] = { private def createJobs[F[_]: Sync](ras: Chunk[RAttachment]): Stream[F, Job[String]] = {
def mkJob(ra: RAttachment): F[RJob] = def mkJob(ra: RAttachment): F[Job[MakePageCountArgs]] =
JobFactory.makePageCount(MakePageCountArgs(ra.id), None) JobFactory.makePageCount(MakePageCountArgs(ra.id), None)
val jobs = ras.traverse(mkJob) val jobs = ras.traverse(mkJob)
Stream.evalUnChunk(jobs) Stream.evalUnChunk(jobs).map(_.encode)
} }
def job[F[_]: Sync]: F[RJob] = def job[F[_]: Sync]: F[Job[String]] =
for { Job
id <- Ident.randomId[F] .createNew(
now <- Timestamp.current[F] AllPageCountTask.taskName,
} yield RJob.newJob( DocspellSystem.taskGroup,
id, (),
AllPageCountTask.taskName, "Create all page-counts",
DocspellSystem.taskGroup, DocspellSystem.taskGroup,
(), Priority.Low,
"Create all page-counts", Some(DocspellSystem.allPageCountTaskTracker)
now, )
DocspellSystem.taskGroup, .map(_.encode)
Priority.Low,
Some(DocspellSystem.allPageCountTaskTracker)
)
} }
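
For reference, a hypothetical sketch of the construction style this hunk switches to: Job.createNew (from the new scheduler-api module) allocates the job id and timestamp internally, which the removed RJob.newJob call had to be handed via Ident.randomId and Timestamp.current, and .encode serializes the typed arguments into the Job[String] the queue stores. The argument roles in the comments are inferred from the removed call; examplePageCountJob is an illustrative name, not part of this PR.

import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.scheduler._

// Sketch only: mirrors AllPageCountTask.job above.
def examplePageCountJob[F[_]: Sync]: F[Job[String]] =
  Job
    .createNew(
      Ident.unsafe("all-page-count"),              // task to execute (taskName)
      DocspellSystem.taskGroup,                    // group
      (),                                          // typed args, Unit here
      "Create all page-counts",                    // subject
      DocspellSystem.taskGroup,                    // submitter
      Priority.Low,
      Some(DocspellSystem.allPageCountTaskTracker) // optional tracker id
    )
    .map(_.encode)                                 // Job[Unit] => Job[String]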

View File

@ -11,8 +11,9 @@ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.process.AttachmentPageCount import docspell.joex.process.AttachmentPageCount
import docspell.joex.scheduler.Context import docspell.scheduler.Context
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records.RAttachmentMeta import docspell.store.records.RAttachmentMeta
@ -20,10 +21,10 @@ object MakePageCountTask {
type Args = MakePageCountArgs type Args = MakePageCountArgs
def apply[F[_]: Sync](): Task[F, Args, Unit] = def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
exists <- pageCountExists(ctx) exists <- pageCountExists(ctx, store)
_ <- _ <-
if (exists) if (exists)
ctx.logger.info( ctx.logger.info(
@ -32,7 +33,7 @@ object MakePageCountTask {
else else
ctx.logger.info( ctx.logger.info(
s"Reading page-count for attachment ${ctx.args.attachment}" s"Reading page-count for attachment ${ctx.args.attachment}"
) *> generatePageCount(ctx) ) *> generatePageCount(ctx, store)
} yield () } yield ()
} }
@ -40,19 +41,20 @@ object MakePageCountTask {
Task.log(_.warn("Cancelling make-page-count task")) Task.log(_.warn("Cancelling make-page-count task"))
private def generatePageCount[F[_]: Sync]( private def generatePageCount[F[_]: Sync](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[Unit] = ): F[Unit] =
for { for {
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment)) ra <- store.transact(RAttachment.findById(ctx.args.attachment))
_ <- ra _ <- ra
.map(AttachmentPageCount.createPageCount(ctx)) .map(AttachmentPageCount.createPageCount(ctx, store))
.getOrElse( .getOrElse(
ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}") ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}")
) )
} yield () } yield ()
private def pageCountExists[F[_]](ctx: Context[F, Args]): F[Boolean] = private def pageCountExists[F[_]](ctx: Context[F, Args], store: Store[F]): F[Boolean] =
ctx.store.transact( store.transact(
RAttachmentMeta RAttachmentMeta
.findPageCountById(ctx.args.attachment) .findPageCountById(ctx.args.attachment)
.map(_.exists(_ > 0)) .map(_.exists(_ > 0))

View File

@ -12,10 +12,9 @@ import fs2.{Chunk, Stream}
import docspell.backend.ops.OJoex import docspell.backend.ops.OJoex
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler._
import docspell.store.queue.JobQueue import docspell.store.Store
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records._
/* A task to find all non-converted pdf files (of a collective, or /* A task to find all non-converted pdf files (of a collective, or
 * all) and convert them using ocrmypdf by submitting a job for  * all) and convert them using ocrmypdf by submitting a job for
@ -24,11 +23,15 @@ import docspell.store.records._
object ConvertAllPdfTask { object ConvertAllPdfTask {
type Args = ConvertAllPdfArgs type Args = ConvertAllPdfArgs
def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = def apply[F[_]: Sync](
jobStore: JobStore[F],
joex: OJoex[F],
store: Store[F]
): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
_ <- ctx.logger.info("Converting pdfs using ocrmypdf") _ <- ctx.logger.info("Converting pdfs using ocrmypdf")
n <- submitConversionJobs(ctx, queue) n <- submitConversionJobs(ctx, store, jobStore)
_ <- ctx.logger.info(s"Submitted $n file conversion jobs") _ <- ctx.logger.info(s"Submitted $n file conversion jobs")
_ <- joex.notifyAllNodes _ <- joex.notifyAllNodes
} yield () } yield ()
@ -39,40 +42,36 @@ object ConvertAllPdfTask {
def submitConversionJobs[F[_]: Sync]( def submitConversionJobs[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
queue: JobQueue[F] store: Store[F],
jobStore: JobStore[F]
): F[Int] = ): F[Int] =
ctx.store store
.transact(RAttachment.findNonConvertedPdf(ctx.args.collective, 50)) .transact(RAttachment.findNonConvertedPdf(ctx.args.collective, 50))
.chunks .chunks
.flatMap(createJobs[F](ctx)) .flatMap(createJobs[F](ctx))
.chunks .chunks
.evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) .evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
.compile .compile
.foldMonoid .foldMonoid
private def createJobs[F[_]: Sync]( private def createJobs[F[_]: Sync](
ctx: Context[F, Args] ctx: Context[F, Args]
)(ras: Chunk[RAttachment]): Stream[F, RJob] = { )(ras: Chunk[RAttachment]): Stream[F, Job[String]] = {
val collectiveOrSystem = ctx.args.collective.getOrElse(DocspellSystem.taskGroup) val collectiveOrSystem = ctx.args.collective.getOrElse(DocspellSystem.taskGroup)
def mkJob(ra: RAttachment): F[RJob] = def mkJob(ra: RAttachment): F[Job[PdfConvTask.Args]] =
for { Job.createNew(
id <- Ident.randomId[F]
now <- Timestamp.current[F]
} yield RJob.newJob(
id,
PdfConvTask.taskName, PdfConvTask.taskName,
collectiveOrSystem, collectiveOrSystem,
PdfConvTask.Args(ra.id), PdfConvTask.Args(ra.id),
s"Convert pdf ${ra.id.id}/${ra.name.getOrElse("-")}", s"Convert pdf ${ra.id.id}/${ra.name.getOrElse("-")}",
now,
collectiveOrSystem, collectiveOrSystem,
Priority.Low, Priority.Low,
Some(PdfConvTask.taskName / ra.id) Some(PdfConvTask.taskName / ra.id)
) )
val jobs = ras.traverse(mkJob) val jobs = ras.traverse(mkJob)
Stream.evalUnChunk(jobs) Stream.evalUnChunk(jobs).map(_.encode)
} }
} }
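
The submission pipeline in submitConversionJobs has a shape shared by several tasks in this PR; a generic sketch, assuming only what the hunk shows (insertAllIfNew takes a Vector[Job[String]] and is effectful), with submitAll as a made-up helper name:

import cats.effect.Sync
import cats.implicits._
import fs2.Stream
import docspell.scheduler._

// Page records through in chunks, build one encoded job per record, hand
// each chunk to the job store, and sum the chunk sizes into a total count.
def submitAll[F[_]: Sync, A](records: Stream[F, A], jobStore: JobStore[F])(
    mkJob: A => F[Job[String]]
): F[Int] =
  records.chunks
    .flatMap(ch => Stream.evalUnChunk(ch.traverse(mkJob)))
    .chunks
    .evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
    .compile
    .foldMonoid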

View File

@ -16,7 +16,8 @@ import docspell.common._
import docspell.convert.ConversionResult import docspell.convert.ConversionResult
import docspell.convert.extern.OcrMyPdf import docspell.convert.extern.OcrMyPdf
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
import io.circe.generic.semiauto._ import io.circe.generic.semiauto._
@ -36,12 +37,12 @@ object PdfConvTask {
val taskName = Ident.unsafe("pdf-files-migration") val taskName = Ident.unsafe("pdf-files-migration")
def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] = def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
_ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf") _ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
meta <- checkInputs(cfg, ctx) meta <- checkInputs(cfg, ctx, store)
_ <- meta.traverse(fm => convert(cfg, ctx, fm)) _ <- meta.traverse(fm => convert(cfg, ctx, store, fm))
} yield () } yield ()
} }
@ -53,19 +54,20 @@ object PdfConvTask {
// check if file exists and if it is pdf and if source id is the same and if ocrmypdf is enabled // check if file exists and if it is pdf and if source id is the same and if ocrmypdf is enabled
def checkInputs[F[_]: Sync]( def checkInputs[F[_]: Sync](
cfg: Config, cfg: Config,
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[Option[RFileMeta]] = { ): F[Option[RFileMeta]] = {
val none: Option[RFileMeta] = None val none: Option[RFileMeta] = None
val checkSameFiles = val checkSameFiles =
(for { (for {
ra <- OptionT(ctx.store.transact(RAttachment.findById(ctx.args.attachId))) ra <- OptionT(store.transact(RAttachment.findById(ctx.args.attachId)))
isSame <- OptionT.liftF( isSame <- OptionT.liftF(
ctx.store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId)) store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId))
) )
} yield isSame).getOrElse(false) } yield isSame).getOrElse(false)
val existsPdf = val existsPdf =
for { for {
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId)) meta <- store.transact(RAttachment.findMeta(ctx.args.attachId))
res = meta.filter(_.mimetype.matches(MimeType.pdf)) res = meta.filter(_.mimetype.matches(MimeType.pdf))
_ <- _ <-
if (res.isEmpty) if (res.isEmpty)
@ -90,18 +92,19 @@ object PdfConvTask {
def convert[F[_]: Async]( def convert[F[_]: Async](
cfg: Config, cfg: Config,
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
in: RFileMeta in: RFileMeta
): F[Unit] = { ): F[Unit] = {
val fs = ctx.store.fileRepo val fs = store.fileRepo
val data = fs.getBytes(in.id) val data = fs.getBytes(in.id)
val storeResult: ConversionResult.Handler[F, Unit] = val storeResult: ConversionResult.Handler[F, Unit] =
Kleisli { Kleisli {
case ConversionResult.SuccessPdf(file) => case ConversionResult.SuccessPdf(file) =>
storeToAttachment(ctx, in, file) storeToAttachment(ctx, store, in, file)
case ConversionResult.SuccessPdfTxt(file, _) => case ConversionResult.SuccessPdfTxt(file, _) =>
storeToAttachment(ctx, in, file) storeToAttachment(ctx, store, in, file)
case ConversionResult.UnsupportedFormat(mime) => case ConversionResult.UnsupportedFormat(mime) =>
ctx.logger.warn( ctx.logger.warn(
@ -124,19 +127,20 @@ object PdfConvTask {
)(data, storeResult) )(data, storeResult)
for { for {
lang <- getLanguage(ctx) lang <- getLanguage(ctx, store)
_ <- ocrMyPdf(lang) _ <- ocrMyPdf(lang)
} yield () } yield ()
} }
def getLanguage[F[_]: Sync](ctx: Context[F, Args]): F[Language] = def getLanguage[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[Language] =
(for { (for {
coll <- OptionT(ctx.store.transact(RCollective.findByAttachment(ctx.args.attachId))) coll <- OptionT(store.transact(RCollective.findByAttachment(ctx.args.attachId)))
lang = coll.language lang = coll.language
} yield lang).getOrElse(Language.German) } yield lang).getOrElse(Language.German)
def storeToAttachment[F[_]: Sync]( def storeToAttachment[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
meta: RFileMeta, meta: RFileMeta,
newFile: Stream[F, Byte] newFile: Stream[F, Byte]
): F[Unit] = { ): F[Unit] = {
@ -146,10 +150,10 @@ object PdfConvTask {
for { for {
fid <- fid <-
newFile newFile
.through(ctx.store.fileRepo.save(collective, cat, mimeHint)) .through(store.fileRepo.save(collective, cat, mimeHint))
.compile .compile
.lastOrError .lastOrError
_ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid)) _ <- store.transact(RAttachment.updateFileId(ctx.args.attachId, fid))
} yield () } yield ()
} }
} }
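
The storeToAttachment change is representative of how file access now works everywhere: the repository is reached through the passed-in Store[F] rather than the scheduler context. A minimal sketch of that round trip (replaceFile is an illustrative name; the parameters mirror the variables in the hunk):

import cats.effect.Sync
import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.store.Store
import docspell.store.records.RAttachment

// Stream the new bytes into the file repository, then point the attachment
// at the FileKey the save pipe emits.
def replaceFile[F[_]: Sync](
    store: Store[F],
    attachId: Ident,
    collective: Ident,
    cat: FileCategory,
    mimeHint: MimeTypeHint,
    newFile: Stream[F, Byte]
): F[Unit] =
  for {
    fid <- newFile
      .through(store.fileRepo.save(collective, cat, mimeHint))
      .compile
      .lastOrError
    _ <- store.transact(RAttachment.updateFileId(attachId, fid))
  } yield ()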

View File

@ -14,21 +14,23 @@ import docspell.backend.JobFactory
import docspell.backend.ops.OJoex import docspell.backend.ops.OJoex
import docspell.common.MakePreviewArgs.StoreMode import docspell.common.MakePreviewArgs.StoreMode
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Context import docspell.scheduler._
import docspell.joex.scheduler.Task import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records.RJob
object AllPreviewsTask { object AllPreviewsTask {
type Args = AllPreviewsArgs type Args = AllPreviewsArgs
def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = def apply[F[_]: Sync](
jobStore: JobStore[F],
joex: OJoex[F],
store: Store[F]
): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
_ <- ctx.logger.info("Generating previews for attachments") _ <- ctx.logger.info("Generating previews for attachments")
n <- submitConversionJobs(ctx, queue) n <- submitConversionJobs(ctx, store, jobStore)
_ <- ctx.logger.info(s"Submitted $n jobs") _ <- ctx.logger.info(s"Submitted $n jobs")
_ <- joex.notifyAllNodes _ <- joex.notifyAllNodes
} yield () } yield ()
@ -39,14 +41,17 @@ object AllPreviewsTask {
def submitConversionJobs[F[_]: Sync]( def submitConversionJobs[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
queue: JobQueue[F] store: Store[F],
jobStore: JobStore[F]
): F[Int] = ): F[Int] =
ctx.store store
.transact(findAttachments(ctx)) .transact(findAttachments(ctx))
.chunks .chunks
.flatMap(createJobs[F](ctx)) .flatMap(createJobs[F](ctx))
.chunks .chunks
.evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) .evalMap(jobs =>
jobStore.insertAllIfNew(jobs.map(_.encode).toVector).map(_ => jobs.size)
)
.evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
.compile .compile
.foldMonoid .foldMonoid
@ -61,13 +66,13 @@ object AllPreviewsTask {
private def createJobs[F[_]: Sync]( private def createJobs[F[_]: Sync](
ctx: Context[F, Args] ctx: Context[F, Args]
)(ras: Chunk[RAttachment]): Stream[F, RJob] = { )(ras: Chunk[RAttachment]): Stream[F, Job[MakePreviewArgs]] = {
val collectiveOrSystem = { val collectiveOrSystem = {
val cid = ctx.args.collective.getOrElse(DocspellSystem.taskGroup) val cid = ctx.args.collective.getOrElse(DocspellSystem.taskGroup)
AccountId(cid, DocspellSystem.user) AccountId(cid, DocspellSystem.user)
} }
def mkJob(ra: RAttachment): F[RJob] = def mkJob(ra: RAttachment): F[Job[MakePreviewArgs]] =
JobFactory.makePreview( JobFactory.makePreview(
MakePreviewArgs(ra.id, ctx.args.storeMode), MakePreviewArgs(ra.id, ctx.args.storeMode),
collectiveOrSystem.some collectiveOrSystem.some
@ -77,7 +82,10 @@ object AllPreviewsTask {
Stream.evalUnChunk(jobs) Stream.evalUnChunk(jobs)
} }
def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] = def job[F[_]: Sync](
JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None) storeMode: MakePreviewArgs.StoreMode,
cid: Option[Ident]
): F[Job[String]] =
JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None).map(_.encode)
} }
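
A hypothetical usage sketch for the reworked job helper above: build the encoded job and hand it to the JobStore in one go. scheduleAllPreviews is a made-up name; None for the collective means all collectives, and StoreMode.WhenMissing only fills in missing previews, both per values appearing in this PR.

import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.scheduler._

def scheduleAllPreviews[F[_]: Sync](jobStore: JobStore[F]): F[Unit] =
  for {
    job <- AllPreviewsTask.job[F](MakePreviewArgs.StoreMode.WhenMissing, None)
    _ <- jobStore.insertAllIfNew(Vector(job))
  } yield ()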

View File

@ -13,8 +13,9 @@ import docspell.common._
import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig import docspell.extract.pdfbox.PreviewConfig
import docspell.joex.process.AttachmentPreview import docspell.joex.process.AttachmentPreview
import docspell.joex.scheduler.Context import docspell.scheduler.Context
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records.RAttachmentPreview import docspell.store.records.RAttachmentPreview
@ -22,10 +23,10 @@ object MakePreviewTask {
type Args = MakePreviewArgs type Args = MakePreviewArgs
def apply[F[_]: Sync](pcfg: PreviewConfig): Task[F, Args, Unit] = def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F]): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
exists <- previewExists(ctx) exists <- previewExists(ctx, store)
preview <- PdfboxPreview(pcfg) preview <- PdfboxPreview(pcfg)
_ <- _ <-
if (exists) if (exists)
@ -35,7 +36,7 @@ object MakePreviewTask {
else else
ctx.logger.info( ctx.logger.info(
s"Generating preview image for attachment ${ctx.args.attachment}" s"Generating preview image for attachment ${ctx.args.attachment}"
) *> generatePreview(ctx, preview) ) *> generatePreview(ctx, store, preview)
} yield () } yield ()
} }
@ -44,20 +45,24 @@ object MakePreviewTask {
private def generatePreview[F[_]: Sync]( private def generatePreview[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
preview: PdfboxPreview[F] preview: PdfboxPreview[F]
): F[Unit] = ): F[Unit] =
for { for {
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment)) ra <- store.transact(RAttachment.findById(ctx.args.attachment))
_ <- ra _ <- ra
.map(AttachmentPreview.createPreview(ctx, preview)) .map(AttachmentPreview.createPreview(ctx, store, preview))
.getOrElse( .getOrElse(
ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}") ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}")
) )
} yield () } yield ()
private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] = private def previewExists[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[Boolean] =
if (ctx.args.store == MakePreviewArgs.StoreMode.WhenMissing) if (ctx.args.store == MakePreviewArgs.StoreMode.WhenMissing)
ctx.store.transact( store.transact(
RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined) RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined)
) )
else else

View File

@ -15,7 +15,8 @@ import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.extract.pdfbox.PdfMetaData import docspell.extract.pdfbox.PdfMetaData
import docspell.extract.pdfbox.PdfboxExtract import docspell.extract.pdfbox.PdfboxExtract
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records._ import docspell.store.records._
@ -24,7 +25,7 @@ import docspell.store.records._
*/ */
object AttachmentPageCount { object AttachmentPageCount {
def apply[F[_]: Sync]()( def apply[F[_]: Sync](store: Store[F])(
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
@ -33,7 +34,7 @@ object AttachmentPageCount {
s"Retrieving page count for ${item.attachments.size} files…" s"Retrieving page count for ${item.attachments.size} files…"
) )
_ <- item.attachments _ <- item.attachments
.traverse(createPageCount(ctx)) .traverse(createPageCount(ctx, store))
.attempt .attempt
.flatMap { .flatMap {
case Right(_) => ().pure[F] case Right(_) => ().pure[F]
@ -46,14 +47,15 @@ object AttachmentPageCount {
} }
def createPageCount[F[_]: Sync]( def createPageCount[F[_]: Sync](
ctx: Context[F, _] ctx: Context[F, _],
store: Store[F]
)(ra: RAttachment): F[Option[PdfMetaData]] = )(ra: RAttachment): F[Option[PdfMetaData]] =
findMime[F](ctx)(ra).flatMap { findMime[F](store)(ra).flatMap {
case MimeType.PdfMatch(_) => case MimeType.PdfMatch(_) =>
PdfboxExtract.getMetaData(loadFile(ctx)(ra)).flatMap { PdfboxExtract.getMetaData(loadFile(store)(ra)).flatMap {
case Right(md) => case Right(md) =>
ctx.logger.debug(s"Found number of pages: ${md.pageCount}") *> ctx.logger.debug(s"Found number of pages: ${md.pageCount}") *>
updatePageCount(ctx, md, ra).map(_.some) updatePageCount(ctx, store, md, ra).map(_.some)
case Left(ex) => case Left(ex) =>
ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *> ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *>
(None: Option[PdfMetaData]).pure[F] (None: Option[PdfMetaData]).pure[F]
@ -66,6 +68,7 @@ object AttachmentPageCount {
private def updatePageCount[F[_]: Sync]( private def updatePageCount[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
md: PdfMetaData, md: PdfMetaData,
ra: RAttachment ra: RAttachment
): F[PdfMetaData] = ): F[PdfMetaData] =
@ -73,12 +76,12 @@ object AttachmentPageCount {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Update attachment ${ra.id.id} with page count ${md.pageCount.some}" s"Update attachment ${ra.id.id} with page count ${md.pageCount.some}"
) )
n <- ctx.store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some)) n <- store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some))
m <- m <-
if (n == 0) if (n == 0)
ctx.logger.warn( ctx.logger.warn(
s"No attachmentmeta record exists for ${ra.id.id}. Creating new." s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
) *> ctx.store.transact( ) *> store.transact(
RAttachmentMeta.insert( RAttachmentMeta.insert(
RAttachmentMeta( RAttachmentMeta(
ra.id, ra.id,
@ -94,11 +97,11 @@ object AttachmentPageCount {
_ <- ctx.logger.debug(s"Stored page count (${n + m}).") _ <- ctx.logger.debug(s"Stored page count (${n + m}).")
} yield md } yield md
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] =
ctx.store.fileRepo.getBytes(ra.fileId) store.fileRepo.getBytes(ra.fileId)
} }
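
findMime and loadFile switching from Context[F, _] to Store[F] is a small but telling part of this refactoring: the helpers no longer depend on a running job and can be reused from any code that holds a store. A sketch combining both (mimeAndBytes is an illustrative name):

import cats.Functor
import cats.data.OptionT
import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.store.Store
import docspell.store.records.{RAttachment, RFileMeta}

// Resolve the stored mime type (octet-stream if unknown) and expose the
// file's bytes, using only Store[F].
def mimeAndBytes[F[_]: Functor](
    store: Store[F]
)(ra: RAttachment): (F[MimeType], Stream[F, Byte]) = {
  val mime = OptionT(store.transact(RFileMeta.findById(ra.fileId)))
    .map(_.mimetype)
    .getOrElse(MimeType.octetStream)
  (mime, store.fileRepo.getBytes(ra.fileId))
}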

View File

@ -15,7 +15,8 @@ import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig import docspell.extract.pdfbox.PreviewConfig
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.queries.QAttachment import docspell.store.queries.QAttachment
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records._ import docspell.store.records._
@ -26,7 +27,7 @@ import docspell.store.records._
*/ */
object AttachmentPreview { object AttachmentPreview {
def apply[F[_]: Sync](pcfg: PreviewConfig)( def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F])(
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
@ -36,7 +37,7 @@ object AttachmentPreview {
) )
preview <- PdfboxPreview(pcfg) preview <- PdfboxPreview(pcfg)
_ <- item.attachments _ <- item.attachments
.traverse(createPreview(ctx, preview)) .traverse(createPreview(ctx, store, preview))
.attempt .attempt
.flatMap { .flatMap {
case Right(_) => ().pure[F] case Right(_) => ().pure[F]
@ -50,16 +51,17 @@ object AttachmentPreview {
def createPreview[F[_]: Sync]( def createPreview[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
preview: PdfboxPreview[F] preview: PdfboxPreview[F]
)( )(
ra: RAttachment ra: RAttachment
): F[Option[RAttachmentPreview]] = ): F[Option[RAttachmentPreview]] =
findMime[F](ctx)(ra).flatMap { findMime[F](store)(ra).flatMap {
case MimeType.PdfMatch(_) => case MimeType.PdfMatch(_) =>
preview.previewPNG(loadFile(ctx)(ra)).flatMap { preview.previewPNG(loadFile(store)(ra)).flatMap {
case Some(out) => case Some(out) =>
ctx.logger.debug("Preview generated, saving to database…") *> ctx.logger.debug("Preview generated, saving to database…") *>
createRecord(ctx, ra.fileId.collective, out, ra).map(_.some) createRecord(store, ra.fileId.collective, out, ra).map(_.some)
case None => case None =>
ctx.logger ctx.logger
.info(s"Preview could not be generated. Maybe the pdf has no pages?") *> .info(s"Preview could not be generated. Maybe the pdf has no pages?") *>
@ -72,7 +74,7 @@ object AttachmentPreview {
} }
private def createRecord[F[_]: Sync]( private def createRecord[F[_]: Sync](
ctx: Context[F, _], store: Store[F],
collective: Ident, collective: Ident,
png: Stream[F, Byte], png: Stream[F, Byte],
ra: RAttachment ra: RAttachment
@ -83,7 +85,7 @@ object AttachmentPreview {
for { for {
fileId <- png fileId <- png
.through( .through(
ctx.store.fileRepo.save( store.fileRepo.save(
collective, collective,
FileCategory.PreviewImage, FileCategory.PreviewImage,
MimeTypeHint(name.map(_.fullName), Some("image/png")) MimeTypeHint(name.map(_.fullName), Some("image/png"))
@ -93,16 +95,16 @@ object AttachmentPreview {
.lastOrError .lastOrError
now <- Timestamp.current[F] now <- Timestamp.current[F]
rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now) rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now)
_ <- QAttachment.deletePreview(ctx.store)(ra.id) _ <- QAttachment.deletePreview(store)(ra.id)
_ <- ctx.store.transact(RAttachmentPreview.insert(rp)) _ <- store.transact(RAttachmentPreview.insert(rp))
} yield rp } yield rp
} }
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] =
ctx.store.fileRepo.getBytes(ra.fileId) store.fileRepo.getBytes(ra.fileId)
} }
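
One detail of createRecord above deserves a note: a fresh preview replaces any existing one, file and record, via QAttachment.deletePreview before the insert. A sketch of that replace step (replacePreview is a made-up name; attachId is passed explicitly to avoid assuming field names on RAttachmentPreview):

import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.store.Store
import docspell.store.queries.QAttachment
import docspell.store.records.RAttachmentPreview

// Drop the previous preview (record and stored file), then insert the new
// record, so an attachment keeps at most one preview image.
def replacePreview[F[_]: Sync](
    store: Store[F],
    attachId: Ident,
    rp: RAttachmentPreview
): F[Unit] =
  for {
    _ <- QAttachment.deletePreview(store)(attachId)
    _ <- store.transact(RAttachmentPreview.insert(rp))
  } yield ()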

View File

@ -17,7 +17,8 @@ import docspell.convert.ConversionResult.Handler
import docspell.convert.SanitizeHtml import docspell.convert.SanitizeHtml
import docspell.convert._ import docspell.convert._
import docspell.joex.extract.JsoupSanitizer import docspell.joex.extract.JsoupSanitizer
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
/** Goes through all attachments and creates a PDF version of each where supported. /** Goes through all attachments and creates a PDF version of each where supported.
@ -36,21 +37,22 @@ object ConvertPdf {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: ConvertConfig, cfg: ConvertConfig,
store: Store[F],
item: ItemData item: ItemData
): Task[F, Args, ItemData] = ): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
isConverted(ctx)(ra).flatMap { isConverted(store)(ra).flatMap {
case true if ctx.args.isNormalProcessing => case true if ctx.args.isNormalProcessing =>
ctx.logger.info( ctx.logger.info(
s"Conversion to pdf already done for attachment ${ra.name}." s"Conversion to pdf already done for attachment ${ra.name}."
) *> ) *>
ctx.store store
.transact(RAttachmentMeta.findById(ra.id)) .transact(RAttachmentMeta.findById(ra.id))
.map(rmOpt => (ra, rmOpt)) .map(rmOpt => (ra, rmOpt))
case _ => case _ =>
findMime(ctx)(ra).flatMap(m => findMime(store)(ra).flatMap(m =>
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m) convertSafe(cfg, JsoupSanitizer.clean, ctx, store, item)(ra, m)
) )
} }
@ -62,13 +64,15 @@ object ConvertPdf {
} }
def isConverted[F[_]](ctx: Context[F, Args])( def isConverted[F[_]](store: Store[F])(
ra: RAttachment ra: RAttachment
): F[Boolean] = ): F[Boolean] =
ctx.store.transact(RAttachmentSource.isConverted(ra.id)) store.transact(RAttachmentSource.isConverted(ra.id))
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) ra: RAttachment
): F[MimeType] =
OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
@ -76,14 +80,15 @@ object ConvertPdf {
cfg: ConvertConfig, cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml, sanitizeHtml: SanitizeHtml,
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
item: ItemData item: ItemData
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
loadCollectivePasswords(ctx).flatMap(collPass => loadCollectivePasswords(ctx, store).flatMap(collPass =>
Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv => Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv =>
mime match { mime match {
case mt => case mt =>
val data = ctx.store.fileRepo.getBytes(ra.fileId) val data = store.fileRepo.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, cfg, ra, item) val handler = conversionHandler[F](ctx, store, cfg, ra, item)
ctx.logger ctx.logger
.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> .info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)( conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
@ -94,14 +99,16 @@ object ConvertPdf {
) )
private def loadCollectivePasswords[F[_]: Async]( private def loadCollectivePasswords[F[_]: Async](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[List[Password]] = ): F[List[Password]] =
ctx.store store
.transact(RCollectivePassword.findAll(ctx.args.meta.collective)) .transact(RCollectivePassword.findAll(ctx.args.meta.collective))
.map(_.map(_.password).distinct) .map(_.map(_.password).distinct)
private def conversionHandler[F[_]: Sync]( private def conversionHandler[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: ConvertConfig, cfg: ConvertConfig,
ra: RAttachment, ra: RAttachment,
item: ItemData item: ItemData
@ -109,12 +116,12 @@ object ConvertPdf {
Kleisli { Kleisli {
case ConversionResult.SuccessPdf(pdf) => case ConversionResult.SuccessPdf(pdf) =>
ctx.logger.info(s"Conversion to pdf successful. Saving file.") *> ctx.logger.info(s"Conversion to pdf successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf) storePDF(ctx, store, cfg, ra, pdf)
.map(r => (r, None)) .map(r => (r, None))
case ConversionResult.SuccessPdfTxt(pdf, txt) => case ConversionResult.SuccessPdfTxt(pdf, txt) =>
ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *> ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf) storePDF(ctx, store, cfg, ra, pdf)
.flatMap(r => .flatMap(r =>
txt.map(t => txt.map(t =>
( (
@ -148,6 +155,7 @@ object ConvertPdf {
private def storePDF[F[_]: Sync]( private def storePDF[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: ConvertConfig, cfg: ConvertConfig,
ra: RAttachment, ra: RAttachment,
pdf: Stream[F, Byte] pdf: Stream[F, Byte]
@ -162,7 +170,7 @@ object ConvertPdf {
pdf pdf
.through( .through(
ctx.store.fileRepo.save( store.fileRepo.save(
ctx.args.meta.collective, ctx.args.meta.collective,
FileCategory.AttachmentConvert, FileCategory.AttachmentConvert,
MimeTypeHint(hint.filename, hint.advertised) MimeTypeHint(hint.filename, hint.advertised)
@ -170,32 +178,33 @@ object ConvertPdf {
) )
.compile .compile
.lastOrError .lastOrError
.flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId)) .flatMap(fmId => updateAttachment[F](ctx, store, ra, fmId, newName).map(_ => fmId))
.map(fmId => ra.copy(fileId = fmId, name = newName)) .map(fmId => ra.copy(fileId = fmId, name = newName))
} }
private def updateAttachment[F[_]: Sync]( private def updateAttachment[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
ra: RAttachment, ra: RAttachment,
fmId: FileKey, fmId: FileKey,
newName: Option[String] newName: Option[String]
): F[Unit] = ): F[Unit] =
for { for {
oldFile <- ctx.store.transact(RAttachment.findById(ra.id)) oldFile <- store.transact(RAttachment.findById(ra.id))
_ <- _ <-
ctx.store store
.transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName)) .transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName))
_ <- oldFile match { _ <- oldFile match {
case Some(raPrev) => case Some(raPrev) =>
for { for {
sameFile <- sameFile <-
ctx.store store
.transact(RAttachmentSource.isSameFile(ra.id, raPrev.fileId)) .transact(RAttachmentSource.isSameFile(ra.id, raPrev.fileId))
_ <- _ <-
if (sameFile) ().pure[F] if (sameFile) ().pure[F]
else else
ctx.logger.info("Deleting previous attachment file") *> ctx.logger.info("Deleting previous attachment file") *>
ctx.store.fileRepo store.fileRepo
.delete(raPrev.fileId) .delete(raPrev.fileId)
.attempt .attempt
.flatMap { .flatMap {

View File

@ -13,7 +13,8 @@ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.file.FileMetadata import docspell.store.file.FileMetadata
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records._ import docspell.store.records._
@ -21,13 +22,13 @@ import docspell.store.records._
/** Task that creates the item. */ /** Task that creates the item. */
object CreateItem { object CreateItem {
def apply[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] =
findExisting[F].flatMap { findExisting[F](store).flatMap {
case Some(ri) => Task.pure(ri) case Some(ri) => Task.pure(ri)
case None => createNew[F] case None => createNew[F](store)
} }
def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = def createNew[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
def isValidFile(fm: FileMetadata) = def isValidFile(fm: FileMetadata) =
ctx.args.meta.validFileTypes.isEmpty || ctx.args.meta.validFileTypes.isEmpty ||
@ -36,11 +37,11 @@ object CreateItem {
def fileMetas(itemId: Ident, now: Timestamp) = def fileMetas(itemId: Ident, now: Timestamp) =
Stream Stream
.eval(ctx.store.transact(RAttachment.nextPosition(itemId))) .eval(store.transact(RAttachment.nextPosition(itemId)))
.flatMap { offset => .flatMap { offset =>
Stream Stream
.emits(ctx.args.files) .emits(ctx.args.files)
.evalMap(f => ctx.store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm))) .evalMap(f => store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm)))
.collect { case (f, Some(fm)) if isValidFile(fm) => f } .collect { case (f, Some(fm)) if isValidFile(fm) => f }
.zipWithIndex .zipWithIndex
.evalMap { case (f, index) => .evalMap { case (f, index) =>
@ -67,11 +68,11 @@ object CreateItem {
(for { (for {
_ <- OptionT.liftF( _ <- OptionT.liftF(
ctx.logger.info( ctx.logger.info(
s"Loading item with id ${id.id} to ammend" s"Loading item with id ${id.id} to amend"
) )
) )
item <- OptionT( item <- OptionT(
ctx.store store
.transact(RItem.findByIdAndCollective(id, ctx.args.meta.collective)) .transact(RItem.findByIdAndCollective(id, ctx.args.meta.collective))
) )
} yield (1, item)) } yield (1, item))
@ -88,7 +89,7 @@ object CreateItem {
ctx.args.meta.direction.getOrElse(Direction.Incoming), ctx.args.meta.direction.getOrElse(Direction.Incoming),
ItemState.Premature ItemState.Premature
) )
n <- ctx.store.transact(RItem.insert(item)) n <- store.transact(RItem.insert(item))
} yield (n, item) } yield (n, item)
} }
@ -98,7 +99,7 @@ object CreateItem {
_ <- if (it._1 != 1) storeItemError[F](ctx) else ().pure[F] _ <- if (it._1 != 1) storeItemError[F](ctx) else ().pure[F]
now <- Timestamp.current[F] now <- Timestamp.current[F]
fm <- fileMetas(it._2.id, now) fm <- fileMetas(it._2.id, now)
k <- fm.traverse(insertAttachment(ctx)) k <- fm.traverse(insertAttachment(store))
_ <- logDifferences(ctx, fm, k.sum) _ <- logDifferences(ctx, fm, k.sum)
dur <- time dur <- time
_ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}") _ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}")
@ -115,25 +116,27 @@ object CreateItem {
) )
} }
def insertAttachment[F[_]](ctx: Context[F, _])(ra: RAttachment): F[Int] = { def insertAttachment[F[_]](store: Store[F])(ra: RAttachment): F[Int] = {
val rs = RAttachmentSource.of(ra) val rs = RAttachmentSource.of(ra)
ctx.store.transact(for { store.transact(for {
n <- RAttachment.insert(ra) n <- RAttachment.insert(ra)
_ <- RAttachmentSource.insert(rs) _ <- RAttachmentSource.insert(rs)
} yield n) } yield n)
} }
private def findExisting[F[_]: Sync]: Task[F, ProcessItemArgs, Option[ItemData]] = private def findExisting[F[_]: Sync](
store: Store[F]
): Task[F, ProcessItemArgs, Option[ItemData]] =
Task { ctx => Task { ctx =>
val states = ItemState.invalidStates val states = ItemState.invalidStates
val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet
for { for {
cand <- ctx.store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states)) cand <- store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states))
_ <- _ <-
if (cand.nonEmpty) if (cand.nonEmpty)
ctx.logger.warn(s"Found ${cand.size} existing item with these files.") ctx.logger.warn(s"Found ${cand.size} existing item with these files.")
else ().pure[F] else ().pure[F]
ht <- cand.drop(1).traverse(ri => QItem.delete(ctx.store)(ri.id, ri.cid)) ht <- cand.drop(1).traverse(ri => QItem.delete(store)(ri.id, ri.cid))
_ <- _ <-
if (ht.sum > 0) if (ht.sum > 0)
ctx.logger.warn(s"Removed ${ht.sum} items with same attachments") ctx.logger.warn(s"Removed ${ht.sum} items with same attachments")
@ -144,7 +147,7 @@ object CreateItem {
OptionT( OptionT(
// load attachments but only those mentioned in the task's arguments // load attachments but only those mentioned in the task's arguments
cand.headOption.traverse(ri => cand.headOption.traverse(ri =>
ctx.store store
.transact(RAttachment.findByItemCollectiveSource(ri.id, ri.cid, fids)) .transact(RAttachment.findByItemCollectiveSource(ri.id, ri.cid, fids))
.flatTap(ats => .flatTap(ats =>
ctx.logger.debug( ctx.logger.debug(
@ -156,7 +159,7 @@ object CreateItem {
) )
.getOrElse(Vector.empty) .getOrElse(Vector.empty)
orig <- rms.traverse(a => orig <- rms.traverse(a =>
ctx.store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s)) store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s))
) )
origMap = origMap =
orig orig

View File

@ -12,8 +12,9 @@ import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Task
import docspell.logging.Logger import docspell.logging.Logger
import docspell.scheduler.Task
import docspell.store.Store
/** After candidates have been determined, the set is reduced by doing some cross checks. /** After candidates have been determined, the set is reduced by doing some cross checks.
 * For example: if an organization is suggested as correspondent, the correspondent person  * For example: if an organization is suggested as correspondent, the correspondent person
@ -22,13 +23,15 @@ import docspell.logging.Logger
*/ */
object CrossCheckProposals { object CrossCheckProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
val proposals = data.finalProposals val proposals = data.finalProposals
val corrOrg = proposals.find(MetaProposalType.CorrOrg) val corrOrg = proposals.find(MetaProposalType.CorrOrg)
(for { (for {
orgRef <- OptionT.fromOption[F](corrOrg) orgRef <- OptionT.fromOption[F](corrOrg)
persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, ctx)) persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, store))
clProps <- OptionT.liftF( clProps <- OptionT.liftF(
personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef) personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef)
) )
@ -53,7 +56,7 @@ object CrossCheckProposals {
mpl.find(MetaProposalType.CorrPerson) match { mpl.find(MetaProposalType.CorrPerson) match {
case Some(ppl) => case Some(ppl) =>
val list = ppl.values.filter(c => val list = ppl.values.filter(c =>
persRefs.get(c.ref.id).exists(_.organization == Some(orgId)) persRefs.get(c.ref.id).exists(_.organization.contains(orgId))
) )
if (ppl.values.toList == list) mpl.pure[F] if (ppl.values.toList == list) mpl.pure[F]

View File

@ -10,7 +10,8 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records.RFileMeta import docspell.store.records.RFileMeta
import docspell.store.records.RJob import docspell.store.records.RJob
@ -20,46 +21,52 @@ import doobie._
object DuplicateCheck { object DuplicateCheck {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync]: Task[F, Args, Args] = def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Args] =
Task { ctx => Task { ctx =>
if (ctx.args.meta.skipDuplicate) if (ctx.args.meta.skipDuplicate)
for { for {
retries <- getRetryCount(ctx) retries <- getRetryCount(ctx, store)
res <- res <-
if (retries == 0) if (retries == 0)
ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx) ctx.logger
.debug("Checking for duplicate files") *> removeDuplicates(ctx, store)
else ctx.args.pure[F] else ctx.args.pure[F]
} yield res } yield res
else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F] else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F]
} }
def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] = def removeDuplicates[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[ProcessItemArgs] =
for { for {
fileMetas <- findDuplicates(ctx) fileMetas <- findDuplicates(ctx, store)
_ <- fileMetas.traverse(deleteDuplicate(ctx)) _ <- fileMetas.traverse(deleteDuplicate(ctx, store))
ids = fileMetas.filter(_.exists).map(_.fm.id).toSet ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
} yield ctx.args.copy(files = } yield ctx.args.copy(files =
ctx.args.files.filterNot(f => ids.contains(f.fileMetaId)) ctx.args.files.filterNot(f => ids.contains(f.fileMetaId))
) )
private def getRetryCount[F[_]: Sync](ctx: Context[F, Args]): F[Int] = private def getRetryCount[F[_]: Sync](ctx: Context[F, _], store: Store[F]): F[Int] =
ctx.store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0)) store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0))
private def deleteDuplicate[F[_]: Sync]( private def deleteDuplicate[F[_]: Sync](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
)(fd: FileMetaDupes): F[Unit] = { )(fd: FileMetaDupes): F[Unit] = {
val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name) val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name)
if (fd.exists) if (fd.exists)
ctx.logger ctx.logger
.info(s"Deleting duplicate file $fname!") *> ctx.store.fileRepo .info(s"Deleting duplicate file $fname!") *> store.fileRepo
.delete(fd.fm.id) .delete(fd.fm.id)
else ().pure[F] else ().pure[F]
} }
private def findDuplicates[F[_]]( private def findDuplicates[F[_]](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[Vector[FileMetaDupes]] = ): F[Vector[FileMetaDupes]] =
ctx.store.transact(for { store.transact(for {
fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId)) fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId))
dupes <- fileMetas.traverse(checkDuplicate(ctx)) dupes <- fileMetas.traverse(checkDuplicate(ctx))
} yield dupes) } yield dupes)
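
The retry guard in DuplicateCheck.apply is worth calling out: duplicate files are removed only when RJob.getRetries reports a first attempt, so a retried job does not delete files an earlier run may already have attached. A sketch of that pattern in isolation (onFirstAttempt is a made-up name):

import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.scheduler.Context
import docspell.store.Store
import docspell.store.records.RJob

// Run the destructive action only on the first attempt; on retries, pass
// the arguments through unchanged.
def onFirstAttempt[F[_]: Sync](
    ctx: Context[F, ProcessItemArgs],
    store: Store[F]
)(run: F[ProcessItemArgs]): F[ProcessItemArgs] =
  store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0)).flatMap {
    case 0 => run
    case n => ctx.logger.debug(s"Retry $n, skipping duplicate check") *> ctx.args.pure[F]
  }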

View File

@ -12,25 +12,28 @@ import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.{RAttachmentMeta, RPerson} import docspell.store.records.{RAttachmentMeta, RPerson}
/** Calculate weights for candidates, assigning the most likely candidate a lower number. /** Calculate weights for candidates, assigning the most likely candidate a lower number.
*/ */
object EvalProposals { object EvalProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](
Task { ctx => store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { _ =>
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
personRefs <- findOrganizationRelation[F](data, ctx) personRefs <- findOrganizationRelation[F](data, store)
metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs)) metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs))
} yield data.copy(metas = metas) } yield data.copy(metas = metas)
} }
def findOrganizationRelation[F[_]: Sync]( def findOrganizationRelation[F[_]: Sync](
data: ItemData, data: ItemData,
ctx: Context[F, _] store: Store[F]
): F[Map[Ident, PersonRef]] = { ): F[Map[Ident, PersonRef]] = {
val corrPersIds = data.metas val corrPersIds = data.metas
.map(_.proposals) .map(_.proposals)
@ -38,7 +41,7 @@ object EvalProposals {
.flatMap(_.find(MetaProposalType.CorrPerson)) .flatMap(_.find(MetaProposalType.CorrPerson))
.flatMap(_.values.toList.map(_.ref.id)) .flatMap(_.values.toList.map(_.ref.id))
.toSet .toSet
ctx.store store
.transact(RPerson.findOrganization(corrPersIds)) .transact(RPerson.findOrganization(corrPersIds))
.map(_.map(p => (p.id, p)).toMap) .map(_.map(p => (p.id, p)).toMap)
} }
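
findOrganizationRelation now only needs a Store[F], which is why CrossCheckProposals (earlier in this commit) can call it with just the store. A sketch of the lookup it performs (personOrgIndex is an illustrative name):

import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.store.Store
import docspell.store.records.RPerson

// Resolve the organization of each proposed correspondent person in one
// query and index the result by person id for the weighting step.
def personOrgIndex[F[_]: Sync](
    store: Store[F],
    persIds: Set[Ident]
): F[Map[Ident, PersonRef]] =
  store
    .transact(RPerson.findOrganization(persIds))
    .map(_.map(p => (p.id, p)).toMap)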

View File

@ -18,7 +18,8 @@ import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.files.Zip import docspell.files.Zip
import docspell.joex.mail._ import docspell.joex.mail._
import docspell.joex.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
import emil.Mail import emil.Mail
@ -34,39 +35,41 @@ import emil.Mail
object ExtractArchive { object ExtractArchive {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Async]( def apply[F[_]: Async](store: Store[F])(
item: ItemData item: ItemData
): Task[F, Args, ItemData] = ): Task[F, Args, ItemData] =
multiPass(item, None).map(_._2) multiPass(store, item, None).map(_._2)
def multiPass[F[_]: Async]( def multiPass[F[_]: Async](
store: Store[F],
item: ItemData, item: ItemData,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] =
singlePass(item, archive).flatMap { t => singlePass(store, item, archive).flatMap { t =>
if (t._1.isEmpty) Task.pure(t) if (t._1.isEmpty) Task.pure(t)
else multiPass(t._2, t._1) else multiPass(store, t._2, t._1)
} }
def singlePass[F[_]: Async]( def singlePass[F[_]: Async](
store: Store[F],
item: ItemData, item: ItemData,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] =
Task { ctx => Task { ctx =>
def extract(ra: RAttachment, pos: Int): F[Extracted] = def extract(ra: RAttachment, pos: Int): F[Extracted] =
findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, pos, m)) findMime(store)(ra).flatMap(m => extractSafe(ctx, store, archive)(ra, pos, m))
for { for {
lastPos <- ctx.store.transact(RAttachment.nextPosition(item.item.id)) lastPos <- store.transact(RAttachment.nextPosition(item.item.id))
extracts <- extracts <-
item.attachments.zipWithIndex item.attachments.zipWithIndex
.traverse(t => extract(t._1, lastPos + t._2)) .traverse(t => extract(t._1, lastPos + t._2))
.map(Monoid[Extracted].combineAll) .map(Monoid[Extracted].combineAll)
.map(fixPositions) .map(fixPositions)
nra = extracts.files nra = extracts.files
_ <- extracts.files.traverse(storeAttachment(ctx)) _ <- extracts.files.traverse(storeAttachment(store))
naa = extracts.archives naa = extracts.archives
_ <- naa.traverse(storeArchive(ctx)) _ <- naa.traverse(storeArchive(store))
} yield naa.headOption -> item.copy( } yield naa.headOption -> item.copy(
attachments = nra, attachments = nra,
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap, originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap,
@ -83,25 +86,26 @@ object ExtractArchive {
if (extract.archives.isEmpty) extract if (extract.archives.isEmpty) extract
else extract.updatePositions else extract.updatePositions
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
def extractSafe[F[_]: Async]( def extractSafe[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] = )(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] =
mime match { mime match {
case MimeType.ZipMatch(_) if ra.name.exists(_.toLowerCase.endsWith(".zip")) => case MimeType.ZipMatch(_) if ra.name.exists(_.toLowerCase.endsWith(".zip")) =>
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *> ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
extractZip(ctx, archive)(ra, pos) extractZip(ctx, store, archive)(ra, pos)
.flatMap(cleanupParents(ctx, ra, archive)) .flatMap(cleanupParents(ctx, store, ra, archive))
case MimeType.EmailMatch(_) => case MimeType.EmailMatch(_) =>
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *> ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
extractMail(ctx, archive)(ra, pos) extractMail(ctx, store, archive)(ra, pos)
.flatMap(cleanupParents(ctx, ra, archive)) .flatMap(cleanupParents(ctx, store, ra, archive))
case _ => case _ =>
ctx.logger.debug(s"Not an archive: ${mime.asString}") *> ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
@ -110,6 +114,7 @@ object ExtractArchive {
def cleanupParents[F[_]: Sync]( def cleanupParents[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
ra: RAttachment, ra: RAttachment,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(extracted: Extracted): F[Extracted] = )(extracted: Extracted): F[Extracted] =
@ -119,30 +124,31 @@ object ExtractArchive {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Extracted inner attachment ${ra.name}. Remove it completely." s"Extracted inner attachment ${ra.name}. Remove it completely."
) )
_ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) _ <- store.transact(RAttachmentArchive.delete(ra.id))
_ <- ctx.store.transact(RAttachment.delete(ra.id)) _ <- store.transact(RAttachment.delete(ra.id))
_ <- ctx.store.fileRepo.delete(ra.fileId) _ <- store.fileRepo.delete(ra.fileId)
} yield extracted } yield extracted
case None => case None =>
for { for {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Extracted attachment ${ra.name}. Remove it from the item." s"Extracted attachment ${ra.name}. Remove it from the item."
) )
_ <- ctx.store.transact(RAttachment.delete(ra.id)) _ <- store.transact(RAttachment.delete(ra.id))
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id)) } yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
} }
def extractZip[F[_]: Async]( def extractZip[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = { )(ra: RAttachment, pos: Int): F[Extracted] = {
val zipData = ctx.store.fileRepo.getBytes(ra.fileId) val zipData = store.fileRepo.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
zipData zipData
.through(Zip.unzipP[F](8192, glob)) .through(Zip.unzipP[F](8192, glob))
.zipWithIndex .zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, None)) .flatMap(handleEntry(ctx, store, ra, pos, archive, None))
.foldMonoid .foldMonoid
.compile .compile
.lastOrError .lastOrError
@ -150,9 +156,10 @@ object ExtractArchive {
def extractMail[F[_]: Async]( def extractMail[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = { )(ra: RAttachment, pos: Int): F[Extracted] = {
val email: Stream[F, Byte] = ctx.store.fileRepo.getBytes(ra.fileId) val email: Stream[F, Byte] = store.fileRepo.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false)
@ -170,7 +177,9 @@ object ExtractArchive {
ReadMail ReadMail
.mailToEntries(ctx.logger, glob, attachOnly)(mail) .mailToEntries(ctx.logger, glob, attachOnly)(mail)
.zipWithIndex .zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta) .flatMap(handleEntry(ctx, store, ra, pos, archive, mId)) ++ Stream.eval(
givenMeta
)
} }
.foldMonoid .foldMonoid
.compile .compile
@ -185,6 +194,7 @@ object ExtractArchive {
def handleEntry[F[_]: Sync]( def handleEntry[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
ra: RAttachment, ra: RAttachment,
pos: Int, pos: Int,
archive: Option[RAttachmentArchive], archive: Option[RAttachmentArchive],
@ -195,7 +205,7 @@ object ExtractArchive {
val (entry, subPos) = tentry val (entry, subPos) = tentry
val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString) val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString)
val fileId = entry.data.through( val fileId = entry.data.through(
ctx.store.fileRepo store.fileRepo
.save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint) .save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint)
) )
@ -217,16 +227,16 @@ object ExtractArchive {
} }
def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = { def storeAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] = {
val insert = CreateItem.insertAttachment(ctx)(ra) val insert = CreateItem.insertAttachment(store)(ra)
for { for {
n1 <- ctx.store.transact(RAttachment.updatePosition(ra.id, ra.position)) n1 <- store.transact(RAttachment.updatePosition(ra.id, ra.position))
n2 <- if (n1 > 0) 0.pure[F] else insert n2 <- if (n1 > 0) 0.pure[F] else insert
} yield n1 + n2 } yield n1 + n2
} }
def storeArchive[F[_]](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] = def storeArchive[F[_]](store: Store[F])(aa: RAttachmentArchive): F[Int] =
ctx.store.transact(RAttachmentArchive.insert(aa)) store.transact(RAttachmentArchive.insert(aa))
case class Extracted( case class Extracted(
files: Vector[RAttachment], files: Vector[RAttachment],

View File

@ -16,7 +16,8 @@ import docspell.analysis.contact._
import docspell.common.MetaProposal.Candidate import docspell.common.MetaProposal.Candidate
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
/** Super simple approach to find corresponding metadata for an item by looking up values /** Super simple approach to find corresponding metadata for an item by looking up values
@ -26,7 +27,8 @@ object FindProposal {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync]( def apply[F[_]: Sync](
cfg: Config.TextAnalysis cfg: Config.TextAnalysis,
store: Store[F]
)(data: ItemData): Task[F, Args, ItemData] = )(data: ItemData): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels))) val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
@ -34,15 +36,16 @@ object FindProposal {
_ <- ctx.logger.info("Starting find-proposal") _ <- ctx.logger.info("Starting find-proposal")
rmv <- rmas rmv <- rmas
.traverse(rm => .traverse(rm =>
processAttachment(cfg, rm, data.findDates(rm), ctx) processAttachment(cfg, rm, data.findDates(rm), ctx, store)
.map(ml => rm.copy(proposals = ml)) .map(ml => rm.copy(proposals = ml))
) )
clp <- lookupClassifierProposals(ctx, data.classifyProposals) clp <- lookupClassifierProposals(ctx, store, data.classifyProposals)
} yield data.copy(metas = rmv, classifyProposals = clp) } yield data.copy(metas = rmv, classifyProposals = clp)
} }
def lookupClassifierProposals[F[_]: Sync]( def lookupClassifierProposals[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
mpList: MetaProposalList mpList: MetaProposalList
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val coll = ctx.args.meta.collective val coll = ctx.args.meta.collective
@ -50,7 +53,7 @@ object FindProposal {
def lookup(mp: MetaProposal): F[Option[IdRef]] = def lookup(mp: MetaProposal): F[Option[IdRef]] =
mp.proposalType match { mp.proposalType match {
case MetaProposalType.CorrOrg => case MetaProposalType.CorrOrg =>
ctx.store store
.transact( .transact(
ROrganization ROrganization
.findLike(coll, mp.values.head.ref.name.toLowerCase, OrgUse.notDisabled) .findLike(coll, mp.values.head.ref.name.toLowerCase, OrgUse.notDisabled)
@ -60,7 +63,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier organization for $mp: $oref") ctx.logger.debug(s"Found classifier organization for $mp: $oref")
) )
case MetaProposalType.CorrPerson => case MetaProposalType.CorrPerson =>
ctx.store store
.transact( .transact(
RPerson RPerson
.findLike( .findLike(
@ -74,7 +77,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier corr-person for $mp: $oref") ctx.logger.debug(s"Found classifier corr-person for $mp: $oref")
) )
case MetaProposalType.ConcPerson => case MetaProposalType.ConcPerson =>
ctx.store store
.transact( .transact(
RPerson RPerson
.findLike( .findLike(
@ -88,7 +91,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier conc-person for $mp: $oref") ctx.logger.debug(s"Found classifier conc-person for $mp: $oref")
) )
case MetaProposalType.ConcEquip => case MetaProposalType.ConcEquip =>
ctx.store store
.transact( .transact(
REquipment REquipment
.findLike( .findLike(
@ -123,9 +126,10 @@ object FindProposal {
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
rm: RAttachmentMeta, rm: RAttachmentMeta,
rd: Vector[NerDateLabel], rd: Vector[NerDateLabel],
ctx: Context[F, ProcessItemArgs] ctx: Context[F, Args],
store: Store[F]
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx)) val finder = Finder.searchExact(ctx, store).next(Finder.searchFuzzy(ctx, store))
List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd)) List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd))
.traverse(identity) .traverse(identity)
.map(MetaProposalList.flatten) .map(MetaProposalList.flatten)
@ -215,19 +219,24 @@ object FindProposal {
def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] = def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] =
_ => value.pure[F] _ => value.pure[F]
def searchExact[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] = def searchExact[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] =
labels => labels =>
labels.toList.traverse(nl => search(nl, true, ctx)).map(MetaProposalList.flatten) labels.toList
.traverse(nl => search(nl, true, ctx, store))
.map(MetaProposalList.flatten)
def searchFuzzy[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] = def searchFuzzy[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] =
labels => labels =>
labels.toList.traverse(nl => search(nl, false, ctx)).map(MetaProposalList.flatten) labels.toList
.traverse(nl => search(nl, false, ctx, store))
.map(MetaProposalList.flatten)
} }
private def search[F[_]: Sync]( private def search[F[_]: Sync](
nt: NerLabel, nt: NerLabel,
exact: Boolean, exact: Boolean,
ctx: Context[F, ProcessItemArgs] ctx: Context[F, ProcessItemArgs],
store: Store[F]
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val value = val value =
if (exact) normalizeSearchValue(nt.label) if (exact) normalizeSearchValue(nt.label)
@ -243,7 +252,7 @@ object FindProposal {
nt.tag match { nt.tag match {
case NerTag.Organization => case NerTag.Organization =>
ctx.logger.debug(s"Looking for organizations: $value") *> ctx.logger.debug(s"Looking for organizations: $value") *>
ctx.store store
.transact( .transact(
ROrganization ROrganization
.findLike(ctx.args.meta.collective, value, OrgUse.notDisabled) .findLike(ctx.args.meta.collective, value, OrgUse.notDisabled)
@ -251,20 +260,20 @@ object FindProposal {
.map(MetaProposalList.from(MetaProposalType.CorrOrg, nt)) .map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
case NerTag.Person => case NerTag.Person =>
val s1 = ctx.store val s1 = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, value, PersonUse.concerningAndBoth) .findLike(ctx.args.meta.collective, value, PersonUse.concerningAndBoth)
) )
.map(MetaProposalList.from(MetaProposalType.ConcPerson, nt)) .map(MetaProposalList.from(MetaProposalType.ConcPerson, nt))
val s2 = ctx.store val s2 = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, value, PersonUse.correspondentAndBoth) .findLike(ctx.args.meta.collective, value, PersonUse.correspondentAndBoth)
) )
.map(MetaProposalList.from(MetaProposalType.CorrPerson, nt)) .map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
val s3 = val s3 =
ctx.store store
.transact( .transact(
ROrganization ROrganization
.findLike(ctx.args.meta.collective, value, OrgUse.notDisabled) .findLike(ctx.args.meta.collective, value, OrgUse.notDisabled)
@ -283,7 +292,7 @@ object FindProposal {
case NerTag.Misc => case NerTag.Misc =>
ctx.logger.debug(s"Looking for equipments: $value") *> ctx.logger.debug(s"Looking for equipments: $value") *>
ctx.store store
.transact( .transact(
REquipment REquipment
.findLike(ctx.args.meta.collective, value, EquipmentUse.notDisabled) .findLike(ctx.args.meta.collective, value, EquipmentUse.notDisabled)
@ -291,7 +300,7 @@ object FindProposal {
.map(MetaProposalList.from(MetaProposalType.ConcEquip, nt)) .map(MetaProposalList.from(MetaProposalType.ConcEquip, nt))
case NerTag.Email => case NerTag.Email =>
searchContact(nt, ContactKind.Email, value, ctx) searchContact(nt, ContactKind.Email, value, ctx, store)
case NerTag.Website => case NerTag.Website =>
if (!exact) { if (!exact) {
@ -301,9 +310,9 @@ object FindProposal {
.map(_.toPrimaryDomain.asString) .map(_.toPrimaryDomain.asString)
.map(s => s"%$s%") .map(s => s"%$s%")
.getOrElse(value) .getOrElse(value)
searchContact(nt, ContactKind.Website, searchString, ctx) searchContact(nt, ContactKind.Website, searchString, ctx, store)
} else } else
searchContact(nt, ContactKind.Website, value, ctx) searchContact(nt, ContactKind.Website, value, ctx, store)
case NerTag.Date => case NerTag.Date =>
// There is no database search required for this tag // There is no database search required for this tag
@ -315,18 +324,19 @@ object FindProposal {
nt: NerLabel, nt: NerLabel,
kind: ContactKind, kind: ContactKind,
value: String, value: String,
ctx: Context[F, ProcessItemArgs] ctx: Context[F, ProcessItemArgs],
store: Store[F]
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val orgs = ctx.store val orgs = store
.transact(ROrganization.findLike(ctx.args.meta.collective, kind, value)) .transact(ROrganization.findLike(ctx.args.meta.collective, kind, value))
.map(MetaProposalList.from(MetaProposalType.CorrOrg, nt)) .map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
val corrP = ctx.store val corrP = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, kind, value, PersonUse.correspondentAndBoth) .findLike(ctx.args.meta.collective, kind, value, PersonUse.correspondentAndBoth)
) )
.map(MetaProposalList.from(MetaProposalType.CorrPerson, nt)) .map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
val concP = ctx.store val concP = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, kind, value, PersonUse.concerningAndBoth) .findLike(ctx.args.meta.collective, kind, value, PersonUse.concerningAndBoth)
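The Finder used above (searchExact(...).next(searchFuzzy(...))) is essentially a function from NER labels to proposals, plus a fallback combinator. A minimal sketch of that shape, assuming MetaProposalList exposes nonEmpty (the real definition lives in FindProposal and may differ):

import cats.Monad
import cats.implicits._

trait Finder[F[_]] { self =>
  def find(labels: Seq[NerLabel]): F[MetaProposalList]

  // try `self` first; consult `other` only when nothing was found
  def next(other: Finder[F])(implicit F: Monad[F]): Finder[F] =
    labels =>
      self.find(labels).flatMap { mpl =>
        if (mpl.nonEmpty) mpl.pure[F] else other.find(labels)
      }
}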

View File

@ -8,6 +8,7 @@ package docspell.joex.process
import docspell.common._ import docspell.common._
import docspell.joex.process.ItemData.AttachmentDates import docspell.joex.process.ItemData.AttachmentDates
import docspell.scheduler.JobTaskResultEncoder
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem} import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
import io.circe.syntax.EncoderOps import io.circe.syntax.EncoderOps
@ -118,7 +119,28 @@ object ItemData {
) )
.asJson, .asJson,
"tags" -> data.tags.asJson, "tags" -> data.tags.asJson,
"assumedTags" -> data.classifyTags.asJson "assumedTags" -> data.classifyTags.asJson,
"assumedCorrOrg" -> data.finalProposals
.find(MetaProposalType.CorrOrg)
.map(_.values.head.ref)
.asJson
) )
} }
implicit val jobTaskResultEncoder: JobTaskResultEncoder[ItemData] =
JobTaskResultEncoder.fromJson[ItemData].withMessage { data =>
val tags =
if (data.tags.isEmpty && data.classifyTags.isEmpty) ""
else (data.tags ++ data.classifyTags).mkString("[", ", ", "]")
val corg =
data.finalProposals.find(MetaProposalType.CorrOrg).map(_.values.head.ref.name)
val cpers =
data.finalProposals.find(MetaProposalType.CorrPerson).map(_.values.head.ref.name)
val org = corg match {
case Some(o) => s" by $o" + cpers.map(p => s"/$p").getOrElse("")
case None => cpers.map(p => s" by $p").getOrElse("")
}
s"Processed '${data.item.name}' $tags$org"
}
} }
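The withMessage part above turns the JSON result into the one-line summary that ends up in the job log. With illustrative values (two tags, a classified organization and person), the rendered string would look like:

Processed 'invoice-2022.pdf' [Invoice, Todo] by Acme Corp/Jane Doe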

View File

@ -17,59 +17,62 @@ import docspell.common.{ItemState, ProcessItemArgs}
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile import docspell.joex.analysis.RegexNerFile
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records.RItem import docspell.store.records.RItem
object ItemHandler { object ItemHandler {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def onCancel[F[_]: Sync]: Task[F, Args, Unit] = def onCancel[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
logWarn("Now cancelling.").flatMap(_ => logWarn[F]("Now cancelling.").flatMap(_ =>
markItemCreated.flatMap { markItemCreated(store).flatMap {
case true => case true =>
Task.pure(()) Task.pure(())
case false => case false =>
deleteByFileIds[F].flatMap(_ => deleteFiles) deleteByFileIds[F](store).flatMap(_ => deleteFiles(store))
} }
) )
def newItem[F[_]: Async]( def newItem[F[_]: Async](
cfg: Config, cfg: Config,
store: Store[F],
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F]
): Task[F, Args, Option[ItemData]] = ): Task[F, Args, Option[ItemData]] =
logBeginning.flatMap(_ => logBeginning[F].flatMap(_ =>
DuplicateCheck[F] DuplicateCheck[F](store)
.flatMap(args => .flatMap(args =>
if (args.files.isEmpty) logNoFiles.map(_ => None) if (args.files.isEmpty) logNoFiles[F].map(_ => None)
else { else {
val create: Task[F, Args, ItemData] = val create: Task[F, Args, ItemData] =
CreateItem[F].contramap(_ => args.pure[F]) CreateItem[F](store).contramap(_ => args.pure[F])
create create
.flatMap(itemStateTask(ItemState.Processing)) .flatMap(itemStateTask(store, ItemState.Processing))
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer)) .flatMap(safeProcess[F](cfg, store, itemOps, fts, analyser, regexNer))
.map(_.some) .map(_.some)
} }
) )
) )
def itemStateTask[F[_]: Sync, A]( def itemStateTask[F[_]: Sync, A](store: Store[F], state: ItemState)(
state: ItemState data: ItemData
)(data: ItemData): Task[F, A, ItemData] = ): Task[F, A, ItemData] =
Task(ctx => Task(_ =>
ctx.store store
.transact(RItem.updateState(data.item.id, state, ItemState.invalidStates)) .transact(RItem.updateState(data.item.id, state, ItemState.invalidStates))
.map(_ => data) .map(_ => data)
) )
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] = def isLastRetry[F[_]]: Task[F, Args, Boolean] =
Task(_.isLastRetry) Task(_.isLastRetry)
def safeProcess[F[_]: Async]( def safeProcess[F[_]: Async](
cfg: Config, cfg: Config,
store: Store[F],
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
@ -77,30 +80,31 @@ object ItemHandler {
)(data: ItemData): Task[F, Args, ItemData] = )(data: ItemData): Task[F, Args, ItemData] =
isLastRetry[F].flatMap { isLastRetry[F].flatMap {
case true => case true =>
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data).attempt.flatMap { ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data).attempt
case Right(d) => .flatMap {
Task.pure(d) case Right(d) =>
case Left(ex) => Task.pure(d)
logWarn[F]( case Left(ex) =>
"Processing failed on last retry. Creating item but without proposals." logWarn[F](
).flatMap(_ => itemStateTask(ItemState.Created)(data)) "Processing failed on last retry. Creating item but without proposals."
.andThen(_ => Sync[F].raiseError(ex)) ).flatMap(_ => itemStateTask(store, ItemState.Created)(data))
} .andThen(_ => Sync[F].raiseError(ex))
}
case false => case false =>
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data) ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data)
.flatMap(itemStateTask(ItemState.Created)) .flatMap(itemStateTask(store, ItemState.Created))
} }
private def markItemCreated[F[_]: Sync]: Task[F, Args, Boolean] = private def markItemCreated[F[_]: Sync](store: Store[F]): Task[F, Args, Boolean] =
Task { ctx => Task { ctx =>
val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet
(for { (for {
item <- OptionT(ctx.store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq))) item <- OptionT(store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq)))
_ <- OptionT.liftF( _ <- OptionT.liftF(
ctx.logger.info("Processing cancelled. Marking item as created anyways.") ctx.logger.info("Processing cancelled. Marking item as created anyways.")
) )
_ <- OptionT.liftF( _ <- OptionT.liftF(
ctx.store store
.transact( .transact(
RItem.updateState(item.id, ItemState.Created, ItemState.invalidStates) RItem.updateState(item.id, ItemState.Created, ItemState.invalidStates)
) )
@ -111,11 +115,11 @@ object ItemHandler {
) )
} }
private def deleteByFileIds[F[_]: Sync]: Task[F, Args, Unit] = private def deleteByFileIds[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
val states = ItemState.invalidStates val states = ItemState.invalidStates
for { for {
items <- ctx.store.transact( items <- store.transact(
QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states) QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states)
) )
_ <- _ <-
@ -124,16 +128,16 @@ object ItemHandler {
ctx.logger.info( ctx.logger.info(
s"No items found for file ids ${ctx.args.files.map(_.fileMetaId)}" s"No items found for file ids ${ctx.args.files.map(_.fileMetaId)}"
) )
_ <- items.traverse(i => QItem.delete(ctx.store)(i.id, ctx.args.meta.collective)) _ <- items.traverse(i => QItem.delete(store)(i.id, ctx.args.meta.collective))
} yield () } yield ()
} }
private def deleteFiles[F[_]: Sync]: Task[F, Args, Unit] = private def deleteFiles[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task(ctx => Task(ctx =>
ctx.logger.info("Deleting input files …") *> ctx.logger.info("Deleting input files …") *>
Stream Stream
.emits(ctx.args.files.map(_.fileMetaId)) .emits(ctx.args.files.map(_.fileMetaId))
.evalMap(id => ctx.store.fileRepo.delete(id).attempt) .evalMap(id => store.fileRepo.delete(id).attempt)
.compile .compile
.drain .drain
) )
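All steps in this file compose through the Task type from the new docspell-scheduler module. Its essential shape, as a hedged sketch (the real type offers more combinators, such as the contramap and attempt used above):

import cats.FlatMap
import cats.implicits._

// a job step is a function from the job's Context to an effectful result;
// flatMap threads the same context through consecutive steps
trait Task[F[_], A, B] { self =>
  def run(ctx: Context[F, A]): F[B]

  def map[C](f: B => C)(implicit F: FlatMap[F]): Task[F, A, C] =
    ctx => self.run(ctx).map(f)

  def flatMap[C](f: B => Task[F, A, C])(implicit F: FlatMap[F]): Task[F, A, C] =
    ctx => self.run(ctx).flatMap(b => f(b).run(ctx))
}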

View File

@ -11,20 +11,25 @@ import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records.RItem import docspell.store.records.RItem
object LinkProposal { object LinkProposal {
def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def onlyNew[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
if (data.item.state.isValid) if (data.item.state.isValid)
Task Task
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item")) .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item"))
.map(_ => data) .map(_ => data)
else else
LinkProposal[F](data) LinkProposal[F](store)(data)
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
if (data.item.state == ItemState.Confirmed) if (data.item.state == ItemState.Confirmed)
Task Task
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item")) .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item"))
@ -35,7 +40,7 @@ object LinkProposal {
ctx.logger.info(s"Starting linking proposals") *> ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all MetaProposalType.all
.traverse(applyValue(data, proposals, ctx)) .traverse(applyValue(data, proposals, ctx, store))
.map(result => ctx.logger.info(s"Results from proposal processing: $result")) .map(result => ctx.logger.info(s"Results from proposal processing: $result"))
.map(_ => data) .map(_ => data)
} }
@ -43,7 +48,8 @@ object LinkProposal {
def applyValue[F[_]: Sync]( def applyValue[F[_]: Sync](
data: ItemData, data: ItemData,
proposalList: MetaProposalList, proposalList: MetaProposalList,
ctx: Context[F, ProcessItemArgs] ctx: Context[F, ProcessItemArgs],
store: Store[F]
)(mpt: MetaProposalType): F[Result] = )(mpt: MetaProposalType): F[Result] =
data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match { data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match {
case None => case None =>
@ -51,29 +57,30 @@ object LinkProposal {
Result.noneFound(mpt).pure[F] Result.noneFound(mpt).pure[F]
case Some(a) if a.isSingleValue => case Some(a) if a.isSingleValue =>
ctx.logger.info(s"Found one candidate for ${a.proposalType}") *> ctx.logger.info(s"Found one candidate for ${a.proposalType}") *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ => setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map(
Result.single(mpt) _ => Result.single(mpt)
) )
case Some(a) => case Some(a) =>
val ids = a.values.map(_.ref.id.id) val ids = a.values.map(_.ref.id.id)
ctx.logger.info( ctx.logger.info(
s"Found many (${a.size}, $ids) candidates for ${a.proposalType}. Setting first." s"Found many (${a.size}, $ids) candidates for ${a.proposalType}. Setting first."
) *> ) *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ => setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map(
Result.multiple(mpt) _ => Result.multiple(mpt)
) )
} }
def setItemMeta[F[_]: Sync]( def setItemMeta[F[_]: Sync](
itemId: Ident, itemId: Ident,
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
store: Store[F],
mpt: MetaProposalType, mpt: MetaProposalType,
value: Ident value: Ident
): F[Int] = ): F[Int] =
mpt match { mpt match {
case MetaProposalType.CorrOrg => case MetaProposalType.CorrOrg =>
ctx.logger.debug(s"Updating item organization with: ${value.id}") *> ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
ctx.store.transact( store.transact(
RItem.updateCorrOrg( RItem.updateCorrOrg(
NonEmptyList.of(itemId), NonEmptyList.of(itemId),
ctx.args.meta.collective, ctx.args.meta.collective,
@ -82,7 +89,7 @@ object LinkProposal {
) )
case MetaProposalType.ConcPerson => case MetaProposalType.ConcPerson =>
ctx.logger.debug(s"Updating item concerning person with: $value") *> ctx.logger.debug(s"Updating item concerning person with: $value") *>
ctx.store.transact( store.transact(
RItem.updateConcPerson( RItem.updateConcPerson(
NonEmptyList.of(itemId), NonEmptyList.of(itemId),
ctx.args.meta.collective, ctx.args.meta.collective,
@ -91,7 +98,7 @@ object LinkProposal {
) )
case MetaProposalType.CorrPerson => case MetaProposalType.CorrPerson =>
ctx.logger.debug(s"Updating item correspondent person with: $value") *> ctx.logger.debug(s"Updating item correspondent person with: $value") *>
ctx.store.transact( store.transact(
RItem.updateCorrPerson( RItem.updateCorrPerson(
NonEmptyList.of(itemId), NonEmptyList.of(itemId),
ctx.args.meta.collective, ctx.args.meta.collective,
@ -100,7 +107,7 @@ object LinkProposal {
) )
case MetaProposalType.ConcEquip => case MetaProposalType.ConcEquip =>
ctx.logger.debug(s"Updating item concerning equipment with: $value") *> ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
ctx.store.transact( store.transact(
RItem.updateConcEquip( RItem.updateConcEquip(
NonEmptyList.of(itemId), NonEmptyList.of(itemId),
ctx.args.meta.collective, ctx.args.meta.collective,
@ -112,7 +119,7 @@ object LinkProposal {
case Some(ld) => case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC)) val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item date ${value.id}") *> ctx.logger.debug(s"Updating item date ${value.id}") *>
ctx.store.transact( store.transact(
RItem.updateDate( RItem.updateDate(
NonEmptyList.of(itemId), NonEmptyList.of(itemId),
ctx.args.meta.collective, ctx.args.meta.collective,
@ -128,7 +135,7 @@ object LinkProposal {
case Some(ld) => case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC)) val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *> ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
ctx.store.transact( store.transact(
RItem.updateDueDate( RItem.updateDueDate(
NonEmptyList.of(itemId), NonEmptyList.of(itemId),
ctx.args.meta.collective, ctx.args.meta.collective,

View File

@ -7,6 +7,7 @@
package docspell.joex.process package docspell.joex.process
import cats.effect._ import cats.effect._
import cats.implicits._
import docspell.analysis.TextAnalyser import docspell.analysis.TextAnalyser
import docspell.backend.ops.OItem import docspell.backend.ops.OItem
@ -14,7 +15,8 @@ import docspell.common.ProcessItemArgs
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile import docspell.joex.analysis.RegexNerFile
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
object ProcessItem { object ProcessItem {
@ -23,12 +25,13 @@ object ProcessItem {
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
ExtractArchive(item) ExtractArchive(store)(item)
.flatMap(Task.setProgress(20)) .flatMap(Task.setProgress(20))
.flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80))) .flatMap(processAttachments0(cfg, fts, analyser, regexNer, store, (40, 60, 80)))
.flatMap(LinkProposal.onlyNew[F]) .flatMap(LinkProposal.onlyNew[F](store))
.flatMap(SetGivenData.onlyNew[F](itemOps)) .flatMap(SetGivenData.onlyNew[F](itemOps))
.flatMap(Task.setProgress(99)) .flatMap(Task.setProgress(99))
.flatMap(RemoveEmptyItem(itemOps)) .flatMap(RemoveEmptyItem(itemOps))
@ -37,34 +40,37 @@ object ProcessItem {
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item) processAttachments0[F](cfg, fts, analyser, regexNer, store, (30, 60, 90))(item)
def analysisOnly[F[_]: Async]( def analysisOnly[F[_]: Async](
cfg: Config, cfg: Config,
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item) TextAnalysis[F](cfg.textAnalysis, analyser, regexNer, store)(item)
.flatMap(FindProposal[F](cfg.textAnalysis)) .flatMap(FindProposal[F](cfg.textAnalysis, store))
.flatMap(EvalProposals[F]) .flatMap(EvalProposals[F](store))
.flatMap(CrossCheckProposals[F]) .flatMap(CrossCheckProposals[F](store))
.flatMap(SaveProposals[F]) .flatMap(SaveProposals[F](store))
private def processAttachments0[F[_]: Async]( private def processAttachments0[F[_]: Async](
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F], regexNer: RegexNerFile[F],
store: Store[F],
progress: (Int, Int, Int) progress: (Int, Int, Int)
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
ConvertPdf(cfg.convert, item) ConvertPdf(cfg.convert, store, item)
.flatMap(Task.setProgress(progress._1)) .flatMap(Task.setProgress(progress._1))
.flatMap(TextExtraction(cfg.extraction, fts)) .flatMap(TextExtraction(cfg.extraction, fts, store))
.flatMap(AttachmentPreview(cfg.extraction.preview)) .flatMap(AttachmentPreview(cfg.extraction.preview, store))
.flatMap(AttachmentPageCount()) .flatMap(AttachmentPageCount(store))
.flatMap(Task.setProgress(progress._2)) .flatMap(Task.setProgress(progress._2))
.flatMap(analysisOnly[F](cfg, analyser, regexNer)) .flatMap(analysisOnly[F](cfg, analyser, regexNer, store))
.flatMap(Task.setProgress(progress._3)) .flatMap(Task.setProgress(progress._3))
} }
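Spelled out, processAttachments0 and analysisOnly form one linear pipeline with three progress checkpoints (the tuple arguments above: (40, 60, 80) for new items, (30, 60, 90) for reprocessing):

ConvertPdf
  -> setProgress(first checkpoint)
TextExtraction
AttachmentPreview
AttachmentPageCount
  -> setProgress(second checkpoint)
TextAnalysis -> FindProposal -> EvalProposals -> CrossCheckProposals -> SaveProposals
  -> setProgress(third checkpoint)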

View File

@ -16,8 +16,9 @@ import docspell.common._
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile import docspell.joex.analysis.RegexNerFile
import docspell.joex.scheduler.Context import docspell.scheduler.Context
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records.RAttachmentSource import docspell.store.records.RAttachmentSource
@ -32,13 +33,14 @@ object ReProcessItem {
fts: FtsClient[F], fts: FtsClient[F],
itemOps: OItem[F], itemOps: OItem[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F],
store: Store[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
Task Task
.log[F, Args](_.info("===== Start reprocessing ======")) .log[F, Args](_.info("===== Start reprocessing ======"))
.flatMap(_ => .flatMap(_ =>
loadItem[F] loadItem[F](store)
.flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer)) .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer, store))
.map(_ => ()) .map(_ => ())
) )
@ -53,13 +55,13 @@ object ReProcessItem {
else ra => selection.contains(ra.id) else ra => selection.contains(ra.id)
} }
def loadItem[F[_]: Sync]: Task[F, Args, ItemData] = def loadItem[F[_]: Sync](store: Store[F]): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
(for { (for {
item <- OptionT(ctx.store.transact(RItem.findById(ctx.args.itemId))) item <- OptionT(store.transact(RItem.findById(ctx.args.itemId)))
attach <- OptionT.liftF(ctx.store.transact(RAttachment.findByItem(item.id))) attach <- OptionT.liftF(store.transact(RAttachment.findByItem(item.id)))
asrc <- asrc <-
OptionT.liftF(ctx.store.transact(RAttachmentSource.findByItem(ctx.args.itemId))) OptionT.liftF(store.transact(RAttachmentSource.findByItem(ctx.args.itemId)))
asrcMap = asrc.map(s => s.id -> s).toMap asrcMap = asrc.map(s => s.id -> s).toMap
// copy the original files over to attachments to run the default processing task // copy the original files over to attachments to run the default processing task
// the processing doesn't touch the original files, only RAttachments // the processing doesn't touch the original files, only RAttachments
@ -97,6 +99,7 @@ object ReProcessItem {
itemOps: OItem[F], itemOps: OItem[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F], regexNer: RegexNerFile[F],
store: Store[F],
data: ItemData data: ItemData
): Task[F, Args, ItemData] = { ): Task[F, Args, ItemData] = {
@ -121,27 +124,27 @@ object ReProcessItem {
Nil Nil
).pure[F] ).pure[F]
getLanguage[F].flatMap { lang => getLanguage[F](store).flatMap { lang =>
ProcessItem ProcessItem
.processAttachments[F](cfg, fts, analyser, regexNer)(data) .processAttachments[F](cfg, fts, analyser, regexNer, store)(data)
.flatMap(LinkProposal[F]) .flatMap(LinkProposal[F](store))
.flatMap(SetGivenData[F](itemOps)) .flatMap(SetGivenData[F](itemOps))
.contramap[Args](convertArgs(lang)) .contramap[Args](convertArgs(lang))
} }
} }
def getLanguage[F[_]: Sync]: Task[F, Args, Language] = def getLanguage[F[_]: Sync](store: Store[F]): Task[F, Args, Language] =
Task { ctx => Task { ctx =>
val lang1 = OptionT( val lang1 = OptionT(
ctx.store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption) store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption)
) )
val lang2 = OptionT(ctx.store.transact(RCollective.findByItem(ctx.args.itemId))) val lang2 = OptionT(store.transact(RCollective.findByItem(ctx.args.itemId)))
.map(_.language) .map(_.language)
lang1.orElse(lang2).getOrElse(Language.German) lang1.orElse(lang2).getOrElse(Language.German)
} }
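getLanguage is a two-level fallback: the language already stored for the item wins, then the collective's configured language, then German. Stand-alone, the OptionT chain behaves like this (illustrative values only):

import cats.data.OptionT
import cats.effect.IO

val itemLang = OptionT(IO.pure(Option.empty[String])) // no per-item language
val collLang = OptionT(IO.pure(Option("en")))         // collective default
val lang = itemLang.orElse(collLang).getOrElse("de")  // yields IO("en")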
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] = def isLastRetry[F[_]]: Task[F, Args, Boolean] =
Task(_.isLastRetry) Task(_.isLastRetry)
def safeProcess[F[_]: Async]( def safeProcess[F[_]: Async](
@ -149,11 +152,12 @@ object ReProcessItem {
fts: FtsClient[F], fts: FtsClient[F],
itemOps: OItem[F], itemOps: OItem[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F],
store: Store[F]
)(data: ItemData): Task[F, Args, ItemData] = )(data: ItemData): Task[F, Args, ItemData] =
isLastRetry[F].flatMap { isLastRetry[F].flatMap {
case true => case true =>
processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data).attempt
.flatMap { .flatMap {
case Right(d) => case Right(d) =>
Task.pure(d) Task.pure(d)
@ -163,7 +167,7 @@ object ReProcessItem {
).andThen(_ => Sync[F].raiseError(ex)) ).andThen(_ => Sync[F].raiseError(ex))
} }
case false => case false =>
processFiles[F](cfg, fts, itemOps, analyser, regexNer, data) processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data)
} }
private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] = private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =

View File

@ -11,7 +11,7 @@ import cats.implicits._
import docspell.backend.ops.OItem import docspell.backend.ops.OItem
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Task import docspell.scheduler.Task
object RemoveEmptyItem { object RemoveEmptyItem {

View File

@ -10,15 +10,15 @@ import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.AddResult
import docspell.store.records._ import docspell.store.records._
import docspell.store.{AddResult, Store}
/** Saves the proposals in the database */ /** Saves the proposals in the database */
object SaveProposals { object SaveProposals {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] = def apply[F[_]: Sync](store: Store[F])(data: ItemData): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
for { for {
_ <- ctx.logger.info("Storing proposals") _ <- ctx.logger.info("Storing proposals")
@ -26,20 +26,24 @@ object SaveProposals {
.traverse(rm => .traverse(rm =>
ctx.logger.debug( ctx.logger.debug(
s"Storing attachment proposals: ${rm.proposals}" s"Storing attachment proposals: ${rm.proposals}"
) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals)) ) *> store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
) )
_ <- _ <-
if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F] if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F]
else saveItemProposal(ctx, data) else saveItemProposal(ctx, store, data)
} yield data } yield data
} }
def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = { def saveItemProposal[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
data: ItemData
): F[Unit] = {
def upsert(v: RItemProposal): F[Int] = def upsert(v: RItemProposal): F[Int] =
ctx.store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap { store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
case AddResult.Success => 1.pure[F] case AddResult.Success => 1.pure[F]
case AddResult.EntityExists(_) => case AddResult.EntityExists(_) =>
ctx.store.transact(RItemProposal.update(v)) store.transact(RItemProposal.update(v))
case AddResult.Failure(ex) => case AddResult.Failure(ex) =>
ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0 ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0
.pure[F] .pure[F]
@ -47,7 +51,7 @@ object SaveProposals {
for { for {
_ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}") _ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}")
tags <- ctx.store.transact( tags <- store.transact(
RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective) RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective)
) )
tagRefs = tags.map(t => IdRef(t.tagId, t.name)) tagRefs = tags.map(t => IdRef(t.tagId, t.name))
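The upsert helper above is the usual insert-or-update idiom in this code base: attempt the insert guarded by an existence check, then branch on the AddResult. Condensed (the Failure branch logs a warning in the real code):

import cats.effect.Sync
import cats.implicits._

def upsert[F[_]: Sync](store: Store[F])(v: RItemProposal): F[Int] =
  store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
    case AddResult.Success         => 1.pure[F]
    case AddResult.EntityExists(_) => store.transact(RItemProposal.update(v))
    case AddResult.Failure(_)      => 0.pure[F]
  }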

View File

@ -11,7 +11,7 @@ import cats.implicits._
import docspell.backend.ops.OItem import docspell.backend.ops.OItem
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.UpdateResult import docspell.store.UpdateResult
object SetGivenData { object SetGivenData {

View File

@ -18,8 +18,9 @@ import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile import docspell.joex.analysis.RegexNerFile
import docspell.joex.learn.{ClassifierName, Classify, LearnClassifierTask} import docspell.joex.learn.{ClassifierName, Classify, LearnClassifierTask}
import docspell.joex.process.ItemData.AttachmentDates import docspell.joex.process.ItemData.AttachmentDates
import docspell.joex.scheduler.Context import docspell.scheduler.Context
import docspell.joex.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.{RAttachmentMeta, RClassifierSetting} import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
object TextAnalysis { object TextAnalysis {
@ -28,7 +29,8 @@ object TextAnalysis {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
nerFile: RegexNerFile[F] nerFile: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, Args, ItemData] = )(item: ItemData): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
for { for {
@ -41,18 +43,19 @@ object TextAnalysis {
) )
_ <- ctx.logger.debug(s"Storing tags: ${t.map(_._1.copy(content = None))}") _ <- ctx.logger.debug(s"Storing tags: ${t.map(_._1.copy(content = None))}")
_ <- t.traverse(m => _ <- t.traverse(m =>
ctx.store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels)) store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels))
) )
v = t.toVector v = t.toVector
autoTagEnabled <- getActiveAutoTag(ctx, cfg) autoTagEnabled <- getActiveAutoTag(ctx, store, cfg)
tag <- tag <-
if (autoTagEnabled) predictTags(ctx, cfg, item.metas, analyser.classifier) if (autoTagEnabled)
predictTags(ctx, store, cfg, item.metas, analyser.classifier)
else List.empty[String].pure[F] else List.empty[String].pure[F]
classProposals <- classProposals <-
if (cfg.classification.enabled) if (cfg.classification.enabled)
predictItemEntities(ctx, cfg, item.metas, analyser.classifier) predictItemEntities(ctx, store, cfg, item.metas, analyser.classifier)
else MetaProposalList.empty.pure[F] else MetaProposalList.empty.pure[F]
e <- s e <- s
@ -86,16 +89,17 @@ object TextAnalysis {
def predictTags[F[_]: Async]( def predictTags[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta], metas: Vector[RAttachmentMeta],
classifier: TextClassifier[F] classifier: TextClassifier[F]
): F[List[String]] = { ): F[List[String]] = {
val text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep) val text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep)
val classifyWith: ClassifierName => F[Option[String]] = val classifyWith: ClassifierName => F[Option[String]] =
makeClassify(ctx, cfg, classifier)(text) makeClassify(ctx, store, cfg, classifier)(text)
for { for {
names <- ctx.store.transact( names <- store.transact(
ClassifierName.findTagClassifiers(ctx.args.meta.collective) ClassifierName.findTagClassifiers(ctx.args.meta.collective)
) )
_ <- ctx.logger.debug(s"Guessing tags for ${names.size} categories") _ <- ctx.logger.debug(s"Guessing tags for ${names.size} categories")
@ -105,6 +109,7 @@ object TextAnalysis {
def predictItemEntities[F[_]: Async]( def predictItemEntities[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta], metas: Vector[RAttachmentMeta],
classifier: TextClassifier[F] classifier: TextClassifier[F]
@ -116,7 +121,7 @@ object TextAnalysis {
mtype: MetaProposalType mtype: MetaProposalType
): F[Option[MetaProposal]] = ): F[Option[MetaProposal]] =
for { for {
label <- makeClassify(ctx, cfg, classifier)(text).apply(cname) label <- makeClassify(ctx, store, cfg, classifier)(text).apply(cname)
} yield label.map(str => } yield label.map(str =>
MetaProposal(mtype, Candidate(IdRef(Ident.unsafe(""), str), Set.empty)) MetaProposal(mtype, Candidate(IdRef(Ident.unsafe(""), str), Set.empty))
) )
@ -136,13 +141,14 @@ object TextAnalysis {
private def makeClassify[F[_]: Async]( private def makeClassify[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
classifier: TextClassifier[F] classifier: TextClassifier[F]
)(text: String): ClassifierName => F[Option[String]] = )(text: String): ClassifierName => F[Option[String]] =
Classify[F]( Classify[F](
ctx.logger, ctx.logger,
cfg.workingDir, cfg.workingDir,
ctx.store, store,
classifier, classifier,
ctx.args.meta.collective, ctx.args.meta.collective,
text text
@ -150,10 +156,11 @@ object TextAnalysis {
private def getActiveAutoTag[F[_]: Sync]( private def getActiveAutoTag[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis cfg: Config.TextAnalysis
): F[Boolean] = ): F[Boolean] =
if (cfg.classification.enabled) if (cfg.classification.enabled)
ctx.store store
.transact(RClassifierSetting.findById(ctx.args.meta.collective)) .transact(RClassifierSetting.findById(ctx.args.meta.collective))
.map(_.exists(_.autoTagEnabled)) .map(_.exists(_.autoTagEnabled))
.flatTap(enabled => .flatTap(enabled =>

View File

@ -13,12 +13,13 @@ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.extract.{ExtractConfig, ExtractResult, Extraction} import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
import docspell.ftsclient.{FtsClient, TextData} import docspell.ftsclient.{FtsClient, TextData}
import docspell.joex.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta} import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
object TextExtraction { object TextExtraction {
def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])( def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F], store: Store[F])(
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
@ -30,6 +31,7 @@ object TextExtraction {
txt <- item.attachments.traverse( txt <- item.attachments.traverse(
extractTextIfEmpty( extractTextIfEmpty(
ctx, ctx,
store,
cfg, cfg,
ctx.args.meta.language, ctx.args.meta.language,
ctx.args.meta.collective, ctx.args.meta.collective,
@ -38,7 +40,7 @@ object TextExtraction {
) )
_ <- ctx.logger.debug("Storing extracted texts …") _ <- ctx.logger.debug("Storing extracted texts …")
_ <- _ <-
txt.toList.traverse(res => ctx.store.transact(RAttachmentMeta.upsert(res.am))) txt.toList.traverse(res => store.transact(RAttachmentMeta.upsert(res.am)))
_ <- ctx.logger.debug(s"Extracted text stored.") _ <- ctx.logger.debug(s"Extracted text stored.")
idxItem = TextData.item( idxItem = TextData.item(
item.item.id, item.item.id,
@ -65,6 +67,7 @@ object TextExtraction {
def extractTextIfEmpty[F[_]: Async]( def extractTextIfEmpty[F[_]: Async](
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
store: Store[F],
cfg: ExtractConfig, cfg: ExtractConfig,
lang: Language, lang: Language,
collective: Ident, collective: Ident,
@ -91,13 +94,14 @@ object TextExtraction {
ctx.logger.info("TextExtraction skipped, since text is already available.") *> ctx.logger.info("TextExtraction skipped, since text is already available.") *>
makeTextData((rm, Nil)).pure[F] makeTextData((rm, Nil)).pure[F]
case _ => case _ =>
extractTextToMeta[F](ctx, cfg, lang, item)(ra) extractTextToMeta[F](ctx, store, cfg, lang, item)(ra)
.map(makeTextData) .map(makeTextData)
} }
} }
def extractTextToMeta[F[_]: Async]( def extractTextToMeta[F[_]: Async](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
cfg: ExtractConfig, cfg: ExtractConfig,
lang: Language, lang: Language,
item: ItemData item: ItemData
@ -105,8 +109,8 @@ object TextExtraction {
for { for {
_ <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}") _ <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}")
dst <- Duration.stopTime[F] dst <- Duration.stopTime[F]
fids <- filesToExtract(ctx)(item, ra) fids <- filesToExtract(store)(item, ra)
res <- extractTextFallback(ctx, cfg, ra, lang)(fids) res <- extractTextFallback(ctx, store, cfg, ra, lang)(fids)
meta = item.changeMeta( meta = item.changeMeta(
ra.id, ra.id,
lang, lang,
@ -123,14 +127,14 @@ object TextExtraction {
} yield (meta, tags) } yield (meta, tags)
def extractText[F[_]: Sync]( def extractText[F[_]: Sync](
ctx: Context[F, _], store: Store[F],
extr: Extraction[F], extr: Extraction[F],
lang: Language lang: Language
)(fileId: FileKey): F[ExtractResult] = { )(fileId: FileKey): F[ExtractResult] = {
val data = ctx.store.fileRepo.getBytes(fileId) val data = store.fileRepo.getBytes(fileId)
def findMime: F[MimeType] = def findMime: F[MimeType] =
OptionT(ctx.store.fileRepo.findMeta(fileId)) OptionT(store.fileRepo.findMeta(fileId))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
@ -140,6 +144,7 @@ object TextExtraction {
private def extractTextFallback[F[_]: Async]( private def extractTextFallback[F[_]: Async](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
cfg: ExtractConfig, cfg: ExtractConfig,
ra: RAttachment, ra: RAttachment,
lang: Language lang: Language
@ -151,7 +156,7 @@ object TextExtraction {
case id :: rest => case id :: rest =>
val extr = Extraction.create[F](ctx.logger, cfg) val extr = Extraction.create[F](ctx.logger, cfg)
extractText[F](ctx, extr, lang)(id) extractText[F](store, extr, lang)(id)
.flatMap { .flatMap {
case res @ ExtractResult.Success(_, _) => case res @ ExtractResult.Success(_, _) =>
res.some.pure[F] res.some.pure[F]
@ -161,12 +166,12 @@ object TextExtraction {
.warn( .warn(
s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file." s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file."
) )
.flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest)) .flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest))
case ExtractResult.Failure(ex) => case ExtractResult.Failure(ex) =>
ctx.logger ctx.logger
.warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file") .warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file")
.flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest)) .flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest))
} }
} }
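The recursion above tries the candidate file ids in order: the first successful extraction wins, while unsupported formats and failures log a warning and move on to the next file. As a control-flow sketch (tryExtract is a hypothetical stand-in for one extraction attempt; the Nil case lies outside the shown hunk and is assumed to yield None):

import cats.Monad
import cats.implicits._

def fallback[F[_]: Monad](
    tryExtract: FileKey => F[Option[ExtractResult]]
)(ids: List[FileKey]): F[Option[ExtractResult]] =
  ids match {
    case Nil => Option.empty[ExtractResult].pure[F] // all candidates exhausted
    case id :: rest =>
      tryExtract(id).flatMap {
        case Some(res) => res.some.pure[F]           // first success wins
        case None      => fallback(tryExtract)(rest) // warn and try next file
      }
  }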
@ -176,13 +181,13 @@ object TextExtraction {
* If the source file is a PDF, then use the converted file. This may then already * If the source file is a PDF, then use the converted file. This may then already
* contain the text if ocrmypdf is enabled. If it is disabled, both files are the same. * contain the text if ocrmypdf is enabled. If it is disabled, both files are the same.
*/ */
private def filesToExtract[F[_]: Sync](ctx: Context[F, _])( private def filesToExtract[F[_]: Sync](store: Store[F])(
item: ItemData, item: ItemData,
ra: RAttachment ra: RAttachment
): F[List[FileKey]] = ): F[List[FileKey]] =
item.originFile.get(ra.id) match { item.originFile.get(ra.id) match {
case Some(sid) => case Some(sid) =>
ctx.store.transact(RFileMeta.findMime(sid)).map { store.transact(RFileMeta.findMime(sid)).map {
case Some(MimeType.PdfMatch(_)) => case Some(MimeType.PdfMatch(_)) =>
List(ra.fileId) List(ra.fileId)
case _ => case _ =>

View File

@ -12,7 +12,7 @@ import cats.implicits._
import docspell.common.{Duration, Ident, Timestamp} import docspell.common.{Duration, Ident, Timestamp}
import docspell.joex.JoexApp import docspell.joex.JoexApp
import docspell.joexapi.model._ import docspell.joexapi.model._
import docspell.store.records.{RJob, RJobLog} import docspell.store.records.RJobLog
import org.http4s.HttpRoutes import org.http4s.HttpRoutes
import org.http4s.circe.CirceEntityEncoder._ import org.http4s.circe.CirceEntityEncoder._
@ -67,17 +67,19 @@ object JoexRoutes {
} }
} }
def mkJob(j: RJob): Job = // TODO !!
def mkJob(j: docspell.scheduler.Job[String]): Job =
Job( Job(
j.id, j.id,
j.subject, j.subject,
j.submitted, Timestamp.Epoch,
j.priority, j.priority,
j.retries, -1,
j.progress, -1,
j.started.getOrElse(Timestamp.Epoch) Timestamp.Epoch
) )
def mkJobLog(j: RJob, jl: Vector[RJobLog]): JobAndLog = def mkJobLog(j: docspell.scheduler.Job[String], jl: Vector[RJobLog]): JobAndLog =
JobAndLog(mkJob(j), jl.map(r => JobLogEvent(r.created, r.level, r.message)).toList) JobAndLog(mkJob(j), jl.map(r => JobLogEvent(r.created, r.level, r.message)).toList)
} }

View File

@ -16,8 +16,9 @@ import fs2._
import docspell.backend.ops.{OJoex, OUpload} import docspell.backend.ops.{OJoex, OUpload}
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task}
import docspell.logging.Logger import docspell.logging.Logger
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.queries.QOrganization import docspell.store.queries.QOrganization
import docspell.store.records._ import docspell.store.records._
@ -32,6 +33,7 @@ object ScanMailboxTask {
def apply[F[_]: Sync]( def apply[F[_]: Sync](
cfg: Config.ScanMailbox, cfg: Config.ScanMailbox,
store: Store[F],
emil: Emil[F], emil: Emil[F],
upload: OUpload[F], upload: OUpload[F],
joex: OJoex[F] joex: OJoex[F]
@ -42,22 +44,22 @@ object ScanMailboxTask {
s"=== Start importing mails for user ${ctx.args.account.user.id}" s"=== Start importing mails for user ${ctx.args.account.user.id}"
) )
_ <- ctx.logger.debug(s"Settings: ${ctx.args.asJson.noSpaces}") _ <- ctx.logger.debug(s"Settings: ${ctx.args.asJson.noSpaces}")
mailCfg <- getMailSettings(ctx) mailCfg <- getMailSettings(ctx, store)
folders = ctx.args.folders.mkString(", ") folders = ctx.args.folders.mkString(", ")
userId = ctx.args.account.user userId = ctx.args.account.user
imapConn = ctx.args.imapConnection imapConn = ctx.args.imapConnection
_ <- ctx.logger.info( _ <- ctx.logger.info(
s"Reading mails for user ${userId.id} from ${imapConn.id}/$folders" s"Reading mails for user ${userId.id} from ${imapConn.id}/$folders"
) )
_ <- importMails(cfg, mailCfg, emil, upload, joex, ctx) _ <- importMails(cfg, mailCfg, emil, upload, joex, ctx, store)
} yield () } yield ()
} }
def onCancel[F[_]]: Task[F, ScanMailboxArgs, Unit] = def onCancel[F[_]]: Task[F, ScanMailboxArgs, Unit] =
Task.log(_.warn("Cancelling scan-mailbox task")) Task.log(_.warn("Cancelling scan-mailbox task"))
def getMailSettings[F[_]: Sync](ctx: Context[F, Args]): F[RUserImap] = def getMailSettings[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[RUserImap] =
ctx.store store
.transact(RUserImap.getByName(ctx.args.account, ctx.args.imapConnection)) .transact(RUserImap.getByName(ctx.args.account, ctx.args.imapConnection))
.flatMap { .flatMap {
case Some(c) => c.pure[F] case Some(c) => c.pure[F]
@ -75,10 +77,11 @@ object ScanMailboxTask {
theEmil: Emil[F], theEmil: Emil[F],
upload: OUpload[F], upload: OUpload[F],
joex: OJoex[F], joex: OJoex[F],
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[Unit] = { ): F[Unit] = {
val mailer = theEmil(mailCfg.toMailConfig) val mailer = theEmil(mailCfg.toMailConfig)
val impl = new Impl[F](cfg, ctx) val impl = new Impl[F](cfg, ctx, store)
val inFolders = ctx.args.folders.take(cfg.maxFolders) val inFolders = ctx.args.folders.take(cfg.maxFolders)
val getInitialInput = val getInitialInput =
@ -142,7 +145,11 @@ object ScanMailboxTask {
ScanResult(List(folder -> left), processed) ScanResult(List(folder -> left), processed)
} }
final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) { final private class Impl[F[_]: Sync](
cfg: Config.ScanMailbox,
ctx: Context[F, Args],
store: Store[F]
) {
private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] = private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] =
MailOp(_ => f(ctx.logger)) MailOp(_ => f(ctx.logger))
@ -213,7 +220,7 @@ object ScanMailboxTask {
NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match { NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match {
case Some(nl) => case Some(nl) =>
for { for {
archives <- ctx.store.transact( archives <- store.transact(
RAttachmentArchive RAttachmentArchive
.findByMessageIdAndCollective(nl, ctx.args.account.collective) .findByMessageIdAndCollective(nl, ctx.args.account.collective)
) )
@ -237,7 +244,7 @@ object ScanMailboxTask {
for { for {
from <- OptionT.fromOption[F](mh.from) from <- OptionT.fromOption[F](mh.from)
_ <- OptionT( _ <- OptionT(
ctx.store.transact( store.transact(
QOrganization QOrganization
.findPersonByContact( .findPersonByContact(
ctx.args.account.collective, ctx.args.account.collective,

View File

@ -1,60 +0,0 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.scheduler
import cats.effect._
import fs2._
import fs2.concurrent.SignallingRef
import docspell.backend.ops.OJoex
import docspell.store.queue._
/** A periodic scheduler takes care to submit periodic tasks to the job queue.
*
* It is run in the background to regularly find a periodic task to execute. If the task
* is due, it will be submitted into the job queue where it will be picked up by the
* scheduler from some joex instance. If it is due in the future, a notification is
* scheduled to be received at that time so the task can be looked up again.
*/
trait PeriodicScheduler[F[_]] {
def config: PeriodicSchedulerConfig
def start: Stream[F, Nothing]
def shutdown: F[Unit]
def periodicAwake: F[Fiber[F, Throwable, Unit]]
def notifyChange: F[Unit]
}
object PeriodicScheduler {
def create[F[_]: Async](
cfg: PeriodicSchedulerConfig,
sch: Scheduler[F],
queue: JobQueue[F],
store: PeriodicTaskStore[F],
joex: OJoex[F]
): Resource[F, PeriodicScheduler[F]] =
for {
waiter <- Resource.eval(SignallingRef(true))
state <- Resource.eval(SignallingRef(PeriodicSchedulerImpl.emptyState[F]))
psch = new PeriodicSchedulerImpl[F](
cfg,
sch,
queue,
store,
joex,
waiter,
state
)
_ <- Resource.eval(psch.init)
} yield psch
}
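This file is removed here because the scheduler moved out of modules/joex into the new scheduler modules; the behaviour described in its doc comment stays the same. Its awake cycle, as a hedged sketch (the helper parameters are hypothetical stand-ins; the real implementation wakes early via the SignallingRef when notifyChange is called):

import cats.effect.Async
import cats.implicits._

def periodicLoop[F[_]: Async, T](
    findDueTask: F[Option[T]], // next periodic task whose schedule elapsed
    submit: T => F[Unit],      // hand the task over to the job queue
    sleepUntilNext: F[Unit]    // wait for the next due date or a wake-up
): F[Unit] =
  findDueTask.flatMap {
    case Some(t) => submit(t) >> periodicLoop(findDueTask, submit, sleepUntilNext)
    case None    => sleepUntilNext >> periodicLoop(findDueTask, submit, sleepUntilNext)
  }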

View File

@ -11,12 +11,10 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.joex.scheduler.Context import docspell.scheduler.Task
import docspell.joex.scheduler.Task import docspell.scheduler.usertask.UserTask
import docspell.store.records.RPeriodicTask import docspell.store.Store
import docspell.store.records.RUserEmail import docspell.store.records.RUserEmail
import docspell.store.usertask.UserTask
import docspell.store.usertask.UserTaskScope
import emil._ import emil._
@ -28,22 +26,20 @@ object UpdateCheckTask {
def onCancel[F[_]]: Task[F, Args, Unit] = def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn("Cancelling update-check task")) Task.log(_.warn("Cancelling update-check task"))
def periodicTask[F[_]: Sync](cfg: UpdateCheckConfig): F[RPeriodicTask] = def periodicTask[F[_]: Sync](cfg: UpdateCheckConfig): F[UserTask[Unit]] =
UserTask( UserTask(
Ident.unsafe("docspell-update-check"), Ident.unsafe("docspell-update-check"),
taskName, taskName,
cfg.enabled, cfg.enabled,
cfg.schedule, cfg.schedule,
None, "Docspell Update Check".some,
() ()
).encode.toPeriodicTask( ).pure[F]
UserTaskScope(cfg.senderAccount.collective),
"Docspell Update Check".some
)
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: UpdateCheckConfig, cfg: UpdateCheckConfig,
sendCfg: MailSendConfig, sendCfg: MailSendConfig,
store: Store[F],
emil: Emil[F], emil: Emil[F],
updateCheck: UpdateCheck[F], updateCheck: UpdateCheck[F],
thisVersion: ThisVersion thisVersion: ThisVersion
@ -57,7 +53,7 @@ object UpdateCheckTask {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Get SMTP connection for ${cfg.senderAccount.asString} and ${cfg.smtpId}" s"Get SMTP connection for ${cfg.senderAccount.asString} and ${cfg.smtpId}"
) )
smtpCfg <- findConnection(ctx, cfg) smtpCfg <- findConnection(store, cfg)
_ <- ctx.logger.debug("Checking for latest release at GitHub") _ <- ctx.logger.debug("Checking for latest release at GitHub")
latest <- updateCheck.latestRelease latest <- updateCheck.latestRelease
_ <- ctx.logger.debug(s"Got latest release: $latest.") _ <- ctx.logger.debug(s"Got latest release: $latest.")
@ -84,10 +80,10 @@ object UpdateCheckTask {
Task.pure(()) Task.pure(())
def findConnection[F[_]: Sync]( def findConnection[F[_]: Sync](
ctx: Context[F, _], store: Store[F],
cfg: UpdateCheckConfig cfg: UpdateCheckConfig
): F[RUserEmail] = ): F[RUserEmail] =
OptionT(ctx.store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId))) OptionT(store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId)))
.getOrElseF( .getOrElseF(
Sync[F].raiseError( Sync[F].raiseError(
new Exception( new Exception(

View File

@ -22,10 +22,12 @@ object ScribeConfigure {
Sync[F].delay { Sync[F].delay {
replaceJUL() replaceJUL()
val docspellLogger = scribe.Logger("docspell") val docspellLogger = scribe.Logger("docspell")
val flywayLogger = scribe.Logger("org.flywaydb")
unsafeConfigure(scribe.Logger.root, cfg.copy(minimumLevel = getRootMinimumLevel)) unsafeConfigure(scribe.Logger.root, cfg.copy(minimumLevel = getRootMinimumLevel))
unsafeConfigure(docspellLogger, cfg) unsafeConfigure(docspellLogger, cfg)
unsafeConfigure(flywayLogger, cfg) unsafeConfigure(scribe.Logger("org.flywaydb"), cfg)
unsafeConfigure(scribe.Logger("binny"), cfg)
unsafeConfigure(scribe.Logger("org.http4s"), cfg)
} }
private[this] def getRootMinimumLevel: Level = private[this] def getRootMinimumLevel: Level =

View File

@ -204,7 +204,8 @@ object Event {
state: JobState, state: JobState,
subject: String, subject: String,
submitter: Ident, submitter: Ident,
result: Json resultData: Json,
resultMsg: Option[String]
) extends Event { ) extends Event {
val eventType = JobDone val eventType = JobDone
val baseUrl = None val baseUrl = None
@ -222,7 +223,8 @@ object Event {
JobState.running, JobState.running,
"Process 3 files", "Process 3 files",
account.user, account.user,
Json.Null Json.Null,
None
) )
} yield ev } yield ev
} }

View File

@ -31,30 +31,25 @@ trait EventContext {
"content" -> content "content" -> content
) )
def defaultTitle: Either[String, String] def defaultMessage: Either[String, EventMessage]
def defaultTitleHtml: Either[String, String] def defaultMessageHtml: Either[String, EventMessage]
def defaultBody: Either[String, String]
def defaultBodyHtml: Either[String, String]
def defaultBoth: Either[String, String] def defaultBoth: Either[String, String]
def defaultBothHtml: Either[String, String] def defaultBothHtml: Either[String, String]
lazy val asJsonWithMessage: Either[String, Json] = lazy val asJsonWithMessage: Either[String, Json] =
for { for {
tt1 <- defaultTitle dm1 <- defaultMessage
tb1 <- defaultBody dm2 <- defaultMessageHtml
tt2 <- defaultTitleHtml
tb2 <- defaultBodyHtml
data = asJson data = asJson
msg = Json.obj( msg = Json.obj(
"message" -> Json.obj( "message" -> Json.obj(
"title" -> tt1.asJson, "title" -> dm1.title.asJson,
"body" -> tb1.asJson "body" -> dm1.body.asJson
), ),
"messageHtml" -> Json.obj( "messageHtml" -> Json.obj(
"title" -> tt2.asJson, "title" -> dm2.title.asJson,
"body" -> tb2.asJson "body" -> dm2.body.asJson
) )
) )
} yield data.withObject(o1 => msg.withObject(o2 => o1.deepMerge(o2).asJson)) } yield data.withObject(o1 => msg.withObject(o2 => o1.deepMerge(o2).asJson))
@ -65,10 +60,8 @@ object EventContext {
new EventContext { new EventContext {
val event = ev val event = ev
def content = Json.obj() def content = Json.obj()
def defaultTitle = Right("") def defaultMessage = Right(EventMessage.empty)
def defaultTitleHtml = Right("") def defaultMessageHtml = Right(EventMessage.empty)
def defaultBody = Right("")
def defaultBodyHtml = Right("")
def defaultBoth = Right("") def defaultBoth = Right("")
def defaultBothHtml = Right("") def defaultBothHtml = Right("")
} }

View File

@ -0,0 +1,13 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.notification.api
final case class EventMessage(title: String, body: String)
object EventMessage {
val empty: EventMessage = EventMessage("", "")
}
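Bundling title and body into one EventMessage lets callers sequence a single Either where two had to be combined before; a small usage sketch (EventContext and defaultMessage as in the diff):

import docspell.notification.api.{EventContext, EventMessage}

// Sketch: one Either now carries both parts of the rendered message.
def renderPlain(ctx: EventContext): Either[String, String] =
  ctx.defaultMessage.map((m: EventMessage) => s"${m.title}\n\n${m.body}")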

View File

@ -6,7 +6,7 @@
package docspell.notification.impl package docspell.notification.impl
import docspell.notification.api.EventContext import docspell.notification.api.{EventContext, EventMessage}
import yamusca.circe._ import yamusca.circe._
import yamusca.implicits._ import yamusca.implicits._
@ -24,17 +24,17 @@ abstract class AbstractEventContext extends EventContext {
def renderHtml(template: Template): String = def renderHtml(template: Template): String =
Markdown.toHtml(render(template)) Markdown.toHtml(render(template))
lazy val defaultTitle: Either[String, String] = lazy val defaultMessage: Either[String, EventMessage] =
titleTemplate.map(render) for {
title <- titleTemplate.map(render)
body <- bodyTemplate.map(render)
} yield EventMessage(title, body)
lazy val defaultTitleHtml: Either[String, String] = lazy val defaultMessageHtml: Either[String, EventMessage] =
titleTemplate.map(renderHtml) for {
title <- titleTemplate.map(renderHtml)
lazy val defaultBody: Either[String, String] = body <- bodyTemplate.map(renderHtml)
bodyTemplate.map(render) } yield EventMessage(title, body)
lazy val defaultBodyHtml: Either[String, String] =
bodyTemplate.map(renderHtml)
lazy val defaultBoth: Either[String, String] = lazy val defaultBoth: Either[String, String] =
for { for {

View File

@ -18,8 +18,9 @@ trait EventContextSyntax {
implicit final class EventContextOps(self: EventContext) { implicit final class EventContextOps(self: EventContext) {
def withDefault[F[_]](logger: Logger[F])(f: (String, String) => F[Unit]): F[Unit] = def withDefault[F[_]](logger: Logger[F])(f: (String, String) => F[Unit]): F[Unit] =
(for { (for {
tt <- self.defaultTitle dm <- self.defaultMessage
tb <- self.defaultBody tt = dm.title
tb = dm.body
} yield f(tt, tb)).fold(logError(logger), identity) } yield f(tt, tb)).fold(logError(logger), identity)
def withJsonMessage[F[_]](logger: Logger[F])(f: Json => F[Unit]): F[Unit] = def withJsonMessage[F[_]](logger: Logger[F])(f: Json => F[Unit]): F[Unit] =

View File

@ -23,9 +23,14 @@ final case class JobDoneCtx(event: Event.JobDone, data: JobDoneCtx.Data)
val content = data.asJson val content = data.asJson
val titleTemplate = Right(mustache"{{eventType}} (by *{{account.user}}*)") val titleTemplate = Right(mustache"{{eventType}} (by *{{account.user}}*)")
val bodyTemplate = Right( val bodyTemplate =
mustache"""{{#content}}_'{{subject}}'_ finished {{/content}}""" data.resultMsg match {
) case None =>
Right(mustache"""{{#content}}_'{{subject}}'_ finished {{/content}}""")
case Some(msg) =>
val tpl = s"""{{#content}}$msg{{/content}}"""
yamusca.imports.mustache.parse(tpl).left.map(_._2)
}
} }
object JobDoneCtx { object JobDoneCtx {
@ -46,7 +51,8 @@ object JobDoneCtx {
state: JobState, state: JobState,
subject: String, subject: String,
submitter: Ident, submitter: Ident,
result: Json resultData: Json,
resultMsg: Option[String]
) )
object Data { object Data {
implicit val jsonEncoder: Encoder[Data] = implicit val jsonEncoder: Encoder[Data] =
@ -61,7 +67,8 @@ object JobDoneCtx {
ev.state, ev.state,
ev.subject, ev.subject,
ev.submitter, ev.submitter,
ev.result ev.resultData,
ev.resultMsg
) )
} }
} }
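The notable change above is that a job may now ship its own notification text: resultMsg is parsed as a mustache template at render time, with the generic body as fallback. A hedged standalone sketch of that fallback, reusing the same yamusca calls as the diff:

import yamusca.imports._

// Sketch: prefer the job-supplied message; parse errors become strings.
def pickBodyTemplate(resultMsg: Option[String]): Either[String, Template] = {
  val raw = resultMsg match {
    case Some(msg) => s"{{#content}}$msg{{/content}}"
    case None      => "{{#content}}_'{{subject}}'_ finished {{/content}}"
  }
  mustache.parse(raw).left.map(_._2)
}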

View File

@ -46,9 +46,10 @@ class TagsChangedCtxTest extends FunSuite {
TagsChangedCtx.Data(account, List(item), List(tag), Nil, url.some.map(_.asString)) TagsChangedCtx.Data(account, List(item), List(tag), Nil, url.some.map(_.asString))
) )
assertEquals(ctx.defaultTitle.toOption.get, "TagsChanged (by *user2*)") val dm = ctx.defaultMessage.toOption.get
assertEquals(dm.title, "TagsChanged (by *user2*)")
assertEquals( assertEquals(
ctx.defaultBody.toOption.get, dm.body,
"Adding *tag-red* on [`Report 2`](http://test/item-1)." "Adding *tag-red* on [`Report 2`](http://test/item-1)."
) )
} }
@ -65,9 +66,10 @@ class TagsChangedCtxTest extends FunSuite {
) )
) )
assertEquals(ctx.defaultTitle.toOption.get, "TagsChanged (by *user2*)") val dm = ctx.defaultMessage.toOption.get
assertEquals(dm.title, "TagsChanged (by *user2*)")
assertEquals( assertEquals(
ctx.defaultBody.toOption.get, dm.body,
"Adding *tag-red*; Removing *tag-blue* on [`Report 2`](http://test/item-1)." "Adding *tag-red*; Removing *tag-blue* on [`Report 2`](http://test/item-1)."
) )
} }

View File

@ -2487,6 +2487,59 @@ paths:
schema: schema:
$ref: "#/components/schemas/BasicResult" $ref: "#/components/schemas/BasicResult"
/admin/files/cloneFileRepository:
post:
operationId: "admin-files-cloneFileRepository"
tags: [Admin]
summary: Copy all files into a new repository
description: |
Submits a task that will copy all files of the application
(from the default file repository) into another file
repository as specified in the request. The request may define
ids of file repository configurations that must be present in
the config file. An empty list means to copy to all enabled
file repositories from the default file repository.
security:
- adminHeader: []
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/FileRepositoryCloneRequest"
responses:
422:
description: BadRequest
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/admin/files/integrityCheck:
post:
operationId: "admin-files-integrityCheck"
tags: [Admin]
summary: Verifies the stored checksum
description: |
Submits a task that goes through the files and compares the
stored checksum (at the time of inserting) against a newly
calculated one.
security:
- adminHeader: []
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/FileIntegrityCheckRequest"
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/source: /sec/source:
get: get:
operationId: "sec-source-get-all" operationId: "sec-source-get-all"
@ -5433,6 +5486,26 @@ paths:
components: components:
schemas: schemas:
FileIntegrityCheckRequest:
description: |
Data for running a file integrity check
properties:
collective:
type: string
format: ident
FileRepositoryCloneRequest:
description: |
Clone the file repository to a new location.
required:
- targetRepositories
properties:
targetRepositories:
type: array
items:
type: string
format: ident
BookmarkedQuery: BookmarkedQuery:
description: | description: |
A query bookmark. A query bookmark.
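For illustration, the new cloneFileRepository endpoint can be exercised from Scala with an http4s client; a hedged sketch (base URL, admin secret, and header value are assumptions about a typical setup, not part of this diff):

import cats.effect.IO
import io.circe.Json
import io.circe.syntax._
import org.http4s._
import org.http4s.circe._
import org.http4s.client.Client
import org.http4s.implicits._
import org.typelevel.ci.CIString

// Assumed: server at localhost:7880, secret from the admin-endpoint config.
def cloneToAllStores(client: Client[IO], secret: String): IO[Json] = {
  val body = Json.obj("targetRepositories" -> List.empty[String].asJson) // empty = all enabled stores
  val req = Request[IO](
    Method.POST,
    uri"http://localhost:7880/api/v1/admin/files/cloneFileRepository"
  ).putHeaders(Header.Raw(CIString("Docspell-Admin-Secret"), secret))
    .withEntity(body)
  client.expect[Json](req)
}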

View File

@ -358,6 +358,41 @@ docspell.server {
# restrict file types that should be handed over to processing. # restrict file types that should be handed over to processing.
# By default all files are allowed. # By default all files are allowed.
valid-mime-types = [ ] valid-mime-types = [ ]
# The id of an enabled store from the `stores` array that should
# be used.
#
# IMPORTANT NOTE: All nodes must have the exact same file store
# configuration!
default-store = "database"
# A list of possible file stores. Each entry must have a unique
# id. The `type` is one of: default-database, file-system, s3.
#
# The enabled property serves currently to define target stores
# for the "copy files" task. All stores with enabled=false are
# removed from the list. The `default-store` must be enabled.
stores = {
database =
{ enabled = true
type = "default-database"
}
filesystem =
{ enabled = false
type = "file-system"
directory = "/some/directory"
}
minio =
{ enabled = false
type = "s3"
endpoint = "http://localhost:9000"
access-key = "username"
secret-key = "password"
bucket = "docspell"
}
}
} }
} }
} }
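On the code side this amounts to a map of store configs plus the rule that default-store must be present and enabled; a hedged sketch of that invariant (type and field names are assumptions, not docspell's exact model):

// Illustration only: the shape implied by the config comments above.
final case class FileStoreConfig(enabled: Boolean)

final case class Files(defaultStore: String, stores: Map[String, FileStoreConfig]) {
  val enabledStores: Map[String, FileStoreConfig] =
    stores.filter { case (_, c) => c.enabled }

  // Mirrors the documented rule: an unknown or disabled default store
  // should fail configuration loading.
  def validate: Either[String, Files] =
    if (enabledStores.contains(defaultStore)) Right(this)
    else Left(s"The default file store '$defaultStore' is not present or not enabled.")
}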

View File

@ -24,12 +24,18 @@ import scodec.bits.ByteVector
object ConfigFile { object ConfigFile {
private[this] val unsafeLogger = docspell.logging.unsafeLogger private[this] val unsafeLogger = docspell.logging.unsafeLogger
// IntelliJ is wrong, this is required
import Implicits._ import Implicits._
def loadConfig[F[_]: Async](args: List[String]): F[Config] = { def loadConfig[F[_]: Async](args: List[String]): F[Config] = {
val logger = docspell.logging.getLogger[F] val logger = docspell.logging.getLogger[F]
val validate = val validate =
Validation.of(generateSecretIfEmpty, duplicateOpenIdProvider, signKeyVsUserUrl) Validation.of(
generateSecretIfEmpty,
duplicateOpenIdProvider,
signKeyVsUserUrl,
filesValidate
)
ConfigFactory ConfigFactory
.default[F, Config](logger, "docspell.server")(args, validate) .default[F, Config](logger, "docspell.server")(args, validate)
} }
@ -97,4 +103,7 @@ object ConfigFile {
.map(checkProvider) .map(checkProvider)
) )
} }
def filesValidate: Validation[Config] =
Validation(cfg => cfg.backend.files.validate.map(_ => cfg))
} }
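filesValidate plugs into Validation.of, which composes independent checks over the full config; a self-contained sketch of the composition idea with cats (an illustration, not docspell's actual Validation type):

import cats.data.ValidatedNec
import cats.syntax.all._

object ConfigRules {
  // Illustration: rules are run with the Validated applicative so all
  // failures are collected instead of stopping at the first one.
  type Rule[C] = C => ValidatedNec[String, C]

  def compose[C](rules: Rule[C]*): Rule[C] =
    cfg => rules.toList.traverse_(rule => rule(cfg)).as(cfg)
}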

View File

@ -28,7 +28,8 @@ object Main extends IOApp {
Option(System.getProperty("config.file")), Option(System.getProperty("config.file")),
cfg.appId, cfg.appId,
cfg.baseUrl, cfg.baseUrl,
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled) Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
cfg.backend.files.defaultStoreConfig
) )
_ <- logger.info(s"\n${banner.render("***>")}") _ <- logger.info(s"\n${banner.render("***>")}")
_ <- _ <-

View File

@ -10,6 +10,9 @@ import fs2.Stream
import docspell.backend.BackendApp import docspell.backend.BackendApp
import org.http4s.HttpRoutes
import org.http4s.server.websocket.WebSocketBuilder2
trait RestApp[F[_]] { trait RestApp[F[_]] {
/** Access to the configuration used to build backend services. */ /** Access to the configuration used to build backend services. */
@ -25,4 +28,7 @@ trait RestApp[F[_]] {
* via websocket. * via websocket.
*/ */
def subscriptions: Stream[F, Nothing] def subscriptions: Stream[F, Nothing]
/** Http4s endpoint definitions. */
def routes(wsb: WebSocketBuilder2[F]): HttpRoutes[F]
} }

View File

@ -11,20 +11,31 @@ import fs2.Stream
import fs2.concurrent.Topic import fs2.concurrent.Topic
import docspell.backend.BackendApp import docspell.backend.BackendApp
import docspell.backend.auth.{AuthToken, ShareToken}
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient import docspell.ftssolr.SolrFtsClient
import docspell.notification.api.NotificationModule import docspell.notification.api.NotificationModule
import docspell.notification.impl.NotificationModuleImpl import docspell.notification.impl.NotificationModuleImpl
import docspell.oidc.CodeFlowRoutes
import docspell.pubsub.api.{PubSub, PubSubT} import docspell.pubsub.api.{PubSub, PubSubT}
import docspell.restserver.ws.OutputEvent import docspell.restserver.auth.OpenId
import docspell.restserver.http4s.EnvMiddleware
import docspell.restserver.routes._
import docspell.restserver.webapp.{TemplateRoutes, Templates, WebjarRoutes}
import docspell.restserver.ws.{OutputEvent, WebSocketRoutes}
import docspell.scheduler.impl.JobStoreModuleBuilder
import docspell.store.Store import docspell.store.Store
import emil.javamail.JavaMailEmil import emil.javamail.JavaMailEmil
import org.http4s.HttpRoutes
import org.http4s.client.Client import org.http4s.client.Client
import org.http4s.server.Router
import org.http4s.server.websocket.WebSocketBuilder2
final class RestAppImpl[F[_]: Async]( final class RestAppImpl[F[_]: Async](
val config: Config, val config: Config,
val backend: BackendApp[F], val backend: BackendApp[F],
httpClient: Client[F],
notificationMod: NotificationModule[F], notificationMod: NotificationModule[F],
wsTopic: Topic[F, OutputEvent], wsTopic: Topic[F, OutputEvent],
pubSub: PubSubT[F] pubSub: PubSubT[F]
@ -35,6 +46,108 @@ final class RestAppImpl[F[_]: Async](
def subscriptions: Stream[F, Nothing] = def subscriptions: Stream[F, Nothing] =
Subscriptions[F](wsTopic, pubSub) Subscriptions[F](wsTopic, pubSub)
def routes(wsb: WebSocketBuilder2[F]): HttpRoutes[F] =
createHttpApp(wsb)
val templates = TemplateRoutes[F](config, Templates[F])
def createHttpApp(
wsB: WebSocketBuilder2[F]
) =
Router(
"/api/info" -> InfoRoutes(),
"/api/v1/open/" -> openRoutes(httpClient),
"/api/v1/sec/" -> Authenticate(backend.login, config.auth) { token =>
securedRoutes(wsB, token)
},
"/api/v1/admin" -> AdminAuth(config.adminEndpoint) {
adminRoutes
},
"/api/v1/share" -> ShareAuth(backend.share, config.auth) { token =>
shareRoutes(token)
},
"/api/doc" -> templates.doc,
"/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
"/app" -> EnvMiddleware(templates.app),
"/sw.js" -> EnvMiddleware(templates.serviceWorker)
)
def adminRoutes: HttpRoutes[F] =
Router(
"fts" -> FullTextIndexRoutes.admin(config, backend),
"user/otp" -> TotpRoutes.admin(backend),
"user" -> UserRoutes.admin(backend),
"info" -> InfoRoutes.admin(config),
"attachments" -> AttachmentRoutes.admin(backend),
"files" -> FileRepositoryRoutes.admin(backend)
)
def shareRoutes(
token: ShareToken
): HttpRoutes[F] =
Router(
"search" -> ShareSearchRoutes(backend, config, token),
"attachment" -> ShareAttachmentRoutes(backend, token),
"item" -> ShareItemRoutes(backend, token),
"clientSettings" -> ClientSettingsRoutes.share(backend, token)
)
def openRoutes(
client: Client[F]
): HttpRoutes[F] =
Router(
"auth/openid" -> CodeFlowRoutes(
config.openIdEnabled,
OpenId.handle[F](backend, config),
OpenId.codeFlowConfig(config),
client
),
"auth" -> LoginRoutes.login(backend.login, config),
"signup" -> RegisterRoutes(backend, config),
"upload" -> UploadRoutes.open(backend, config),
"checkfile" -> CheckFileRoutes.open(backend),
"integration" -> IntegrationEndpointRoutes.open(backend, config),
"share" -> ShareRoutes.verify(backend, config)
)
def securedRoutes(
wsB: WebSocketBuilder2[F],
token: AuthToken
): HttpRoutes[F] =
Router(
"ws" -> WebSocketRoutes(token, backend, wsTopic, wsB),
"auth" -> LoginRoutes.session(backend.login, config, token),
"tag" -> TagRoutes(backend, token),
"equipment" -> EquipmentRoutes(backend, token),
"organization" -> OrganizationRoutes(backend, token),
"person" -> PersonRoutes(backend, token),
"source" -> SourceRoutes(backend, token),
"user/otp" -> TotpRoutes(backend, config, token),
"user" -> UserRoutes(backend, token),
"collective" -> CollectiveRoutes(backend, token),
"queue" -> JobQueueRoutes(backend, token),
"item" -> ItemRoutes(config, backend, token),
"items" -> ItemMultiRoutes(config, backend, token),
"attachment" -> AttachmentRoutes(backend, token),
"attachments" -> AttachmentMultiRoutes(backend, token),
"upload" -> UploadRoutes.secured(backend, config, token),
"checkfile" -> CheckFileRoutes.secured(backend, token),
"email/send" -> MailSendRoutes(backend, token),
"email/settings" -> MailSettingsRoutes(backend, token),
"email/sent" -> SentMailRoutes(backend, token),
"share" -> ShareRoutes.manage(backend, token),
"usertask/notifydueitems" -> NotifyDueItemsRoutes(config, backend, token),
"usertask/scanmailbox" -> ScanMailboxRoutes(backend, token),
"usertask/periodicquery" -> PeriodicQueryRoutes(config, backend, token),
"calevent/check" -> CalEventCheckRoutes(),
"fts" -> FullTextIndexRoutes.secured(config, backend, token),
"folder" -> FolderRoutes(backend, token),
"customfield" -> CustomFieldRoutes(backend, token),
"clientSettings" -> ClientSettingsRoutes(backend, token),
"notification" -> NotificationRoutes(config, backend, token),
"querybookmark" -> BookmarkRoutes(backend, token)
)
} }
object RestAppImpl { object RestAppImpl {
@ -55,10 +168,21 @@ object RestAppImpl {
notificationMod <- Resource.eval( notificationMod <- Resource.eval(
NotificationModuleImpl[F](store, javaEmil, httpClient, 200) NotificationModuleImpl[F](store, javaEmil, httpClient, 200)
) )
schedulerMod = JobStoreModuleBuilder(store)
.withPubsub(pubSubT)
.withEventSink(notificationMod)
.build
backend <- BackendApp backend <- BackendApp
.create[F](store, javaEmil, ftsClient, pubSubT, notificationMod) .create[F](store, javaEmil, ftsClient, pubSubT, schedulerMod, notificationMod)
app = new RestAppImpl[F](cfg, backend, notificationMod, wsTopic, pubSubT) app = new RestAppImpl[F](
cfg,
backend,
httpClient,
notificationMod,
wsTopic,
pubSubT
)
} yield app } yield app
} }

View File

@ -13,24 +13,18 @@ import cats.implicits._
import fs2.Stream import fs2.Stream
import fs2.concurrent.Topic import fs2.concurrent.Topic
import docspell.backend.auth.{AuthToken, ShareToken}
import docspell.backend.msg.Topics import docspell.backend.msg.Topics
import docspell.common._ import docspell.common._
import docspell.oidc.CodeFlowRoutes
import docspell.pubsub.naive.NaivePubSub import docspell.pubsub.naive.NaivePubSub
import docspell.restserver.auth.OpenId import docspell.restserver.http4s.InternalHeader
import docspell.restserver.http4s.{EnvMiddleware, InternalHeader} import docspell.restserver.ws.OutputEvent
import docspell.restserver.routes._
import docspell.restserver.webapp._
import docspell.restserver.ws.OutputEvent.KeepAlive import docspell.restserver.ws.OutputEvent.KeepAlive
import docspell.restserver.ws.{OutputEvent, WebSocketRoutes}
import docspell.store.Store import docspell.store.Store
import docspell.store.records.RInternalSetting import docspell.store.records.RInternalSetting
import org.http4s._ import org.http4s._
import org.http4s.blaze.client.BlazeClientBuilder import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.blaze.server.BlazeServerBuilder import org.http4s.blaze.server.BlazeServerBuilder
import org.http4s.client.Client
import org.http4s.dsl.Http4sDsl import org.http4s.dsl.Http4sDsl
import org.http4s.headers.Location import org.http4s.headers.Location
import org.http4s.implicits._ import org.http4s.implicits._
@ -51,7 +45,7 @@ object RestServer {
server = server =
Stream Stream
.resource(createApp(cfg, pools, wsTopic)) .resource(createApp(cfg, pools, wsTopic))
.flatMap { case (restApp, pubSub, httpClient, setting) => .flatMap { case (restApp, pubSub, setting) =>
Stream( Stream(
restApp.subscriptions, restApp.subscriptions,
restApp.eventConsume(2), restApp.eventConsume(2),
@ -59,7 +53,7 @@ object RestServer {
.bindHttp(cfg.bind.port, cfg.bind.address) .bindHttp(cfg.bind.port, cfg.bind.address)
.withoutBanner .withoutBanner
.withHttpWebSocketApp( .withHttpWebSocketApp(
createHttpApp(cfg, setting, httpClient, pubSub, restApp, wsTopic) createHttpApp(setting, pubSub, restApp)
) )
.serve .serve
.drain .drain
@ -76,13 +70,13 @@ object RestServer {
wsTopic: Topic[F, OutputEvent] wsTopic: Topic[F, OutputEvent]
): Resource[ ): Resource[
F, F,
(RestApp[F], NaivePubSub[F], Client[F], RInternalSetting) (RestApp[F], NaivePubSub[F], RInternalSetting)
] = ] =
for { for {
httpClient <- BlazeClientBuilder[F].resource httpClient <- BlazeClientBuilder[F].resource
store <- Store.create[F]( store <- Store.create[F](
cfg.backend.jdbc, cfg.backend.jdbc,
cfg.backend.files.chunkSize, cfg.backend.files.defaultFileRepositoryConfig,
pools.connectEC pools.connectEC
) )
setting <- Resource.eval(store.transact(RInternalSetting.create)) setting <- Resource.eval(store.transact(RInternalSetting.create))
@ -92,41 +86,22 @@ object RestServer {
httpClient httpClient
)(Topics.all.map(_.topic)) )(Topics.all.map(_.topic))
restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic) restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic)
} yield (restApp, pubSub, httpClient, setting) } yield (restApp, pubSub, setting)
def createHttpApp[F[_]: Async]( def createHttpApp[F[_]: Async](
cfg: Config,
internSettings: RInternalSetting, internSettings: RInternalSetting,
httpClient: Client[F],
pubSub: NaivePubSub[F], pubSub: NaivePubSub[F],
restApp: RestApp[F], restApp: RestApp[F]
topic: Topic[F, OutputEvent]
)( )(
wsB: WebSocketBuilder2[F] wsB: WebSocketBuilder2[F]
) = { ) = {
val templates = TemplateRoutes[F](cfg, Templates[F]) val internal = Router(
val httpApp = Router( "/" -> redirectTo("/app"),
"/internal" -> InternalHeader(internSettings.internalRouteKey) { "/internal" -> InternalHeader(internSettings.internalRouteKey) {
internalRoutes(pubSub) internalRoutes(pubSub)
}, }
"/api/info" -> routes.InfoRoutes(), )
"/api/v1/open/" -> openRoutes(cfg, httpClient, restApp), val httpApp = (internal <+> restApp.routes(wsB)).orNotFound
"/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
securedRoutes(cfg, restApp, wsB, topic, token)
},
"/api/v1/admin" -> AdminAuth(cfg.adminEndpoint) {
adminRoutes(cfg, restApp)
},
"/api/v1/share" -> ShareAuth(restApp.backend.share, cfg.auth) { token =>
shareRoutes(cfg, restApp, token)
},
"/api/doc" -> templates.doc,
"/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
"/app" -> EnvMiddleware(templates.app),
"/sw.js" -> EnvMiddleware(templates.serviceWorker),
"/" -> redirectTo("/app")
).orNotFound
Logger.httpApp(logHeaders = false, logBody = false)(httpApp) Logger.httpApp(logHeaders = false, logBody = false)(httpApp)
} }
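The rewritten createHttpApp leans on the SemigroupK instance of HttpRoutes: internal <+> restApp.routes(wsb) tries the left routes first, and unmatched requests fall through to the right. A minimal standalone sketch:

import cats.effect.IO
import cats.syntax.semigroupk._
import org.http4s._
import org.http4s.dsl.io._
import org.http4s.implicits._

// "/ping" is answered by `internal`; everything else falls through to `app`.
val internal: HttpRoutes[IO] = HttpRoutes.of[IO] { case GET -> Root / "ping" => Ok("pong") }
val app: HttpRoutes[IO] = HttpRoutes.of[IO] { case GET -> Root / "app" => Ok("app") }
val httpApp: HttpApp[IO] = (internal <+> app).orNotFound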
@ -135,88 +110,6 @@ object RestServer {
"pubsub" -> pubSub.receiveRoute "pubsub" -> pubSub.receiveRoute
) )
def securedRoutes[F[_]: Async](
cfg: Config,
restApp: RestApp[F],
wsB: WebSocketBuilder2[F],
topic: Topic[F, OutputEvent],
token: AuthToken
): HttpRoutes[F] =
Router(
"ws" -> WebSocketRoutes(token, restApp.backend, topic, wsB),
"auth" -> LoginRoutes.session(restApp.backend.login, cfg, token),
"tag" -> TagRoutes(restApp.backend, token),
"equipment" -> EquipmentRoutes(restApp.backend, token),
"organization" -> OrganizationRoutes(restApp.backend, token),
"person" -> PersonRoutes(restApp.backend, token),
"source" -> SourceRoutes(restApp.backend, token),
"user/otp" -> TotpRoutes(restApp.backend, cfg, token),
"user" -> UserRoutes(restApp.backend, token),
"collective" -> CollectiveRoutes(restApp.backend, token),
"queue" -> JobQueueRoutes(restApp.backend, token),
"item" -> ItemRoutes(cfg, restApp.backend, token),
"items" -> ItemMultiRoutes(cfg, restApp.backend, token),
"attachment" -> AttachmentRoutes(restApp.backend, token),
"attachments" -> AttachmentMultiRoutes(restApp.backend, token),
"upload" -> UploadRoutes.secured(restApp.backend, cfg, token),
"checkfile" -> CheckFileRoutes.secured(restApp.backend, token),
"email/send" -> MailSendRoutes(restApp.backend, token),
"email/settings" -> MailSettingsRoutes(restApp.backend, token),
"email/sent" -> SentMailRoutes(restApp.backend, token),
"share" -> ShareRoutes.manage(restApp.backend, token),
"usertask/notifydueitems" -> NotifyDueItemsRoutes(cfg, restApp.backend, token),
"usertask/scanmailbox" -> ScanMailboxRoutes(restApp.backend, token),
"usertask/periodicquery" -> PeriodicQueryRoutes(cfg, restApp.backend, token),
"calevent/check" -> CalEventCheckRoutes(),
"fts" -> FullTextIndexRoutes.secured(cfg, restApp.backend, token),
"folder" -> FolderRoutes(restApp.backend, token),
"customfield" -> CustomFieldRoutes(restApp.backend, token),
"clientSettings" -> ClientSettingsRoutes(restApp.backend, token),
"notification" -> NotificationRoutes(cfg, restApp.backend, token),
"querybookmark" -> BookmarkRoutes(restApp.backend, token)
)
def openRoutes[F[_]: Async](
cfg: Config,
client: Client[F],
restApp: RestApp[F]
): HttpRoutes[F] =
Router(
"auth/openid" -> CodeFlowRoutes(
cfg.openIdEnabled,
OpenId.handle[F](restApp.backend, cfg),
OpenId.codeFlowConfig(cfg),
client
),
"auth" -> LoginRoutes.login(restApp.backend.login, cfg),
"signup" -> RegisterRoutes(restApp.backend, cfg),
"upload" -> UploadRoutes.open(restApp.backend, cfg),
"checkfile" -> CheckFileRoutes.open(restApp.backend),
"integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg),
"share" -> ShareRoutes.verify(restApp.backend, cfg)
)
def adminRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
Router(
"fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend),
"user/otp" -> TotpRoutes.admin(restApp.backend),
"user" -> UserRoutes.admin(restApp.backend),
"info" -> InfoRoutes.admin(cfg),
"attachments" -> AttachmentRoutes.admin(restApp.backend)
)
def shareRoutes[F[_]: Async](
cfg: Config,
restApp: RestApp[F],
token: ShareToken
): HttpRoutes[F] =
Router(
"search" -> ShareSearchRoutes(restApp.backend, cfg, token),
"attachment" -> ShareAttachmentRoutes(restApp.backend, token),
"item" -> ShareItemRoutes(restApp.backend, token),
"clientSettings" -> ClientSettingsRoutes.share(restApp.backend, token)
)
def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = { def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = {
val dsl = new Http4sDsl[F] {} val dsl = new Http4sDsl[F] {}
import dsl._ import dsl._

View File

@ -10,9 +10,9 @@ import cats.effect.Async
import fs2.Stream import fs2.Stream
import fs2.concurrent.Topic import fs2.concurrent.Topic
import docspell.backend.msg.{JobDone, JobSubmitted}
import docspell.pubsub.api.PubSubT import docspell.pubsub.api.PubSubT
import docspell.restserver.ws.OutputEvent import docspell.restserver.ws.OutputEvent
import docspell.scheduler.msg.{JobDone, JobSubmitted}
/** Subscribes to those events from docspell that are forwarded to the websocket endpoints /** Subscribes to those events from docspell that are forwarded to the websocket endpoints
*/ */

View File

@ -0,0 +1,69 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.restserver.routes
import cats.data.NonEmptyList
import cats.effect._
import cats.implicits._
import docspell.backend.BackendApp
import docspell.common.FileCopyTaskArgs.Selection
import docspell.common.{FileCopyTaskArgs, FileIntegrityCheckArgs, FileKeyPart}
import docspell.restapi.model._
import org.http4s._
import org.http4s.circe.CirceEntityDecoder._
import org.http4s.circe.CirceEntityEncoder._
import org.http4s.dsl.Http4sDsl
object FileRepositoryRoutes {
def admin[F[_]: Async](backend: BackendApp[F]): HttpRoutes[F] = {
val dsl = Http4sDsl[F]
import dsl._
val logger = docspell.logging.getLogger[F]
HttpRoutes.of {
case req @ POST -> Root / "cloneFileRepository" =>
for {
input <- req.as[FileRepositoryCloneRequest]
args = makeTaskArgs(input)
job <- backend.fileRepository.cloneFileRepository(args, true)
result = BasicResult(
job.isDefined,
job.fold(s"Job for '${FileCopyTaskArgs.taskName.id}' already running")(j =>
s"Job for '${FileCopyTaskArgs.taskName.id}' submitted: ${j.id.id}"
)
)
_ <- logger.info(result.message)
resp <- Ok(result)
} yield resp
case req @ POST -> Root / "integrityCheck" =>
for {
input <- req.as[FileKeyPart]
job <- backend.fileRepository.checkIntegrityAll(input, true)
result = BasicResult(
job.isDefined,
job.fold(s"Job for '${FileCopyTaskArgs.taskName.id}' already running")(j =>
s"Job for '${FileIntegrityCheckArgs.taskName.id}' submitted: ${j.id.id}"
)
)
_ <- logger.info(result.message)
resp <- Ok(result)
} yield resp
}
}
def makeTaskArgs(input: FileRepositoryCloneRequest): FileCopyTaskArgs =
NonEmptyList.fromList(input.targetRepositories) match {
case Some(nel) =>
FileCopyTaskArgs(None, Selection.Stores(nel))
case None =>
FileCopyTaskArgs(None, Selection.All)
}
}
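Hedged examples of the mapping in makeTaskArgs (assuming FileRepositoryCloneRequest wraps a plain list of ids and Ident.unsafe as found in docspell-common; values are placeholders):

// Hypothetical inputs:
makeTaskArgs(FileRepositoryCloneRequest(Nil))
// => FileCopyTaskArgs(None, Selection.All) -- copy to every enabled store

makeTaskArgs(FileRepositoryCloneRequest(List(Ident.unsafe("minio"))))
// => FileCopyTaskArgs(None, Selection.Stores(NonEmptyList.of(Ident.unsafe("minio"))))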

Some files were not shown because too many files have changed in this diff.