diff --git a/build.sbt b/build.sbt index aa74d056..7f395a82 100644 --- a/build.sbt +++ b/build.sbt @@ -519,6 +519,29 @@ val pubsubNaive = project ) .dependsOn(common, pubsubApi, store % "compile->compile;test->test") +val schedulerApi = project + .in(file("modules/scheduler/api")) + .disablePlugins(RevolverPlugin) + .settings(sharedSettings) + .withTestSettingsDependsOn(loggingScribe) + .settings( + name := "docspell-scheduler-api", + libraryDependencies ++= + Dependencies.fs2Core ++ + Dependencies.circeCore + ) + .dependsOn(loggingApi, common, pubsubApi) + +val schedulerImpl = project + .in(file("modules/scheduler/impl")) + .disablePlugins(RevolverPlugin) + .settings(sharedSettings) + .withTestSettingsDependsOn(loggingScribe) + .settings( + name := "docspell-scheduler-impl" + ) + .dependsOn(store, schedulerApi, notificationApi, pubsubApi) + val extract = project .in(file("modules/extract")) .disablePlugins(RevolverPlugin) @@ -641,7 +664,16 @@ val backend = project Dependencies.http4sClient ++ Dependencies.emil ) - .dependsOn(store, notificationApi, joexapi, ftsclient, totp, pubsubApi, loggingApi) + .dependsOn( + store, + notificationApi, + joexapi, + ftsclient, + totp, + pubsubApi, + loggingApi, + schedulerApi + ) val oidc = project .in(file("modules/oidc")) @@ -732,7 +764,8 @@ val joex = project restapi, ftssolr, pubsubNaive, - notificationImpl + notificationImpl, + schedulerImpl ) val restserver = project @@ -804,7 +837,8 @@ val restserver = project ftssolr, oidc, pubsubNaive, - notificationImpl + notificationImpl, + schedulerImpl ) // --- Website Documentation @@ -902,7 +936,9 @@ val root = project pubsubApi, pubsubNaive, notificationApi, - notificationImpl + notificationImpl, + schedulerApi, + schedulerImpl ) // --- Helpers diff --git a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala index fd217fb5..5d5dc532 100644 --- 
a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala +++ b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala @@ -10,14 +10,13 @@ import cats.effect._ import docspell.backend.auth.Login import docspell.backend.fulltext.CreateIndex -import docspell.backend.msg.JobQueuePublish import docspell.backend.ops._ import docspell.backend.signup.OSignup import docspell.ftsclient.FtsClient import docspell.notification.api.{EventExchange, NotificationModule} import docspell.pubsub.api.PubSubT +import docspell.scheduler.JobStoreModule import docspell.store.Store -import docspell.store.usertask.UserTaskStore import docspell.totp.Totp import emil.Emil @@ -50,6 +49,7 @@ trait BackendApp[F[_]] { def events: EventExchange[F] def notification: ONotification[F] def bookmarks: OQueryBookmarks[F] + def fileRepository: OFileRepository[F] } object BackendApp { @@ -59,29 +59,43 @@ object BackendApp { javaEmil: Emil[F], ftsClient: FtsClient[F], pubSubT: PubSubT[F], + schedulerModule: JobStoreModule[F], notificationMod: NotificationModule[F] ): Resource[F, BackendApp[F]] = for { - utStore <- UserTaskStore(store) - queue <- JobQueuePublish(store, pubSubT, notificationMod) totpImpl <- OTotp(store, Totp.default) loginImpl <- Login[F](store, Totp.default) signupImpl <- OSignup[F](store) joexImpl <- OJoex(pubSubT) - collImpl <- OCollective[F](store, utStore, queue, joexImpl) + collImpl <- OCollective[F]( + store, + schedulerModule.userTasks, + schedulerModule.jobs, + joexImpl + ) sourceImpl <- OSource[F](store) tagImpl <- OTag[F](store) equipImpl <- OEquipment[F](store) orgImpl <- OOrganization(store) - uploadImpl <- OUpload(store, queue, joexImpl) + uploadImpl <- OUpload(store, schedulerModule.jobs, joexImpl) nodeImpl <- ONode(store) jobImpl <- OJob(store, joexImpl, pubSubT) createIndex <- CreateIndex.resource(ftsClient, store) - itemImpl <- OItem(store, ftsClient, createIndex, queue, joexImpl) + itemImpl <- OItem(store, ftsClient, createIndex, schedulerModule.jobs, 
joexImpl) itemSearchImpl <- OItemSearch(store) - fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl) + fulltextImpl <- OFulltext( + itemSearchImpl, + ftsClient, + store, + schedulerModule.jobs, + joexImpl + ) mailImpl <- OMail(store, javaEmil) - userTaskImpl <- OUserTask(utStore, store, queue, joexImpl) + userTaskImpl <- OUserTask( + schedulerModule.userTasks, + store, + joexImpl + ) folderImpl <- OFolder(store) customFieldsImpl <- OCustomFields(store) simpleSearchImpl = OSimpleSearch(fulltextImpl, itemSearchImpl) @@ -91,6 +105,7 @@ object BackendApp { ) notifyImpl <- ONotification(store, notificationMod) bookmarksImpl <- OQueryBookmarks(store) + fileRepoImpl <- OFileRepository(store, schedulerModule.jobs, joexImpl) } yield new BackendApp[F] { val pubSub = pubSubT val login = loginImpl @@ -118,5 +133,6 @@ object BackendApp { val events = notificationMod val notification = notifyImpl val bookmarks = bookmarksImpl + val fileRepository = fileRepoImpl } } diff --git a/modules/backend/src/main/scala/docspell/backend/Config.scala b/modules/backend/src/main/scala/docspell/backend/Config.scala index 8ddce838..efccb8dc 100644 --- a/modules/backend/src/main/scala/docspell/backend/Config.scala +++ b/modules/backend/src/main/scala/docspell/backend/Config.scala @@ -6,9 +6,13 @@ package docspell.backend +import cats.data.{Validated, ValidatedNec} +import cats.implicits._ + import docspell.backend.signup.{Config => SignupConfig} import docspell.common._ import docspell.store.JdbcConfig +import docspell.store.file.FileRepositoryConfig import emil.javamail.Settings @@ -21,10 +25,45 @@ case class Config( def mailSettings: Settings = Settings.defaultSettings.copy(debug = mailDebug) + } object Config { - case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType]) + case class Files( + chunkSize: Int, + validMimeTypes: Seq[MimeType], + defaultStore: Ident, + stores: Map[Ident, FileStoreConfig] + ) { + val enabledStores: Map[Ident, FileStoreConfig] = + 
stores.view.filter(_._2.enabled).toMap + def defaultStoreConfig: FileStoreConfig = + enabledStores(defaultStore) + + def defaultFileRepositoryConfig: FileRepositoryConfig = + FileRepositoryConfig.fromFileStoreConfig(chunkSize, defaultStoreConfig) + + def getFileRepositoryConfig(id: Ident): Option[FileRepositoryConfig] = + stores.get(id).map(FileRepositoryConfig.fromFileStoreConfig(chunkSize, _)) + + def validate: ValidatedNec[String, Files] = { + val storesEmpty = + if (enabledStores.isEmpty) + Validated.invalidNec( + "No file stores defined! Make sure at least one enabled store is present." + ) + else Validated.validNec(()) + + val defaultStorePresent = + enabledStores.get(defaultStore) match { + case Some(_) => Validated.validNec(()) + case None => + Validated.invalidNec(s"Default file store not present: ${defaultStore.id}") + } + + (storesEmpty |+| defaultStorePresent).map(_ => this) + } + } } diff --git a/modules/backend/src/main/scala/docspell/backend/JobFactory.scala b/modules/backend/src/main/scala/docspell/backend/JobFactory.scala index 05320b31..9a79f104 100644 --- a/modules/backend/src/main/scala/docspell/backend/JobFactory.scala +++ b/modules/backend/src/main/scala/docspell/backend/JobFactory.scala @@ -12,80 +12,88 @@ import cats.implicits._ import docspell.backend.MailAddressCodec import docspell.common._ import docspell.notification.api.PeriodicQueryArgs -import docspell.store.records.RJob +import docspell.scheduler.Job object JobFactory extends MailAddressCodec { - def periodicQuery[F[_]: Sync](args: PeriodicQueryArgs, submitter: AccountId): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - job = RJob.newJob( - id, - PeriodicQueryArgs.taskName, - submitter.collective, - args, - s"Running periodic query, notify via ${args.channels.map(_.channelType)}", - now, - submitter.user, - Priority.Low, - None - ) - } yield job + def integrityCheck[F[_]: Sync]( + args: FileIntegrityCheckArgs, + submitter: AccountId = 
DocspellSystem.account + ): F[Job[FileIntegrityCheckArgs]] = + Job.createNew( + FileIntegrityCheckArgs.taskName, + submitter.collective, + args, + s"Check integrity of files", + submitter.user, + Priority.High, + Some(FileIntegrityCheckArgs.taskName) + ) + + def fileCopy[F[_]: Sync]( + args: FileCopyTaskArgs, + submitter: AccountId = DocspellSystem.account + ): F[Job[FileCopyTaskArgs]] = + Job.createNew( + FileCopyTaskArgs.taskName, + submitter.collective, + args, + "Copying all files", + submitter.user, + Priority.High, + Some(FileCopyTaskArgs.taskName) + ) + + def periodicQuery[F[_]: Sync]( + args: PeriodicQueryArgs, + submitter: AccountId + ): F[Job[PeriodicQueryArgs]] = + Job.createNew( + PeriodicQueryArgs.taskName, + submitter.collective, + args, + s"Running periodic query, notify via ${args.channels.map(_.channelType)}", + submitter.user, + Priority.Low, + None + ) def makePageCount[F[_]: Sync]( args: MakePageCountArgs, account: Option[AccountId] - ): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - job = RJob.newJob( - id, - MakePageCountArgs.taskName, - account.map(_.collective).getOrElse(DocspellSystem.taskGroup), - args, - s"Find page-count metadata for ${args.attachment.id}", - now, - account.map(_.user).getOrElse(DocspellSystem.user), - Priority.Low, - Some(MakePageCountArgs.taskName / args.attachment) - ) - } yield job + ): F[Job[MakePageCountArgs]] = + Job.createNew( + MakePageCountArgs.taskName, + account.map(_.collective).getOrElse(DocspellSystem.taskGroup), + args, + s"Find page-count metadata for ${args.attachment.id}", + account.map(_.user).getOrElse(DocspellSystem.user), + Priority.Low, + Some(MakePageCountArgs.taskName / args.attachment) + ) def makePreview[F[_]: Sync]( args: MakePreviewArgs, account: Option[AccountId] - ): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - job = RJob.newJob( - id, - MakePreviewArgs.taskName, - account.map(_.collective).getOrElse(DocspellSystem.taskGroup), - 
args, - s"Generate preview image", - now, - account.map(_.user).getOrElse(DocspellSystem.user), - Priority.Low, - Some(MakePreviewArgs.taskName / args.attachment) - ) - } yield job + ): F[Job[MakePreviewArgs]] = + Job.createNew( + MakePreviewArgs.taskName, + account.map(_.collective).getOrElse(DocspellSystem.taskGroup), + args, + s"Generate preview image", + account.map(_.user).getOrElse(DocspellSystem.user), + Priority.Low, + Some(MakePreviewArgs.taskName / args.attachment) + ) def allPreviews[F[_]: Sync]( args: AllPreviewsArgs, submitter: Option[Ident] - ): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - } yield RJob.newJob( - id, + ): F[Job[AllPreviewsArgs]] = + Job.createNew( AllPreviewsArgs.taskName, args.collective.getOrElse(DocspellSystem.taskGroup), args, "Create preview images", - now, submitter.getOrElse(DocspellSystem.user), Priority.Low, Some(DocspellSystem.allPreviewTaskTracker) @@ -95,127 +103,91 @@ object JobFactory extends MailAddressCodec { collective: Option[Ident], submitter: Option[Ident], prio: Priority - ): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - job = RJob.newJob( - id, - ConvertAllPdfArgs.taskName, - collective.getOrElse(DocspellSystem.taskGroup), - ConvertAllPdfArgs(collective), - s"Convert all pdfs not yet converted", - now, - submitter.getOrElse(DocspellSystem.user), - prio, - collective - .map(c => c / ConvertAllPdfArgs.taskName) - .orElse(ConvertAllPdfArgs.taskName.some) - ) - } yield job + ): F[Job[ConvertAllPdfArgs]] = + Job.createNew( + ConvertAllPdfArgs.taskName, + collective.getOrElse(DocspellSystem.taskGroup), + ConvertAllPdfArgs(collective), + s"Convert all pdfs not yet converted", + submitter.getOrElse(DocspellSystem.user), + prio, + collective + .map(c => c / ConvertAllPdfArgs.taskName) + .orElse(ConvertAllPdfArgs.taskName.some) + ) def reprocessItem[F[_]: Sync]( args: ReProcessItemArgs, account: AccountId, prio: Priority - ): F[RJob] = - for { - id <- 
Ident.randomId[F] - now <- Timestamp.current[F] - job = RJob.newJob( - id, - ReProcessItemArgs.taskName, - account.collective, - args, - s"Re-process files of item ${args.itemId.id}", - now, - account.user, - prio, - Some(ReProcessItemArgs.taskName / args.itemId) - ) - } yield job + ): F[Job[ReProcessItemArgs]] = + Job.createNew( + ReProcessItemArgs.taskName, + account.collective, + args, + s"Re-process files of item ${args.itemId.id}", + account.user, + prio, + Some(ReProcessItemArgs.taskName / args.itemId) + ) def processItem[F[_]: Sync]( args: ProcessItemArgs, account: AccountId, prio: Priority, tracker: Option[Ident] - ): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - job = RJob.newJob( - id, - ProcessItemArgs.taskName, - account.collective, - args, - args.makeSubject, - now, - account.user, - prio, - tracker - ) - } yield job + ): F[Job[ProcessItemArgs]] = + Job.createNew( + ProcessItemArgs.taskName, + account.collective, + args, + args.makeSubject, + account.user, + prio, + tracker + ) def processItems[F[_]: Sync]( args: Vector[ProcessItemArgs], account: AccountId, prio: Priority, tracker: Option[Ident] - ): F[Vector[RJob]] = { - def create(now: Timestamp, arg: ProcessItemArgs): F[RJob] = - Ident - .randomId[F] - .map(id => - RJob.newJob( - id, - ProcessItemArgs.taskName, - account.collective, - arg, - arg.makeSubject, - now, - account.user, - prio, - tracker - ) - ) + ): F[Vector[Job[ProcessItemArgs]]] = { + def create(arg: ProcessItemArgs): F[Job[ProcessItemArgs]] = + Job.createNew( + ProcessItemArgs.taskName, + account.collective, + arg, + arg.makeSubject, + account.user, + prio, + tracker + ) - for { - now <- Timestamp.current[F] - jobs <- args.traverse(a => create(now, a)) - } yield jobs + args.traverse(create) } - def reIndexAll[F[_]: Sync]: F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - } yield RJob.newJob( - id, + def reIndexAll[F[_]: Sync]: F[Job[ReIndexTaskArgs]] = + Job.createNew( 
ReIndexTaskArgs.taskName, DocspellSystem.taskGroup, ReIndexTaskArgs(None), - s"Recreate full-text index", - now, + "Recreate full-text index", DocspellSystem.taskGroup, Priority.Low, Some(DocspellSystem.migrationTaskTracker) ) - def reIndex[F[_]: Sync](account: AccountId): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - args = ReIndexTaskArgs(Some(account.collective)) - } yield RJob.newJob( - id, + def reIndex[F[_]: Sync](account: AccountId): F[Job[ReIndexTaskArgs]] = { + val args = ReIndexTaskArgs(Some(account.collective)) + Job.createNew( ReIndexTaskArgs.taskName, account.collective, args, - s"Recreate full-text index", - now, + "Recreate full-text index", account.user, Priority.Low, Some(ReIndexTaskArgs.tracker(args)) ) + } } diff --git a/modules/backend/src/main/scala/docspell/backend/msg/Topics.scala b/modules/backend/src/main/scala/docspell/backend/msg/Topics.scala index ae53d9f6..63d8c401 100644 --- a/modules/backend/src/main/scala/docspell/backend/msg/Topics.scala +++ b/modules/backend/src/main/scala/docspell/backend/msg/Topics.scala @@ -8,16 +8,19 @@ package docspell.backend.msg import cats.data.NonEmptyList -import docspell.pubsub.api.{Topic, TypedTopic} +import docspell.pubsub.api.TypedTopic +import docspell.scheduler.msg._ /** All topics used in Docspell. */ object Topics { - /** A generic notification to the job executors to look for new work. */ - val jobsNotify: TypedTopic[Unit] = - TypedTopic[Unit](Topic("jobs-notify")) - /** A list of all topics. It is required to list every topic in use here! 
*/ val all: NonEmptyList[TypedTopic[_]] = - NonEmptyList.of(JobDone.topic, CancelJob.topic, jobsNotify, JobSubmitted.topic) + NonEmptyList.of( + JobDone.topic, + CancelJob.topic, + JobsNotify(), + JobSubmitted.topic, + PeriodicTaskNotify() + ) } diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala b/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala index 907bfcef..e59ddb9a 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala @@ -14,11 +14,11 @@ import docspell.backend.JobFactory import docspell.backend.PasswordCrypt import docspell.backend.ops.OCollective._ import docspell.common._ +import docspell.scheduler.JobStore +import docspell.scheduler.usertask.{UserTask, UserTaskScope, UserTaskStore} import docspell.store.UpdateResult import docspell.store.queries.{QCollective, QUser} -import docspell.store.queue.JobQueue import docspell.store.records._ -import docspell.store.usertask.{UserTask, UserTaskScope, UserTaskStore} import docspell.store.{AddResult, Store} import com.github.eikek.calev._ @@ -133,7 +133,7 @@ object OCollective { def apply[F[_]: Async]( store: Store[F], uts: UserTaskStore[F], - queue: JobQueue[F], + jobStore: JobStore[F], joex: OJoex[F] ): Resource[F, OCollective[F]] = Resource.pure[F, OCollective[F]](new OCollective[F] { @@ -196,32 +196,32 @@ object OCollective { for { id <- Ident.randomId[F] args = LearnClassifierArgs(collective) - ut <- UserTask( + ut = UserTask( id, LearnClassifierArgs.taskName, true, CalEvent(WeekdayComponent.All, DateEvent.All, TimeEvent.All), None, args - ).encode.toPeriodicTask(UserTaskScope(collective), args.makeSubject.some) - job <- ut.toJob - _ <- queue.insert(job) + ) + _ <- uts + .updateOneTask(UserTaskScope(collective), args.makeSubject.some, ut) _ <- joex.notifyAllNodes } yield () def startEmptyTrash(args: EmptyTrashArgs): F[Unit] = for { id <- Ident.randomId[F] - 
ut <- UserTask( + ut = UserTask( id, EmptyTrashArgs.taskName, true, CalEvent(WeekdayComponent.All, DateEvent.All, TimeEvent.All), None, args - ).encode.toPeriodicTask(UserTaskScope(args.collective), args.makeSubject.some) - job <- ut.toJob - _ <- queue.insert(job) + ) + _ <- uts + .updateOneTask(UserTaskScope(args.collective), args.makeSubject.some, ut) _ <- joex.notifyAllNodes } yield () @@ -321,7 +321,7 @@ object OCollective { AllPreviewsArgs(Some(account.collective), storeMode), Some(account.user) ) - _ <- queue.insertIfNew(job) + _ <- jobStore.insertIfNew(job.encode) _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield UpdateResult.success diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OFileRepository.scala b/modules/backend/src/main/scala/docspell/backend/ops/OFileRepository.scala new file mode 100644 index 00000000..0e2d9cdb --- /dev/null +++ b/modules/backend/src/main/scala/docspell/backend/ops/OFileRepository.scala @@ -0,0 +1,100 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.backend.ops + +import cats.data.OptionT +import cats.effect._ +import cats.implicits._ + +import docspell.backend.JobFactory +import docspell.backend.ops.OFileRepository.IntegrityResult +import docspell.common._ +import docspell.scheduler.{Job, JobStore} +import docspell.store.Store + +import scodec.bits.ByteVector + +trait OFileRepository[F[_]] { + + /** Inserts the job or return None if such a job already is running. 
*/ + def cloneFileRepository( + args: FileCopyTaskArgs, + notifyJoex: Boolean + ): F[Option[Job[FileCopyTaskArgs]]] + + def checkIntegrityAll( + part: FileKeyPart, + notifyJoex: Boolean + ): F[Option[Job[FileIntegrityCheckArgs]]] + + def checkIntegrity(key: FileKey, hash: Option[ByteVector]): F[Option[IntegrityResult]] +} + +object OFileRepository { + + case class IntegrityResult(ok: Boolean, key: FileKey) + + def apply[F[_]: Async]( + store: Store[F], + jobStore: JobStore[F], + joex: OJoex[F] + ): Resource[F, OFileRepository[F]] = + Resource.pure(new OFileRepository[F] { + private[this] val logger = docspell.logging.getLogger[F] + + def cloneFileRepository( + args: FileCopyTaskArgs, + notifyJoex: Boolean + ): F[Option[Job[FileCopyTaskArgs]]] = + for { + job <- JobFactory.fileCopy(args) + flag <- jobStore.insertIfNew(job.encode) + _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] + } yield Option.when(flag)(job) + + def checkIntegrityAll( + part: FileKeyPart, + notifyJoex: Boolean + ): F[Option[Job[FileIntegrityCheckArgs]]] = + for { + job <- JobFactory.integrityCheck(FileIntegrityCheckArgs(part)) + flag <- jobStore.insertIfNew(job.encode) + _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] + } yield Option.when(flag)(job) + + def checkIntegrity( + key: FileKey, + hash: Option[ByteVector] + ): F[Option[IntegrityResult]] = + (for { + _ <- OptionT.liftF( + logger.debugWith(s"Checking file $key")(_.data("fileKey", key)) + ) + expectedHash <- + hash.fold(OptionT(store.fileRepo.findMeta(key)).map(_.checksum))(h => + OptionT.pure[F](h) + ) + + actualHash <- + OptionT.liftF( + logger.debugWith(s"Calculating new hash for $key")( + _.data("fileKey", key) + ) *> + store.fileRepo + .getBytes(key) + .through(fs2.hash.sha256) + .compile + .foldChunks(ByteVector.empty)(_ ++ _.toByteVector) + ) + res = IntegrityResult(expectedHash == actualHash, key) + _ <- OptionT.liftF { + if (res.ok) logger.debug(s"File hashes match for $key") + else logger.warnWith(s"File hashes 
differ for: $key")(_.data("fileKey", key)) + } + } yield res).value + }) +} diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala b/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala index 9d057f1c..39452ea5 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala @@ -17,8 +17,8 @@ import docspell.common._ import docspell.ftsclient._ import docspell.query.ItemQuery._ import docspell.query.ItemQueryDsl._ +import docspell.scheduler.JobStore import docspell.store.queries.{QFolder, QItem, SelectedItem} -import docspell.store.queue.JobQueue import docspell.store.records.RJob import docspell.store.{Store, qb} @@ -81,7 +81,7 @@ object OFulltext { itemSearch: OItemSearch[F], fts: FtsClient[F], store: Store[F], - queue: JobQueue[F], + jobStore: JobStore[F], joex: OJoex[F] ): Resource[F, OFulltext[F]] = Resource.pure[F, OFulltext[F]](new OFulltext[F] { @@ -90,7 +90,7 @@ object OFulltext { for { _ <- logger.info(s"Re-index all.") job <- JobFactory.reIndexAll[F] - _ <- queue.insertIfNew(job) *> joex.notifyAllNodes + _ <- jobStore.insertIfNew(job.encode) *> joex.notifyAllNodes } yield () def reindexCollective(account: AccountId): F[Unit] = @@ -102,7 +102,7 @@ object OFulltext { job <- JobFactory.reIndex(account) _ <- if (exist.isDefined) ().pure[F] - else queue.insertIfNew(job) *> joex.notifyAllNodes + else jobStore.insertIfNew(job.encode) *> joex.notifyAllNodes } yield () def findIndexOnly(maxNoteLen: Int)( @@ -324,9 +324,7 @@ object OFulltext { def apply[A](implicit ev: ItemId[A]): ItemId[A] = ev def from[A](f: A => Ident): ItemId[A] = - new ItemId[A] { - def itemId(a: A) = f(a) - } + (a: A) => f(a) implicit val listItemId: ItemId[ListItem] = ItemId.from(_.id) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala index f625cdfe..04643348 100644 --- 
a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala @@ -18,8 +18,8 @@ import docspell.common._ import docspell.ftsclient.FtsClient import docspell.logging.Logger import docspell.notification.api.Event +import docspell.scheduler.JobStore import docspell.store.queries.{QAttachment, QItem, QMoveAttachment} -import docspell.store.queue.JobQueue import docspell.store.records._ import docspell.store.{AddResult, Store, UpdateResult} @@ -228,7 +228,7 @@ object OItem { store: Store[F], fts: FtsClient[F], createIndex: CreateIndex[F], - queue: JobQueue[F], + jobStore: JobStore[F], joex: OJoex[F] ): Resource[F, OItem[F]] = for { @@ -288,7 +288,7 @@ object OItem { ) ev = Event.TagsChanged.partial( itemIds, - added.toList.flatten.map(_.id).toList, + added.toList.flatten.map(_.id), Nil ) } yield AttachedEvent(UpdateResult.success)(ev)) @@ -763,7 +763,7 @@ object OItem { job <- OptionT.liftF( JobFactory.reprocessItem[F](args, account, Priority.Low) ) - _ <- OptionT.liftF(queue.insertIfNew(job)) + _ <- OptionT.liftF(jobStore.insertIfNew(job.encode)) _ <- OptionT.liftF(if (notifyJoex) joex.notifyAllNodes else ().pure[F]) } yield UpdateResult.success).getOrElse(UpdateResult.notFound) @@ -777,7 +777,8 @@ object OItem { jobs <- items .map(item => ReProcessItemArgs(item, Nil)) .traverse(arg => JobFactory.reprocessItem[F](arg, account, Priority.Low)) - _ <- queue.insertAllIfNew(jobs) + .map(_.map(_.encode)) + _ <- jobStore.insertAllIfNew(jobs) _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield items.size) @@ -788,7 +789,7 @@ object OItem { ): F[UpdateResult] = for { job <- JobFactory.convertAllPdfs[F](collective, submitter, Priority.Low) - _ <- queue.insertIfNew(job) + _ <- jobStore.insertIfNew(job.encode) _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield UpdateResult.success @@ -799,7 +800,7 @@ object OItem { ): F[UpdateResult] = for { job <- JobFactory.makePreview[F](args, 
account.some) - _ <- queue.insertIfNew(job) + _ <- jobStore.insertIfNew(job.encode) _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield UpdateResult.success @@ -809,7 +810,7 @@ object OItem { ): F[UpdateResult] = for { job <- JobFactory.allPreviews[F](AllPreviewsArgs(None, storeMode), None) - _ <- queue.insertIfNew(job) + _ <- jobStore.insertIfNew(job.encode) _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield UpdateResult.success diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OJob.scala b/modules/backend/src/main/scala/docspell/backend/ops/OJob.scala index b4b0b7ae..e0c14552 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OJob.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OJob.scala @@ -10,10 +10,10 @@ import cats.data.OptionT import cats.effect._ import cats.implicits._ -import docspell.backend.msg.JobDone import docspell.backend.ops.OJob.{CollectiveQueueState, JobCancelResult} import docspell.common._ import docspell.pubsub.api.PubSubT +import docspell.scheduler.msg.JobDone import docspell.store.Store import docspell.store.UpdateResult import docspell.store.queries.QJob diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OJoex.scala b/modules/backend/src/main/scala/docspell/backend/ops/OJoex.scala index e51e8bd6..9f83d46c 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OJoex.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OJoex.scala @@ -10,14 +10,16 @@ import cats.Applicative import cats.effect._ import cats.implicits._ -import docspell.backend.msg.{CancelJob, Topics} import docspell.common.Ident import docspell.pubsub.api.PubSubT +import docspell.scheduler.msg.{CancelJob, JobsNotify, PeriodicTaskNotify} trait OJoex[F[_]] { def notifyAllNodes: F[Unit] + def notifyPeriodicTasks: F[Unit] + def cancelJob(job: Ident, worker: Ident): F[Unit] } @@ -26,7 +28,10 @@ object OJoex { Resource.pure[F, OJoex[F]](new OJoex[F] { def notifyAllNodes: 
F[Unit] = - pubSub.publish1IgnoreErrors(Topics.jobsNotify, ()).as(()) + pubSub.publish1IgnoreErrors(JobsNotify(), ()).void + + def notifyPeriodicTasks: F[Unit] = + pubSub.publish1IgnoreErrors(PeriodicTaskNotify(), ()).void def cancelJob(job: Ident, worker: Ident): F[Unit] = pubSub.publish1IgnoreErrors(CancelJob.topic, CancelJob(job, worker)).as(()) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index dbda65b2..ca523cf8 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -14,8 +14,8 @@ import fs2.Stream import docspell.backend.JobFactory import docspell.common._ +import docspell.scheduler.{Job, JobStore} import docspell.store.Store -import docspell.store.queue.JobQueue import docspell.store.records._ trait OUpload[F[_]] { @@ -108,7 +108,7 @@ object OUpload { def apply[F[_]: Sync]( store: Store[F], - queue: JobQueue[F], + jobStore: JobStore[F], joex: OJoex[F] ): Resource[F, OUpload[F]] = Resource.pure[F, OUpload[F]](new OUpload[F] { @@ -187,10 +187,10 @@ object OUpload { private def submitJobs( notifyJoex: Boolean - )(jobs: Vector[RJob]): F[OUpload.UploadResult] = + )(jobs: Vector[Job[String]]): F[OUpload.UploadResult] = for { _ <- logger.debug(s"Storing jobs: $jobs") - _ <- queue.insertAll(jobs) + _ <- jobStore.insertAll(jobs) _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield UploadResult.Success @@ -244,7 +244,9 @@ object OUpload { account: AccountId, prio: Priority, tracker: Option[Ident] - ): F[Vector[RJob]] = - JobFactory.processItems[F](args, account, prio, tracker) + ): F[Vector[Job[String]]] = + JobFactory + .processItems[F](args, account, prio, tracker) + .map(_.map(_.encode)) }) } diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUserTask.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUserTask.scala index 
8f8d0ab3..7fc80b7d 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUserTask.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUserTask.scala @@ -13,10 +13,9 @@ import fs2.Stream import docspell.common._ import docspell.notification.api.{ChannelRef, PeriodicDueItemsArgs, PeriodicQueryArgs} +import docspell.scheduler.usertask.{UserTask, UserTaskScope, UserTaskStore} import docspell.store.Store -import docspell.store.queue.JobQueue import docspell.store.records.RNotificationChannel -import docspell.store.usertask._ import io.circe.Encoder @@ -86,7 +85,6 @@ object OUserTask { def apply[F[_]: Async]( taskStore: UserTaskStore[F], store: Store[F], - queue: JobQueue[F], joex: OJoex[F] ): Resource[F, OUserTask[F]] = Resource.pure[F, OUserTask[F]](new OUserTask[F] { @@ -95,9 +93,7 @@ object OUserTask { implicit E: Encoder[A] ): F[Unit] = for { - ptask <- task.encode.toPeriodicTask(scope, subject) - job <- ptask.toJob - _ <- queue.insert(job) + _ <- taskStore.executeNow(scope, subject, task) _ <- joex.notifyAllNodes } yield () @@ -124,7 +120,7 @@ object OUserTask { ): F[Unit] = for { _ <- taskStore.updateTask[ScanMailboxArgs](scope, subject, task) - _ <- joex.notifyAllNodes + _ <- joex.notifyPeriodicTasks } yield () def getNotifyDueItems( @@ -153,7 +149,7 @@ object OUserTask { ): F[Unit] = for { _ <- taskStore.updateTask[PeriodicDueItemsArgs](scope, subject, task) - _ <- joex.notifyAllNodes + _ <- joex.notifyPeriodicTasks } yield () def getPeriodicQuery(scope: UserTaskScope): Stream[F, UserTask[PeriodicQueryArgs]] = @@ -180,7 +176,7 @@ object OUserTask { ): F[Unit] = for { _ <- taskStore.updateTask[PeriodicQueryArgs](scope, subject, task) - _ <- joex.notifyAllNodes + _ <- joex.notifyPeriodicTasks } yield () // When retrieving arguments containing channel references, we must update diff --git a/modules/common/src/main/scala/docspell/common/Banner.scala b/modules/common/src/main/scala/docspell/common/Banner.scala index 169c67cc..21a7f299 100644 
--- a/modules/common/src/main/scala/docspell/common/Banner.scala +++ b/modules/common/src/main/scala/docspell/common/Banner.scala @@ -14,7 +14,8 @@ case class Banner( configFile: Option[String], appId: Ident, baseUrl: LenientUri, - ftsUrl: Option[LenientUri] + ftsUrl: Option[LenientUri], + fileStoreConfig: FileStoreConfig ) { private val banner = @@ -36,6 +37,7 @@ case class Banner( s"Database: ${jdbcUrl.asString}", s"Fts: ${ftsUrl.map(_.asString).getOrElse("-")}", s"Config: ${configFile.getOrElse("")}", + s"FileRepo: ${fileStoreConfig}", "" ) diff --git a/modules/common/src/main/scala/docspell/common/DocspellSystem.scala b/modules/common/src/main/scala/docspell/common/DocspellSystem.scala index 4ecfff2a..37319864 100644 --- a/modules/common/src/main/scala/docspell/common/DocspellSystem.scala +++ b/modules/common/src/main/scala/docspell/common/DocspellSystem.scala @@ -10,6 +10,8 @@ object DocspellSystem { val user = Ident.unsafe("docspell-system") val taskGroup = user + val account: AccountId = AccountId(taskGroup, user) + val migrationTaskTracker = Ident.unsafe("full-text-index-tracker") val allPreviewTaskTracker = Ident.unsafe("generate-all-previews") val allPageCountTaskTracker = Ident.unsafe("all-page-count-tracker") diff --git a/modules/common/src/main/scala/docspell/common/FileCopyTaskArgs.scala b/modules/common/src/main/scala/docspell/common/FileCopyTaskArgs.scala new file mode 100644 index 00000000..5026a775 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileCopyTaskArgs.scala @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import cats.data.NonEmptyList + +import docspell.common.FileCopyTaskArgs.Selection + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.syntax._ +import io.circe.{Decoder, Encoder} + +/** This is the input to the `FileCopyTask`. 
The task copies all files from one + * FileRepository to one or more target repositories. + * + * If no `from` is given, the default file repository is used. For targets, a list of ids + * can be specified that must match a configured file store in the config file. When + * selecting "all", it means all enabled stores. + */ +final case class FileCopyTaskArgs(from: Option[Ident], to: Selection) + +object FileCopyTaskArgs { + val taskName = Ident.unsafe("copy-file-repositories") + + sealed trait Selection + + object Selection { + + case object All extends Selection + case class Stores(ids: NonEmptyList[Ident]) extends Selection + + implicit val jsonEncoder: Encoder[Selection] = + Encoder.instance { + case All => "!all".asJson + case Stores(ids) => ids.toList.asJson + } + + implicit val jsonDecoder: Decoder[Selection] = + Decoder.instance { cursor => + cursor.value.asString match { + case Some(s) if s.equalsIgnoreCase("!all") => Right(All) + case _ => cursor.value.as[NonEmptyList[Ident]].map(Stores.apply) + } + } + } + + implicit val jsonDecoder: Decoder[FileCopyTaskArgs] = + deriveDecoder + + implicit val jsonEncoder: Encoder[FileCopyTaskArgs] = + deriveEncoder +} diff --git a/modules/common/src/main/scala/docspell/common/FileIntegrityCheckArgs.scala b/modules/common/src/main/scala/docspell/common/FileIntegrityCheckArgs.scala new file mode 100644 index 00000000..671596f3 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileIntegrityCheckArgs.scala @@ -0,0 +1,22 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} + +final case class FileIntegrityCheckArgs(pattern: FileKeyPart) {} + +object FileIntegrityCheckArgs { + val taskName: Ident = Ident.unsafe("all-file-integrity-check") + + implicit val jsonDecoder: Decoder[FileIntegrityCheckArgs] = + deriveDecoder + + implicit val jsonEncoder: Encoder[FileIntegrityCheckArgs] = + deriveEncoder +} diff --git a/modules/common/src/main/scala/docspell/common/FileKey.scala b/modules/common/src/main/scala/docspell/common/FileKey.scala index 17dc009e..46eb8409 100644 --- a/modules/common/src/main/scala/docspell/common/FileKey.scala +++ b/modules/common/src/main/scala/docspell/common/FileKey.scala @@ -9,7 +9,10 @@ package docspell.common import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} import io.circe.{Decoder, Encoder} -case class FileKey(collective: Ident, category: FileCategory, id: Ident) +final case class FileKey(collective: Ident, category: FileCategory, id: Ident) { + override def toString = + s"${collective.id}/${category.id.id}/${id.id}" +} object FileKey { diff --git a/modules/common/src/main/scala/docspell/common/FileKeyPart.scala b/modules/common/src/main/scala/docspell/common/FileKeyPart.scala new file mode 100644 index 00000000..fab01ab0 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileKeyPart.scala @@ -0,0 +1,53 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import cats.implicits._ + +import io.circe.syntax._ +import io.circe.{Decoder, DecodingFailure, Encoder} + +sealed trait FileKeyPart {} + +object FileKeyPart { + + case object Empty extends FileKeyPart + + final case class Collective(collective: Ident) extends FileKeyPart + + final case class Category(collective: Ident, category: FileCategory) extends FileKeyPart + + final case class Key(key: FileKey) extends FileKeyPart + + implicit val jsonEncoder: Encoder[FileKeyPart] = + Encoder.instance { + case Empty => ().asJson + case Collective(cid) => + Map("collective" -> cid.asJson).asJson + case Category(cid, cat) => + Map("collective" -> cid.asJson, "category" -> cat.asJson).asJson + case Key(key) => + key.asJson + } + + implicit val jsonDecoder: Decoder[FileKeyPart] = + Decoder.instance { cursor => + for { + cid <- cursor.getOrElse[Option[Ident]]("collective")(None) + cat <- cursor.getOrElse[Option[FileCategory]]("category")(None) + emptyObj = cursor.keys.exists(_.isEmpty) + + c3 = cursor.as[FileKey].map(Key).toOption + c2 = (cid, cat).mapN(Category) + c1 = cid.map(Collective) + c0 = Option.when(emptyObj)(Empty) + + c = c3.orElse(c2).orElse(c1).orElse(c0) + res <- c.toRight(DecodingFailure("", cursor.history)) + } yield res + } +} diff --git a/modules/common/src/main/scala/docspell/common/FileStoreConfig.scala b/modules/common/src/main/scala/docspell/common/FileStoreConfig.scala new file mode 100644 index 00000000..cb9afa9d --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileStoreConfig.scala @@ -0,0 +1,39 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import fs2.io.file.Path + +sealed trait FileStoreConfig { + def enabled: Boolean + def storeType: FileStoreType +} +object FileStoreConfig { + case class DefaultDatabase(enabled: Boolean) extends FileStoreConfig { + val storeType = FileStoreType.DefaultDatabase + } + + case class FileSystem( + enabled: Boolean, + directory: Path + ) extends FileStoreConfig { + val storeType = FileStoreType.FileSystem + } + + case class S3( + enabled: Boolean, + endpoint: String, + accessKey: String, + secretKey: String, + bucket: String + ) extends FileStoreConfig { + val storeType = FileStoreType.S3 + + override def toString = + s"S3(enabled=$enabled, endpoint=$endpoint, bucket=$bucket, accessKey=$accessKey, secretKey=***)" + } +} diff --git a/modules/common/src/main/scala/docspell/common/FileStoreType.scala b/modules/common/src/main/scala/docspell/common/FileStoreType.scala new file mode 100644 index 00000000..f67b218d --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileStoreType.scala @@ -0,0 +1,32 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import cats.data.NonEmptyList + +sealed trait FileStoreType { self: Product => + def name: String = + productPrefix.toLowerCase +} +object FileStoreType { + case object DefaultDatabase extends FileStoreType + + case object S3 extends FileStoreType + + case object FileSystem extends FileStoreType + + val all: NonEmptyList[FileStoreType] = + NonEmptyList.of(DefaultDatabase, S3, FileSystem) + + def fromString(str: String): Either[String, FileStoreType] = + all + .find(_.name.equalsIgnoreCase(str)) + .toRight(s"Invalid file store type: $str") + + def unsafeFromString(str: String): FileStoreType = + fromString(str).fold(sys.error, identity) +} diff --git a/modules/common/src/main/scala/docspell/common/syntax/StringSyntax.scala b/modules/common/src/main/scala/docspell/common/syntax/StringSyntax.scala index 40d5bf80..5e0038da 100644 --- a/modules/common/src/main/scala/docspell/common/syntax/StringSyntax.scala +++ b/modules/common/src/main/scala/docspell/common/syntax/StringSyntax.scala @@ -6,10 +6,8 @@ package docspell.common.syntax -import cats.implicits._ - import io.circe.Decoder -import io.circe.parser._ +import io.circe.parser trait StringSyntax { implicit class EvenMoreStringOps(s: String) { @@ -18,9 +16,8 @@ trait StringSyntax { def parseJsonAs[A](implicit d: Decoder[A]): Either[Throwable, A] = for { - json <- parse(s).leftMap(_.underlying) - value <- json.as[A] - } yield value + json <- parser.decode[A](s) + } yield json } } diff --git a/modules/config/src/main/scala/docspell/config/Implicits.scala b/modules/config/src/main/scala/docspell/config/Implicits.scala index 0bc7dda4..e9b23348 100644 --- a/modules/config/src/main/scala/docspell/config/Implicits.scala +++ b/modules/config/src/main/scala/docspell/config/Implicits.scala @@ -18,9 +18,18 @@ import docspell.logging.{Level, LogConfig} import com.github.eikek.calev.CalEvent import pureconfig.ConfigReader import 
pureconfig.error.{CannotConvert, FailureReason} +import pureconfig.generic.{CoproductHint, FieldCoproductHint} import scodec.bits.ByteVector object Implicits { + // the value "s-3" looks strange, this is to allow to write "s3" in the config + implicit val fileStoreCoproductHint: CoproductHint[FileStoreConfig] = + new FieldCoproductHint[FileStoreConfig]("type") { + override def fieldValue(name: String) = + if (name.equalsIgnoreCase("S3")) "s3" + else super.fieldValue(name) + } + implicit val accountIdReader: ConfigReader[AccountId] = ConfigReader[String].emap(reason(AccountId.parse)) @@ -42,6 +51,9 @@ object Implicits { implicit val identReader: ConfigReader[Ident] = ConfigReader[String].emap(reason(Ident.fromString)) + implicit def identMapReader[B: ConfigReader]: ConfigReader[Map[Ident, B]] = + pureconfig.configurable.genericMapReader[Ident, B](reason(Ident.fromString)) + implicit val byteVectorReader: ConfigReader[ByteVector] = ConfigReader[String].emap(reason { str => if (str.startsWith("hex:")) @@ -70,6 +82,9 @@ object Implicits { implicit val logLevelReader: ConfigReader[Level] = ConfigReader[String].emap(reason(Level.fromString)) + implicit val fileStoreTypeReader: ConfigReader[FileStoreType] = + ConfigReader[String].emap(reason(FileStoreType.fromString)) + def reason[A: ClassTag]( f: String => Either[String, A] ): String => Either[FailureReason, A] = diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index 8516c916..318bdeff 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -194,6 +194,11 @@ docspell.joex { # How often the node must be unreachable, before it is removed. min-not-found = 2 } + + # Checks all files against their checksum + integrity-check { + enabled = true + } } # A periodic task to check for new releases of docspell. 
It can @@ -646,6 +651,41 @@ Docpell Update Check # restrict file types that should be handed over to processing. # By default all files are allowed. valid-mime-types = [ ] + + # The id of an enabled store from the `stores` map that should + # be used. + # + # IMPORTANT NOTE: All nodes must have the exact same file store + # configuration! + default-store = "database" + + # A list of possible file stores. Each entry must have a unique + # id. The `type` is one of: default-database, file-system, s3. + # + # The enabled property serves currently to define target stores + # for the "copy files" task. All stores with enabled=false are + # removed from the list. The `default-store` must be enabled. + stores = { + database = + { enabled = true + type = "default-database" + } + + filesystem = + { enabled = false + type = "file-system" + directory = "/some/directory" + } + + minio = + { enabled = false + type = "s3" + endpoint = "http://localhost:9000" + access-key = "username" + secret-key = "password" + bucket = "docspell" + } + } } # Configuration of the full-text search engine. 
diff --git a/modules/joex/src/main/scala/docspell/joex/Config.scala b/modules/joex/src/main/scala/docspell/joex/Config.scala index 549b24ca..3418a56d 100644 --- a/modules/joex/src/main/scala/docspell/joex/Config.scala +++ b/modules/joex/src/main/scala/docspell/joex/Config.scala @@ -19,10 +19,10 @@ import docspell.ftssolr.SolrConfig import docspell.joex.analysis.RegexNerFile import docspell.joex.hk.HouseKeepingConfig import docspell.joex.routes.InternalHeader -import docspell.joex.scheduler.{PeriodicSchedulerConfig, SchedulerConfig} import docspell.joex.updatecheck.UpdateCheckConfig import docspell.logging.LogConfig import docspell.pubsub.naive.PubSubConfig +import docspell.scheduler.{PeriodicSchedulerConfig, SchedulerConfig} import docspell.store.JdbcConfig case class Config( diff --git a/modules/joex/src/main/scala/docspell/joex/ConfigFile.scala b/modules/joex/src/main/scala/docspell/joex/ConfigFile.scala index a3663030..ec5e70da 100644 --- a/modules/joex/src/main/scala/docspell/joex/ConfigFile.scala +++ b/modules/joex/src/main/scala/docspell/joex/ConfigFile.scala @@ -10,7 +10,7 @@ import cats.effect.Async import docspell.config.Implicits._ import docspell.config.{ConfigFactory, Validation} -import docspell.joex.scheduler.CountingScheme +import docspell.scheduler.CountingScheme import emil.MailAddress import emil.javamail.syntax._ @@ -19,6 +19,7 @@ import pureconfig.generic.auto._ import yamusca.imports._ object ConfigFile { + // IntelliJ is wrong, this is required import Implicits._ def loadConfig[F[_]: Async](args: List[String]): F[Config] = { @@ -51,6 +52,7 @@ object ConfigFile { Validation.failWhen( cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty, "No subject given for enabled update check!" 
- ) + ), + Validation(cfg => cfg.files.validate.map(_ => cfg)) ) } diff --git a/modules/joex/src/main/scala/docspell/joex/JoexApp.scala b/modules/joex/src/main/scala/docspell/joex/JoexApp.scala index 4f096f02..7fe53412 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexApp.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexApp.scala @@ -7,7 +7,7 @@ package docspell.joex import docspell.common.Ident -import docspell.joex.scheduler.{PeriodicScheduler, Scheduler} +import docspell.scheduler.{PeriodicScheduler, Scheduler} import docspell.store.records.RJobLog trait JoexApp[F[_]] { diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index ce28e8d3..5d8b6a40 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -10,37 +10,23 @@ import cats.effect._ import cats.implicits._ import fs2.concurrent.SignallingRef -import docspell.analysis.TextAnalyser import docspell.backend.MailAddressCodec -import docspell.backend.fulltext.CreateIndex -import docspell.backend.msg.{CancelJob, JobQueuePublish, Topics} import docspell.backend.ops._ import docspell.common._ -import docspell.ftsclient.FtsClient -import docspell.ftssolr.SolrFtsClient -import docspell.joex.analysis.RegexNerFile import docspell.joex.emptytrash._ -import docspell.joex.fts.{MigrationTask, ReIndexTask} +import docspell.joex.fts.MigrationTask import docspell.joex.hk._ -import docspell.joex.learn.LearnClassifierTask -import docspell.joex.notify._ import docspell.joex.pagecount._ -import docspell.joex.pdfconv.ConvertAllPdfTask -import docspell.joex.pdfconv.PdfConvTask import docspell.joex.preview._ -import docspell.joex.process.ItemHandler -import docspell.joex.process.ReProcessItem -import docspell.joex.scanmailbox._ -import docspell.joex.scheduler._ import docspell.joex.updatecheck._ import docspell.notification.api.NotificationModule 
import docspell.notification.impl.NotificationModuleImpl import docspell.pubsub.api.{PubSub, PubSubT} +import docspell.scheduler._ +import docspell.scheduler.impl.{JobStoreModuleBuilder, SchedulerModuleBuilder} +import docspell.scheduler.usertask.{UserTaskScope, UserTaskStore} import docspell.store.Store -import docspell.store.queue._ import docspell.store.records.{REmptyTrashSetting, RJobLog} -import docspell.store.usertask.UserTaskScope -import docspell.store.usertask.UserTaskStore import emil.javamail._ import org.http4s.client.Client @@ -48,9 +34,8 @@ import org.http4s.client.Client final class JoexAppImpl[F[_]: Async]( cfg: Config, store: Store[F], - queue: JobQueue[F], - pubSubT: PubSubT[F], - pstore: PeriodicTaskStore[F], + uts: UserTaskStore[F], + jobStore: JobStore[F], termSignal: SignallingRef[F, Boolean], notificationMod: NotificationModule[F], val scheduler: Scheduler[F], @@ -67,20 +52,11 @@ final class JoexAppImpl[F[_]: Async]( _ <- Async[F].start(eventConsume) _ <- scheduler.periodicAwake _ <- periodicScheduler.periodicAwake - _ <- subscriptions + _ <- scheduler.startSubscriptions + _ <- periodicScheduler.startSubscriptions } yield () } - def subscriptions = - for { - _ <- Async[F].start(pubSubT.subscribeSink(Topics.jobsNotify) { _ => - scheduler.notifyChange - }) - _ <- Async[F].start(pubSubT.subscribeSink(CancelJob.topic) { msg => - scheduler.requestCancel(msg.body.jobId).as(()) - }) - } yield () - def findLogs(jobId: Ident): F[Vector[RJobLog]] = store.transact(RJobLog.findLogs(jobId)) @@ -90,32 +66,30 @@ final class JoexAppImpl[F[_]: Async]( private def scheduleBackgroundTasks: F[Unit] = HouseKeepingTask .periodicTask[F](cfg.houseKeeping.schedule) - .flatMap(pstore.insert) *> + .flatMap(t => uts.updateTask(UserTaskScope.system, t.summary, t)) *> scheduleEmptyTrashTasks *> UpdateCheckTask .periodicTask(cfg.updateCheck) - .flatMap(pstore.insert) *> - MigrationTask.job.flatMap(queue.insertIfNew) *> + .flatMap(t => uts.updateTask(UserTaskScope.system, 
t.summary, t)) *> + MigrationTask.job.flatMap(jobStore.insertIfNew) *> AllPreviewsTask .job(MakePreviewArgs.StoreMode.WhenMissing, None) - .flatMap(queue.insertIfNew) *> - AllPageCountTask.job.flatMap(queue.insertIfNew).as(()) + .flatMap(jobStore.insertIfNew) *> + AllPageCountTask.job.flatMap(jobStore.insertIfNew).void private def scheduleEmptyTrashTasks: F[Unit] = store .transact( REmptyTrashSetting.findForAllCollectives(OCollective.EmptyTrash.default, 50) ) - .evalMap(es => - UserTaskStore(store).use { uts => - val args = EmptyTrashArgs(es.cid, es.minAge) - uts.updateOneTask( - UserTaskScope(args.collective), - args.makeSubject.some, - EmptyTrashTask.userTask(args, es.schedule) - ) - } - ) + .evalMap { es => + val args = EmptyTrashArgs(es.cid, es.minAge) + uts.updateOneTask( + UserTaskScope(args.collective), + args.makeSubject.some, + EmptyTrashTask.userTask(args, es.schedule) + ) + } .compile .drain @@ -131,179 +105,45 @@ object JoexAppImpl extends MailAddressCodec { pubSub: PubSub[F] ): Resource[F, JoexApp[F]] = for { - pstore <- PeriodicTaskStore.create(store) - joexLogger = docspell.logging.getLogger[F](s"joex-${cfg.appId.id}") + joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}")) pubSubT = PubSubT(pubSub, joexLogger) javaEmil = JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug)) notificationMod <- Resource.eval( NotificationModuleImpl[F](store, javaEmil, httpClient, 200) ) - queue <- JobQueuePublish(store, pubSubT, notificationMod) - joex <- OJoex(pubSubT) - upload <- OUpload(store, queue, joex) - fts <- createFtsClient(cfg)(httpClient) - createIndex <- CreateIndex.resource(fts, store) - itemOps <- OItem(store, fts, createIndex, queue, joex) - itemSearchOps <- OItemSearch(store) - analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig) - regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store) - updateCheck <- UpdateCheck.resource(httpClient) - notification <- ONotification(store, 
notificationMod) - sch <- SchedulerBuilder(cfg.scheduler, store) - .withQueue(queue) - .withPubSub(pubSubT) + + jobStoreModule = JobStoreModuleBuilder(store) + .withPubsub(pubSubT) .withEventSink(notificationMod) - .withTask( - JobTask.json( - ProcessItemArgs.taskName, - ItemHandler.newItem[F](cfg, itemOps, fts, analyser, regexNer), - ItemHandler.onCancel[F] - ) - ) - .withTask( - JobTask.json( - ReProcessItemArgs.taskName, - ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer), - ReProcessItem.onCancel[F] - ) - ) - .withTask( - JobTask.json( - ScanMailboxArgs.taskName, - ScanMailboxTask[F](cfg.userTasks.scanMailbox, javaEmil, upload, joex), - ScanMailboxTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - MigrationTask.taskName, - MigrationTask[F](cfg.fullTextSearch, fts, createIndex), - MigrationTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - ReIndexTask.taskName, - ReIndexTask[F](cfg.fullTextSearch, fts, createIndex), - ReIndexTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - HouseKeepingTask.taskName, - HouseKeepingTask[F](cfg), - HouseKeepingTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - PdfConvTask.taskName, - PdfConvTask[F](cfg), - PdfConvTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - ConvertAllPdfArgs.taskName, - ConvertAllPdfTask[F](queue, joex), - ConvertAllPdfTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - LearnClassifierArgs.taskName, - LearnClassifierTask[F](cfg.textAnalysis, analyser), - LearnClassifierTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - MakePreviewArgs.taskName, - MakePreviewTask[F](cfg.extraction.preview), - MakePreviewTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - AllPreviewsArgs.taskName, - AllPreviewsTask[F](queue, joex), - AllPreviewsTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - MakePageCountArgs.taskName, - MakePageCountTask[F](), - MakePageCountTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - AllPageCountTask.taskName, - AllPageCountTask[F](queue, 
joex), - AllPageCountTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - EmptyTrashArgs.taskName, - EmptyTrashTask[F](itemOps, itemSearchOps), - EmptyTrashTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - UpdateCheckTask.taskName, - UpdateCheckTask[F]( - cfg.updateCheck, - cfg.sendMail, - javaEmil, - updateCheck, - ThisVersion.default - ), - UpdateCheckTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - PeriodicQueryTask.taskName, - PeriodicQueryTask[F](notification), - PeriodicQueryTask.onCancel[F] - ) - ) - .withTask( - JobTask.json( - PeriodicDueItemsTask.taskName, - PeriodicDueItemsTask[F](notification), - PeriodicDueItemsTask.onCancel[F] - ) - ) - .resource - psch <- PeriodicScheduler.create( - cfg.periodicScheduler, - sch, - queue, - pstore, - joex + .build + + tasks <- JoexTasks.resource( + cfg, + jobStoreModule, + httpClient, + pubSubT, + notificationMod, + javaEmil ) + + schedulerModule <- SchedulerModuleBuilder(jobStoreModule) + .withSchedulerConfig(cfg.scheduler) + .withPeriodicSchedulerConfig(cfg.periodicScheduler) + .withTaskRegistry(tasks.get) + .resource + app = new JoexAppImpl( cfg, store, - queue, - pubSubT, - pstore, + jobStoreModule.userTasks, + jobStoreModule.jobs, termSignal, notificationMod, - sch, - psch + schedulerModule.scheduler, + schedulerModule.periodicScheduler ) appR <- Resource.make(app.init.map(_ => app))(_.initShutdown) } yield appR - private def createFtsClient[F[_]: Async]( - cfg: Config - )(client: Client[F]): Resource[F, FtsClient[F]] = - if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client) - else Resource.pure[F, FtsClient[F]](FtsClient.none[F]) } diff --git a/modules/joex/src/main/scala/docspell/joex/JoexServer.scala b/modules/joex/src/main/scala/docspell/joex/JoexServer.scala index ef06f730..a13d4b1f 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexServer.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexServer.scala @@ -41,7 +41,7 @@ object JoexServer { store <- 
Store.create[F]( cfg.jdbc, - cfg.files.chunkSize, + cfg.files.defaultFileRepositoryConfig, pools.connectEC ) settings <- Resource.eval(store.transact(RInternalSetting.create)) diff --git a/modules/joex/src/main/scala/docspell/joex/JoexTasks.scala b/modules/joex/src/main/scala/docspell/joex/JoexTasks.scala new file mode 100644 index 00000000..8d2f0a1f --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/JoexTasks.scala @@ -0,0 +1,248 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex + +import cats.effect.{Async, Resource} + +import docspell.analysis.TextAnalyser +import docspell.backend.fulltext.CreateIndex +import docspell.backend.ops._ +import docspell.common._ +import docspell.ftsclient.FtsClient +import docspell.ftssolr.SolrFtsClient +import docspell.joex.analysis.RegexNerFile +import docspell.joex.emptytrash.EmptyTrashTask +import docspell.joex.filecopy.{FileCopyTask, FileIntegrityCheckTask} +import docspell.joex.fts.{MigrationTask, ReIndexTask} +import docspell.joex.hk.HouseKeepingTask +import docspell.joex.learn.LearnClassifierTask +import docspell.joex.notify.{PeriodicDueItemsTask, PeriodicQueryTask} +import docspell.joex.pagecount.{AllPageCountTask, MakePageCountTask} +import docspell.joex.pdfconv.{ConvertAllPdfTask, PdfConvTask} +import docspell.joex.preview.{AllPreviewsTask, MakePreviewTask} +import docspell.joex.process.{ItemHandler, ReProcessItem} +import docspell.joex.scanmailbox.ScanMailboxTask +import docspell.joex.updatecheck.{ThisVersion, UpdateCheck, UpdateCheckTask} +import docspell.notification.api.NotificationModule +import docspell.pubsub.api.PubSubT +import docspell.scheduler.impl.JobStoreModuleBuilder +import docspell.scheduler.{JobStoreModule, JobTask, JobTaskRegistry} +import docspell.store.Store + +import emil.Emil +import org.http4s.client.Client + +final class JoexTasks[F[_]: Async]( + cfg: Config, + store: Store[F], + itemOps: OItem[F], + fts: 
FtsClient[F], + analyser: TextAnalyser[F], + regexNer: RegexNerFile[F], + updateCheck: UpdateCheck[F], + notification: ONotification[F], + fileRepo: OFileRepository[F], + javaEmil: Emil[F], + jobStoreModule: JobStoreModule[F], + upload: OUpload[F], + createIndex: CreateIndex[F], + joex: OJoex[F], + itemSearch: OItemSearch[F] +) { + + def get: JobTaskRegistry[F] = + JobTaskRegistry + .empty[F] + .withTask( + JobTask.json( + ProcessItemArgs.taskName, + ItemHandler.newItem[F](cfg, store, itemOps, fts, analyser, regexNer), + ItemHandler.onCancel[F](store) + ) + ) + .withTask( + JobTask.json( + ReProcessItemArgs.taskName, + ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer, store), + ReProcessItem.onCancel[F] + ) + ) + .withTask( + JobTask.json( + ScanMailboxArgs.taskName, + ScanMailboxTask[F](cfg.userTasks.scanMailbox, store, javaEmil, upload, joex), + ScanMailboxTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + MigrationTask.taskName, + MigrationTask[F](cfg.fullTextSearch, store, fts, createIndex), + MigrationTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + ReIndexTask.taskName, + ReIndexTask[F](cfg.fullTextSearch, store, fts, createIndex), + ReIndexTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + HouseKeepingTask.taskName, + HouseKeepingTask[F](cfg, store, fileRepo), + HouseKeepingTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + PdfConvTask.taskName, + PdfConvTask[F](cfg, store), + PdfConvTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + ConvertAllPdfArgs.taskName, + ConvertAllPdfTask[F](jobStoreModule.jobs, joex, store), + ConvertAllPdfTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + LearnClassifierArgs.taskName, + LearnClassifierTask[F](cfg.textAnalysis, store, analyser), + LearnClassifierTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + MakePreviewArgs.taskName, + MakePreviewTask[F](cfg.extraction.preview, store), + MakePreviewTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + AllPreviewsArgs.taskName, + 
AllPreviewsTask[F](jobStoreModule.jobs, joex, store), + AllPreviewsTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + MakePageCountArgs.taskName, + MakePageCountTask[F](store), + MakePageCountTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + AllPageCountTask.taskName, + AllPageCountTask[F](store, jobStoreModule.jobs, joex), + AllPageCountTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + EmptyTrashArgs.taskName, + EmptyTrashTask[F](itemOps, itemSearch), + EmptyTrashTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + UpdateCheckTask.taskName, + UpdateCheckTask[F]( + cfg.updateCheck, + cfg.sendMail, + store, + javaEmil, + updateCheck, + ThisVersion.default + ), + UpdateCheckTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + PeriodicQueryTask.taskName, + PeriodicQueryTask[F](store, notification), + PeriodicQueryTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + PeriodicDueItemsTask.taskName, + PeriodicDueItemsTask[F](store, notification), + PeriodicDueItemsTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + FileCopyTaskArgs.taskName, + FileCopyTask[F](cfg, store), + FileCopyTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + FileIntegrityCheckArgs.taskName, + FileIntegrityCheckTask[F](fileRepo, store), + FileIntegrityCheckTask.onCancel[F] + ) + ) +} + +object JoexTasks { + + def resource[F[_]: Async]( + cfg: Config, + jobStoreModule: JobStoreModuleBuilder.Module[F], + httpClient: Client[F], + pubSub: PubSubT[F], + notificationModule: NotificationModule[F], + emailService: Emil[F] + ): Resource[F, JoexTasks[F]] = + for { + joex <- OJoex(pubSub) + store = jobStoreModule.store + upload <- OUpload(store, jobStoreModule.jobs, joex) + fts <- createFtsClient(cfg)(httpClient) + createIndex <- CreateIndex.resource(fts, store) + itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs, joex) + itemSearchOps <- OItemSearch(store) + analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig) + regexNer <- 
RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store) + updateCheck <- UpdateCheck.resource(httpClient) + notification <- ONotification(store, notificationModule) + fileRepo <- OFileRepository(store, jobStoreModule.jobs, joex) + } yield new JoexTasks[F]( + cfg, + store, + itemOps, + fts, + analyser, + regexNer, + updateCheck, + notification, + fileRepo, + emailService, + jobStoreModule, + upload, + createIndex, + joex, + itemSearchOps + ) + + private def createFtsClient[F[_]: Async]( + cfg: Config + )(client: Client[F]): Resource[F, FtsClient[F]] = + if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client) + else Resource.pure[F, FtsClient[F]](FtsClient.none[F]) +} diff --git a/modules/joex/src/main/scala/docspell/joex/Main.scala b/modules/joex/src/main/scala/docspell/joex/Main.scala index 7866d6d1..a7607a5f 100644 --- a/modules/joex/src/main/scala/docspell/joex/Main.scala +++ b/modules/joex/src/main/scala/docspell/joex/Main.scala @@ -31,7 +31,8 @@ object Main extends IOApp { Option(System.getProperty("config.file")), cfg.appId, cfg.baseUrl, - Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled) + Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled), + cfg.files.defaultStoreConfig ) _ <- logger.info(s"\n${banner.render("***>")}") _ <- diff --git a/modules/joex/src/main/scala/docspell/joex/emptytrash/EmptyTrashTask.scala b/modules/joex/src/main/scala/docspell/joex/emptytrash/EmptyTrashTask.scala index 5b1cbd3f..3d50a0ca 100644 --- a/modules/joex/src/main/scala/docspell/joex/emptytrash/EmptyTrashTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/emptytrash/EmptyTrashTask.scala @@ -12,9 +12,9 @@ import fs2.Stream import docspell.backend.ops.{OItem, OItemSearch} import docspell.common._ -import docspell.joex.scheduler._ +import docspell.scheduler._ +import docspell.scheduler.usertask.UserTask import docspell.store.records.RItem -import docspell.store.usertask.UserTask import 
com.github.eikek.calev.CalEvent diff --git a/modules/joex/src/main/scala/docspell/joex/filecopy/FileCopyTask.scala b/modules/joex/src/main/scala/docspell/joex/filecopy/FileCopyTask.scala new file mode 100644 index 00000000..630857fd --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/filecopy/FileCopyTask.scala @@ -0,0 +1,144 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.filecopy + +import cats.data.NonEmptyList +import cats.effect._ +import cats.implicits._ + +import docspell.common.FileCopyTaskArgs.Selection +import docspell.common.{FileCopyTaskArgs, Ident} +import docspell.joex.Config +import docspell.logging.Logger +import docspell.scheduler.{JobTaskResultEncoder, Task} +import docspell.store.Store +import docspell.store.file.{BinnyUtils, FileRepository, FileRepositoryConfig} + +import binny.CopyTool.Counter +import binny.{BinaryId, BinaryStore, CopyTool} +import io.circe.generic.semiauto.deriveCodec +import io.circe.{Codec, Decoder, Encoder} + +object FileCopyTask { + type Args = FileCopyTaskArgs + + case class CopyResult(success: Boolean, message: String, counter: List[Counter]) + object CopyResult { + def noSourceImpl: CopyResult = + CopyResult(false, "No source BinaryStore implementation found!", Nil) + + def noTargetImpl: CopyResult = + CopyResult(false, "No target BinaryStore implementation found!", Nil) + + def noSourceStore(id: Ident): CopyResult = + CopyResult( + false, + s"No source file repo found with id: ${id.id}. 
Make sure it is present in the config.", + Nil + ) + + def noTargetStore: CopyResult = + CopyResult(false, "No target file repositories defined", Nil) + + def success(counter: NonEmptyList[Counter]): CopyResult = + CopyResult(true, "Done", counter.toList) + + implicit val binaryIdCodec: Codec[BinaryId] = + Codec.from( + Decoder.decodeString.map(BinaryId.apply), + Encoder.encodeString.contramap(_.id) + ) + + implicit val counterEncoder: Codec[Counter] = + deriveCodec + implicit val jsonCodec: Codec[CopyResult] = + deriveCodec + + implicit val jobTaskResultEncoder: JobTaskResultEncoder[CopyResult] = + JobTaskResultEncoder.fromJson[CopyResult].withMessage { result => + val allGood = result.counter.map(_.success).sum + val failed = result.counter.map(_.failed.size).sum + if (result.success) + s"Successfully copied $allGood files to ${result.counter.size} stores." + else + s"Copying files failed for ${failed} files! ${allGood} were copied successfully." + } + } + + def onCancel[F[_]]: Task[F, Args, Unit] = + Task.log(_.warn(s"Cancelling ${FileCopyTaskArgs.taskName.id} task")) + + def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, CopyResult] = + Task { ctx => + val src = ctx.args.from + .map(id => + cfg.files.getFileRepositoryConfig(id).toRight(CopyResult.noSourceStore(id)) + ) + .getOrElse(Right(cfg.files.defaultFileRepositoryConfig)) + + val targets = ctx.args.to match { + case Selection.All => + cfg.files.enabledStores.values.toList + .map(FileRepositoryConfig.fromFileStoreConfig(cfg.files.chunkSize, _)) + case Selection.Stores(ids) => + ids.traverse(cfg.files.getFileRepositoryConfig).map(_.toList).getOrElse(Nil) + } + + // remove source from targets if present there + val data = + for { + srcConfig <- src + trgConfig <- NonEmptyList + .fromList(targets.filter(_ != srcConfig)) + .toRight(CopyResult.noTargetStore) + + srcRepo = store.createFileRepository(srcConfig, true) + targetRepos = trgConfig.map(store.createFileRepository(_, false)) + } yield 
(srcRepo, targetRepos) + + data match { + case Right((from, tos)) => + ctx.logger.info(s"Start copying all files from $from") *> + copy(ctx.logger, from, tos).flatTap(r => + if (r.success) ctx.logger.info(s"Copying finished: ${r.counter}") + else ctx.logger.error(s"Copying failed: $r") + ) + + case Left(res) => + ctx.logger.error(s"Copying failed: $res") *> res.pure[F] + } + } + + def copy[F[_]: Async]( + logger: Logger[F], + from: FileRepository[F], + to: NonEmptyList[FileRepository[F]] + ): F[CopyResult] = + FileRepository.getDelegate(from) match { + case None => + CopyResult.noSourceImpl.pure[F] + + case Some((src, srcMeta)) => + to.traverse(FileRepository.getDelegate).map(_.map(_._1)) match { + case None => + CopyResult.noTargetImpl.pure[F] + + case Some(targets) => + val log = BinnyUtils.LoggerAdapter(logger) + val maxConcurrent = { + val nCores = Runtime.getRuntime.availableProcessors() + if (nCores > 2) nCores / 2 else 1 + } + + def copyTo(to: BinaryStore[F]) = + CopyTool.copyAll[F](log, src, srcMeta, to, 50, maxConcurrent) + + logger.info(s"Start copying ${from.config} -> ${to.map(_.config)}") *> + targets.traverse(copyTo).map(CopyResult.success) + } + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/filecopy/FileIntegrityCheckTask.scala b/modules/joex/src/main/scala/docspell/joex/filecopy/FileIntegrityCheckTask.scala new file mode 100644 index 00000000..4c1b1751 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/filecopy/FileIntegrityCheckTask.scala @@ -0,0 +1,91 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.filecopy + +import cats.Monoid +import cats.effect._ +import cats.implicits._ + +import docspell.backend.ops.OFileRepository +import docspell.backend.ops.OFileRepository.IntegrityResult +import docspell.common.{FileIntegrityCheckArgs, FileKey} +import docspell.scheduler.{JobTaskResultEncoder, Task} +import docspell.store.Store +import docspell.store.records.RFileMeta + +import io.circe.Encoder +import io.circe.generic.semiauto.deriveEncoder + +object FileIntegrityCheckTask { + type Args = FileIntegrityCheckArgs + + case class Result(ok: Int, failedKeys: Set[FileKey], notFoundKeys: Set[FileKey]) { + override def toString: String = + s"Result(ok=$ok, failed=${failedKeys.size}, notFound=${notFoundKeys.size}, " + + s"keysFailed=$failedKeys, notFoundKeys=$notFoundKeys)" + } + object Result { + val empty = Result(0, Set.empty, Set.empty) + + def notFound(key: FileKey) = Result(0, Set.empty, Set(key)) + + def from(r: IntegrityResult): Result = + if (r.ok) Result(1, Set.empty, Set.empty) else Result(0, Set(r.key), Set.empty) + + implicit val monoid: Monoid[Result] = + Monoid.instance( + empty, + (a, b) => + Result( + a.ok + b.ok, + a.failedKeys ++ b.failedKeys, + a.notFoundKeys ++ b.notFoundKeys + ) + ) + + implicit val jsonEncoder: Encoder[Result] = + deriveEncoder + + implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] = + JobTaskResultEncoder.fromJson[Result].withMessage { result => + s"Integrity check finished. 
Ok: ${result.ok}, " + + s"Failed: ${result.failedKeys.size}, Not found: ${result.notFoundKeys.size}" + } + } + + def apply[F[_]: Sync](ops: OFileRepository[F], store: Store[F]): Task[F, Args, Result] = + Task { ctx => + store + .transact( + RFileMeta + .findAll(ctx.args.pattern, 50) + ) + .chunks + .evalTap(c => ctx.logger.info(s"Checking next ${c.size} files…")) + .unchunks + .evalMap(meta => + ops.checkIntegrity(meta.id, meta.checksum.some).flatMap { + case Some(r) => + Result.from(r).pure[F] + case None => + ctx.logger + .error(s"File '${meta.id.toString}' not found in file repository") + .as(Result.notFound(meta.id)) + } + ) + .foldMonoid + .compile + .lastOrError + .flatTap(result => + ctx.logger + .infoWith(s"File check result: $result")(_.data("integrityCheck", result)) + ) + } + + def onCancel[F[_]]: Task[F, Args, Unit] = + Task.log(_.warn(s"Cancelling ${FileIntegrityCheckArgs.taskName.id} task")) +} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala index 8e7f133b..2e046d89 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala @@ -9,25 +9,13 @@ package docspell.joex.fts import docspell.backend.fulltext.CreateIndex import docspell.ftsclient.FtsClient import docspell.joex.Config -import docspell.joex.scheduler.Context import docspell.logging.Logger import docspell.store.Store -case class FtsContext[F[_]]( +final case class FtsContext[F[_]]( cfg: Config.FullTextSearch, store: Store[F], fulltext: CreateIndex[F], fts: FtsClient[F], logger: Logger[F] ) - -object FtsContext { - - def apply[F[_]]( - cfg: Config.FullTextSearch, - fts: FtsClient[F], - fulltext: CreateIndex[F], - ctx: Context[F, _] - ): FtsContext[F] = - FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger) -} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala 
b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala index 1184c0a3..3f5ed2a6 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala @@ -14,8 +14,9 @@ import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config -import docspell.joex.scheduler.Context import docspell.logging.Logger +import docspell.scheduler.Context +import docspell.store.Store object FtsWork { import syntax._ @@ -106,10 +107,11 @@ object FtsWork { def forContext( cfg: Config.FullTextSearch, + store: Store[F], fts: FtsClient[F], fulltext: CreateIndex[F] ): Kleisli[F, Context[F, _], Unit] = - mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx)) + mt.local(ctx => FtsContext(cfg, store, fulltext, fts, ctx.logger)) } } } diff --git a/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala index dee2fc1f..ddb44a21 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala @@ -13,14 +13,15 @@ import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config -import docspell.joex.scheduler.Task -import docspell.store.records.RJob +import docspell.scheduler.{Job, Task} +import docspell.store.Store object MigrationTask { val taskName = Ident.unsafe("full-text-index") def apply[F[_]: Async]( cfg: Config.FullTextSearch, + store: Store[F], fts: FtsClient[F], createIndex: CreateIndex[F] ): Task[F, Unit, Unit] = @@ -30,7 +31,7 @@ object MigrationTask { Task(ctx => for { migs <- migrationTasks[F](fts) - res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs) + res <- Migration[F](cfg, fts, store, createIndex, ctx.logger).run(migs) } yield res ) ) @@ -38,21 +39,18 @@ object MigrationTask { def onCancel[F[_]]: 
Task[F, Unit, Unit] = Task.log[F, Unit](_.warn("Cancelling full-text-index task")) - def job[F[_]: Sync]: F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - } yield RJob.newJob( - id, - taskName, - DocspellSystem.taskGroup, - (), - "Create full-text index", - now, - DocspellSystem.taskGroup, - Priority.Low, - Some(DocspellSystem.migrationTaskTracker) - ) + def job[F[_]: Sync]: F[Job[String]] = + Job + .createNew( + taskName, + DocspellSystem.taskGroup, + (), + "Create full-text index", + DocspellSystem.taskGroup, + Priority.Low, + Some(DocspellSystem.migrationTaskTracker) + ) + .map(_.encode) def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] = fts.initialize.map(_.map(fm => Migration.from(fm))) diff --git a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala index 37c1326b..b457d2fb 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala @@ -7,13 +7,15 @@ package docspell.joex.fts import cats.effect._ +import cats.implicits._ import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config import docspell.joex.fts.FtsWork.syntax._ -import docspell.joex.scheduler.Task +import docspell.scheduler.Task +import docspell.store.Store object ReIndexTask { type Args = ReIndexTaskArgs @@ -23,6 +25,7 @@ object ReIndexTask { def apply[F[_]: Async]( cfg: Config.FullTextSearch, + store: Store[F], fts: FtsClient[F], fulltext: CreateIndex[F] ): Task[F, Args, Unit] = @@ -30,7 +33,7 @@ object ReIndexTask { .log[F, Args](_.info(s"Running full-text re-index now")) .flatMap(_ => Task(ctx => - clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx) + clearData[F](ctx.args.collective).forContext(cfg, store, fts, fulltext).run(ctx) ) ) @@ -42,7 +45,7 @@ object ReIndexTask { (collective match { 
case Some(_) => FtsWork - .clearIndex(collective) + .clearIndex[F](collective) .recoverWith( FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) ) ++ diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala index 8e0bdb1c..68256465 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala @@ -10,44 +10,51 @@ import cats.effect._ import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.{Context, Task} import docspell.logging.Logger +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records._ import org.http4s.blaze.client.BlazeClientBuilder import org.http4s.client.Client object CheckNodesTask { - def apply[F[_]: Async]( - cfg: HouseKeepingConfig.CheckNodes - ): Task[F, Unit, Unit] = + cfg: HouseKeepingConfig.CheckNodes, + store: Store[F] + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { _ <- ctx.logger.info("Check nodes reachability") ec = scala.concurrent.ExecutionContext.global _ <- BlazeClientBuilder[F].withExecutionContext(ec).resource.use { client => - checkNodes(ctx, client) + checkNodes(ctx.logger, store, client) } _ <- ctx.logger.info( s"Remove nodes not found more than ${cfg.minNotFound} times" ) - n <- removeNodes(ctx, cfg) + n <- removeNodes(store, cfg) _ <- ctx.logger.info(s"Removed $n nodes") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CheckNodes task is disabled in the configuration") + ctx.logger.info("CheckNodes task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] + } - def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] = - ctx.store + def checkNodes[F[_]: Async]( + logger: Logger[F], + store: Store[F], + client: Client[F] + ): F[Unit] = + store .transact(RNode.streamAll) .evalMap(node => - 
checkNode(ctx.logger, client)(node.url) + checkNode(logger, client)(node.url) .flatMap(seen => - if (seen) ctx.store.transact(RNode.resetNotFound(node.id)) - else ctx.store.transact(RNode.incrementNotFound(node.id)) + if (seen) store.transact(RNode.resetNotFound(node.id)) + else store.transact(RNode.incrementNotFound(node.id)) ) ) .compile @@ -67,9 +74,9 @@ object CheckNodesTask { } def removeNodes[F[_]]( - ctx: Context[F, _], + store: Store[F], cfg: HouseKeepingConfig.CheckNodes ): F[Int] = - ctx.store.transact(RNode.deleteNotFound(cfg.minNotFound)) + store.transact(RNode.deleteNotFound(cfg.minNotFound)) } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala index ae7b59e8..864a73d7 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala @@ -10,22 +10,27 @@ import cats.effect._ import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.Task +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records._ object CleanupInvitesTask { - def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupInvites): Task[F, Unit, Unit] = + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.CleanupInvites, + store: Store[F] + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { now <- Timestamp.current[F] ts = now - cfg.olderThan _ <- ctx.logger.info(s"Cleanup invitations older than $ts") - n <- ctx.store.transact(RInvitation.deleteOlderThan(ts)) + n <- store.transact(RInvitation.deleteOlderThan(ts)) _ <- ctx.logger.info(s"Removed $n invitations") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CleanupInvites task is disabled in the configuration") + ctx.logger.info("CleanupInvites task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] } } diff --git 
a/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala index 4ae1c9e1..0a4ee43b 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala @@ -11,24 +11,28 @@ import cats.implicits._ import fs2.Stream import docspell.common._ -import docspell.joex.scheduler.Task +import docspell.scheduler.Task import docspell.store.Store import docspell.store.records._ object CleanupJobsTask { - def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupJobs): Task[F, Unit, Unit] = + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.CleanupJobs, + store: Store[F] + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { now <- Timestamp.current[F] ts = now - cfg.olderThan _ <- ctx.logger.info(s"Cleanup jobs older than $ts") - n <- deleteDoneJobs(ctx.store, ts, cfg.deleteBatch) + n <- deleteDoneJobs(store, ts, cfg.deleteBatch) _ <- ctx.logger.info(s"Removed $n jobs") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CleanupJobs task is disabled in the configuration") + ctx.logger.info("CleanupJobs task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] } def deleteDoneJobs[F[_]: Sync](store: Store[F], ts: Timestamp, batch: Int): F[Int] = diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala index b9cfdc98..574abfc7 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala @@ -10,22 +10,26 @@ import cats.effect._ import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.Task +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records._ object CleanupRememberMeTask { - - def apply[F[_]: Sync](cfg: 
HouseKeepingConfig.CleanupRememberMe): Task[F, Unit, Unit] = + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.CleanupRememberMe, + store: Store[F] + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { now <- Timestamp.current[F] ts = now - cfg.olderThan _ <- ctx.logger.info(s"Cleanup remember-me tokens older than $ts") - n <- ctx.store.transact(RRememberMe.deleteOlderThan(ts)) + n <- store.transact(RRememberMe.deleteOlderThan(ts)) _ <- ctx.logger.info(s"Removed $n tokens") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CleanupRememberMe task is disabled in the configuration") + ctx.logger.info("CleanupRememberMe task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] } } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala new file mode 100644 index 00000000..931f6714 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala @@ -0,0 +1,21 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.hk + +import io.circe.Encoder +import io.circe.generic.semiauto.deriveEncoder + +case class CleanupResult(removed: Int, disabled: Boolean) { + def asString = if (disabled) "disabled" else s"$removed" +} +object CleanupResult { + def of(n: Int): CleanupResult = CleanupResult(n, false) + def disabled: CleanupResult = CleanupResult(0, true) + + implicit val jsonEncoder: Encoder[CleanupResult] = + deriveEncoder +} diff --git a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala index 0d6b4778..2b5bc355 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala @@ -16,7 +16,8 @@ case class HouseKeepingConfig( cleanupInvites: CleanupInvites, cleanupJobs: CleanupJobs, cleanupRememberMe: CleanupRememberMe, - checkNodes: CheckNodes + checkNodes: CheckNodes, + integrityCheck: IntegrityCheck ) object HouseKeepingConfig { @@ -29,4 +30,5 @@ object HouseKeepingConfig { case class CheckNodes(enabled: Boolean, minNotFound: Int) + case class IntegrityCheck(enabled: Boolean) } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala index e6d0a86b..0da26529 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala @@ -9,41 +9,75 @@ package docspell.joex.hk import cats.effect._ import cats.implicits._ +import docspell.backend.ops.OFileRepository import docspell.common._ import docspell.joex.Config -import docspell.joex.scheduler.Task -import docspell.store.records._ -import docspell.store.usertask.UserTaskScope +import docspell.joex.filecopy.FileIntegrityCheckTask +import docspell.scheduler.usertask.UserTask +import 
docspell.scheduler.{JobTaskResultEncoder, Task} +import docspell.store.Store import com.github.eikek.calev._ +import io.circe.Encoder +import io.circe.generic.semiauto.deriveEncoder object HouseKeepingTask { private val periodicId = Ident.unsafe("docspell-houskeeping") val taskName: Ident = Ident.unsafe("housekeeping") - def apply[F[_]: Async](cfg: Config): Task[F, Unit, Unit] = + def apply[F[_]: Async]( + cfg: Config, + store: Store[F], + fileRepo: OFileRepository[F] + ): Task[F, Unit, Result] = { + val combined = + ( + CheckNodesTask(cfg.houseKeeping.checkNodes, store), + CleanupInvitesTask(cfg.houseKeeping.cleanupInvites, store), + CleanupJobsTask(cfg.houseKeeping.cleanupJobs, store), + CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe, store), + IntegrityCheckTask(cfg.houseKeeping.integrityCheck, store, fileRepo) + ).mapN(Result.apply) + Task .log[F, Unit](_.info(s"Running house-keeping task now")) - .flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites)) - .flatMap(_ => CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe)) - .flatMap(_ => CleanupJobsTask(cfg.houseKeeping.cleanupJobs)) - .flatMap(_ => CheckNodesTask(cfg.houseKeeping.checkNodes)) + .flatMap(_ => combined) + } def onCancel[F[_]]: Task[F, Unit, Unit] = Task.log[F, Unit](_.warn("Cancelling house-keeping task")) - def periodicTask[F[_]: Sync](ce: CalEvent): F[RPeriodicTask] = - RPeriodicTask - .createJson( - true, - UserTaskScope(DocspellSystem.taskGroup), - taskName, - (), - "Docspell house-keeping", - Priority.Low, - ce, - None - ) - .map(_.copy(id = periodicId)) + def periodicTask[F[_]: Sync](ce: CalEvent): F[UserTask[Unit]] = + UserTask( + periodicId, + taskName, + true, + ce, + "Docspell house-keeping".some, + () + ).pure[F] + + case class Result( + checkNodes: CleanupResult, + cleanupInvites: CleanupResult, + cleanupJobs: CleanupResult, + cleanupRememberMe: CleanupResult, + integrityCheck: FileIntegrityCheckTask.Result + ) + + object Result { + implicit val jsonEncoder: 
Encoder[Result] = + deriveEncoder + + implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] = + JobTaskResultEncoder.fromJson[Result].withMessage { r => + s"- Nodes removed: ${r.checkNodes.asString}\n" + + s"- Invites removed: ${r.cleanupInvites.asString}\n" + + s"- Jobs removed: ${r.cleanupJobs.asString}\n" + + s"- RememberMe removed: ${r.cleanupRememberMe.asString}\n" + + s"- Integrity check: ok=${r.integrityCheck.ok}, failed=${r.integrityCheck.failedKeys.size}, notFound=${r.integrityCheck.notFoundKeys.size}" + } + + } } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala new file mode 100644 index 00000000..90a3a073 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.hk + +import cats.effect._ +import cats.implicits._ + +import docspell.backend.ops.OFileRepository +import docspell.common._ +import docspell.joex.filecopy.FileIntegrityCheckTask +import docspell.scheduler.Task +import docspell.store.Store + +object IntegrityCheckTask { + + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.IntegrityCheck, + store: Store[F], + fileRepo: OFileRepository[F] + ): Task[F, Unit, FileIntegrityCheckTask.Result] = + Task { ctx => + if (cfg.enabled) + FileIntegrityCheckTask(fileRepo, store).run( + ctx.map(_ => FileIntegrityCheckArgs(FileKeyPart.Empty)) + ) + else + ctx.logger.info("Integrity check task is disabled in the configuration") *> + FileIntegrityCheckTask.Result.empty.pure[F] + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala index 129afc5a..0de696c9 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala +++ 
b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala @@ -14,8 +14,9 @@ import docspell.analysis.TextAnalyser import docspell.backend.ops.OCollective import docspell.common._ import docspell.joex.Config -import docspell.joex.scheduler._ import docspell.logging.Logger +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records.{RClassifierModel, RClassifierSetting} object LearnClassifierTask { @@ -29,14 +30,16 @@ object LearnClassifierTask { def apply[F[_]: Async]( cfg: Config.TextAnalysis, + store: Store[F], analyser: TextAnalyser[F] ): Task[F, Args, Unit] = - learnTags(cfg, analyser) - .flatMap(_ => learnItemEntities(cfg, analyser)) + learnTags(cfg, store, analyser) + .flatMap(_ => learnItemEntities(cfg, store, analyser)) .flatMap(_ => Task(_ => Sync[F].delay(System.gc()))) private def learnItemEntities[F[_]: Async]( cfg: Config.TextAnalysis, + store: Store[F], analyser: TextAnalyser[F] ): Task[F, Args, Unit] = Task { ctx => @@ -44,6 +47,7 @@ object LearnClassifierTask { LearnItemEntities .learnAll( analyser, + store, ctx.args.collective, cfg.classification.itemCount, cfg.maxLength @@ -54,16 +58,17 @@ object LearnClassifierTask { private def learnTags[F[_]: Async]( cfg: Config.TextAnalysis, + store: Store[F], analyser: TextAnalyser[F] ): Task[F, Args, Unit] = Task { ctx => val learnTags = for { - sett <- findActiveSettings[F](ctx, cfg) + sett <- findActiveSettings[F](ctx, store, cfg) maxItems = cfg.classification.itemCountOrWhenLower(sett.itemCount) _ <- OptionT.liftF( LearnTags - .learnAllTagCategories(analyser)( + .learnAllTagCategories(analyser, store)( ctx.args.collective, maxItems, cfg.maxLength @@ -74,34 +79,38 @@ object LearnClassifierTask { // learn classifier models from active tag categories learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *> // delete classifier model files for categories that have been removed - clearObsoleteTagModels(ctx) *> + clearObsoleteTagModels(ctx, store) *> // when tags are 
deleted, categories may get removed. fix the json array - ctx.store + store .transact(RClassifierSetting.fixCategoryList(ctx.args.collective)) .map(_ => ()) } - private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = + private def clearObsoleteTagModels[F[_]: Sync]( + ctx: Context[F, Args], + store: Store[F] + ): F[Unit] = for { - list <- ctx.store.transact( + list <- store.transact( ClassifierName.findOrphanTagModels(ctx.args.collective) ) _ <- ctx.logger.info( s"Found ${list.size} obsolete model files that are deleted now." ) - n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id))) + n <- store.transact(RClassifierModel.deleteAll(list.map(_.id))) _ <- list .map(_.fileId) - .traverse(id => ctx.store.fileRepo.delete(id)) + .traverse(id => store.fileRepo.delete(id)) _ <- ctx.logger.debug(s"Deleted $n model files.") } yield () private def findActiveSettings[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], cfg: Config.TextAnalysis ): OptionT[F, OCollective.Classifier] = if (cfg.classification.enabled) - OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective))) + OptionT(store.transact(RClassifierSetting.findById(ctx.args.collective))) .filter(_.autoTagEnabled) .map(OCollective.Classifier.fromRecord) else diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala index b0d4f181..19048abd 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala @@ -14,72 +14,81 @@ import fs2.Stream import docspell.analysis.TextAnalyser import docspell.analysis.classifier.TextClassifier.Data import docspell.common._ -import docspell.joex.scheduler._ +import docspell.scheduler._ +import docspell.store.Store object LearnItemEntities { def learnAll[F[_]: Async, A]( analyser: TextAnalyser[F], + store: Store[F], collective: 
Ident, maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = - learnCorrOrg(analyser, collective, maxItems, maxTextLen) - .flatMap(_ => learnCorrPerson[F, A](analyser, collective, maxItems, maxTextLen)) - .flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen)) - .flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen)) + learnCorrOrg[F, A](analyser, store, collective, maxItems, maxTextLen) + .flatMap(_ => + learnCorrPerson[F, A](analyser, store, collective, maxItems, maxTextLen) + ) + .flatMap(_ => learnConcPerson(analyser, store, collective, maxItems, maxTextLen)) + .flatMap(_ => learnConcEquip(analyser, store, collective, maxItems, maxTextLen)) def learnCorrOrg[F[_]: Async, A]( analyser: TextAnalyser[F], + store: Store[F], collective: Ident, maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = - learn(analyser, collective)( + learn(store, analyser, collective)( ClassifierName.correspondentOrg, - ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen) + _ => SelectItems.forCorrOrg(store, collective, maxItems, maxTextLen) ) def learnCorrPerson[F[_]: Async, A]( analyser: TextAnalyser[F], + store: Store[F], collective: Ident, maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = - learn(analyser, collective)( + learn(store, analyser, collective)( ClassifierName.correspondentPerson, - ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen) + _ => SelectItems.forCorrPerson(store, collective, maxItems, maxTextLen) ) def learnConcPerson[F[_]: Async, A]( analyser: TextAnalyser[F], + store: Store[F], collective: Ident, maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = - learn(analyser, collective)( + learn(store, analyser, collective)( ClassifierName.concernedPerson, - ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen) + _ => SelectItems.forConcPerson(store, collective, maxItems, maxTextLen) ) def learnConcEquip[F[_]: Async, A]( analyser: TextAnalyser[F], + store: 
Store[F], collective: Ident, maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = - learn(analyser, collective)( + learn(store, analyser, collective)( ClassifierName.concernedEquip, - ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen) + _ => SelectItems.forConcEquip(store, collective, maxItems, maxTextLen) ) private def learn[F[_]: Async, A]( + store: Store[F], analyser: TextAnalyser[F], collective: Ident )(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] = Task { ctx => ctx.logger.info(s"Learn classifier ${cname.name}") *> analyser.classifier.trainClassifier(ctx.logger, data(ctx))( - Kleisli(StoreClassifierModel.handleModel(ctx, collective, cname)) + Kleisli(StoreClassifierModel.handleModel(store, ctx.logger, collective, cname)) ) } } diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnTags.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnTags.scala index 732a38af..e80fe83c 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnTags.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnTags.scala @@ -12,13 +12,15 @@ import cats.implicits._ import docspell.analysis.TextAnalyser import docspell.common._ -import docspell.joex.scheduler._ +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records.RClassifierSetting object LearnTags { def learnTagCategory[F[_]: Async, A]( analyser: TextAnalyser[F], + store: Store[F], collective: Ident, maxItems: Int, maxTextLen: Int @@ -26,12 +28,14 @@ object LearnTags { category: String ): Task[F, A, Unit] = Task { ctx => - val data = SelectItems.forCategory(ctx, collective)(maxItems, category, maxTextLen) + val data = + SelectItems.forCategory(store, collective)(maxItems, category, maxTextLen) ctx.logger.info(s"Learn classifier for tag category: $category") *> analyser.classifier.trainClassifier(ctx.logger, data)( Kleisli( StoreClassifierModel.handleModel( - ctx, + store, + ctx.logger, collective, 
ClassifierName.tagCategory(category) ) @@ -39,15 +43,15 @@ object LearnTags { ) } - def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])( + def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F], store: Store[F])( collective: Ident, maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = Task { ctx => for { - cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective)) - task = learnTagCategory[F, A](analyser, collective, maxItems, maxTextLen) _ + cats <- store.transact(RClassifierSetting.getActiveCategories(collective)) + task = learnTagCategory[F, A](analyser, store, collective, maxItems, maxTextLen) _ _ <- cats.map(task).traverse(_.run(ctx)) } yield () } diff --git a/modules/joex/src/main/scala/docspell/joex/learn/SelectItems.scala b/modules/joex/src/main/scala/docspell/joex/learn/SelectItems.scala index fa99354d..a54d7f7e 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/SelectItems.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/SelectItems.scala @@ -10,7 +10,6 @@ import fs2.{Pipe, Stream} import docspell.analysis.classifier.TextClassifier.Data import docspell.common._ -import docspell.joex.scheduler.Context import docspell.store.Store import docspell.store.qb.Batch import docspell.store.queries.{QItem, TextAndTag} @@ -21,16 +20,7 @@ object SelectItems { val pageSep = LearnClassifierTask.pageSep val noClass = LearnClassifierTask.noClass - def forCategory[F[_]](ctx: Context[F, _], collective: Ident)( - maxItems: Int, - category: String, - maxTextLen: Int - ): Stream[F, Data] = - forCategory(ctx.store, collective, maxItems, category, maxTextLen) - - def forCategory[F[_]]( - store: Store[F], - collective: Ident, + def forCategory[F[_]](store: Store[F], collective: Ident)( maxItems: Int, category: String, maxTextLen: Int diff --git a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala index 
e0e7eabc..9d5aafe9 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala @@ -12,7 +12,6 @@ import fs2.io.file.Files import docspell.analysis.classifier.ClassifierModel import docspell.common._ -import docspell.joex.scheduler._ import docspell.logging.Logger import docspell.store.Store import docspell.store.records.RClassifierModel @@ -20,21 +19,12 @@ import docspell.store.records.RClassifierModel object StoreClassifierModel { def handleModel[F[_]: Async]( - ctx: Context[F, _], + store: Store[F], + logger: Logger[F], collective: Ident, modelName: ClassifierName )( trainedModel: ClassifierModel - ): F[Unit] = - handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel) - - def handleModel[F[_]: Async]( - store: Store[F], - logger: Logger[F] - )( - collective: Ident, - modelName: ClassifierName, - trainedModel: ClassifierModel ): F[Unit] = for { oldFile <- store.transact( diff --git a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala index 9b2db148..66450864 100644 --- a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala +++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala @@ -70,7 +70,7 @@ object ReadMail { HtmlBodyViewConfig.default.copy( textToHtml = MarkdownBody.makeHtml(markdownCfg) ) - ).map(makeHtmlBinary[F] _).map(b => Some(b)) + ).map(makeHtmlBinary[F]).map(b => Some(b)) } for { diff --git a/modules/joex/src/main/scala/docspell/joex/notify/PeriodicDueItemsTask.scala b/modules/joex/src/main/scala/docspell/joex/notify/PeriodicDueItemsTask.scala index 8ef5bc33..94db119b 100644 --- a/modules/joex/src/main/scala/docspell/joex/notify/PeriodicDueItemsTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/notify/PeriodicDueItemsTask.scala @@ -12,14 +12,15 @@ import cats.implicits._ import docspell.backend.ops.ONotification import 
docspell.common._ -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task import docspell.notification.api.EventContext import docspell.notification.api.NotificationChannel import docspell.notification.api.PeriodicDueItemsArgs import docspell.query.Date import docspell.query.ItemQuery._ import docspell.query.ItemQueryDsl._ +import docspell.scheduler.Context +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.qb.Batch import docspell.store.queries.ListItem import docspell.store.queries.{QItem, Query} @@ -32,11 +33,14 @@ object PeriodicDueItemsTask { def onCancel[F[_]]: Task[F, Args, Unit] = Task.log(_.warn(s"Cancelling ${taskName.id} task")) - def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] = + def apply[F[_]: Sync]( + store: Store[F], + notificationOps: ONotification[F] + ): Task[F, Args, Unit] = Task { ctx => val limit = 7 Timestamp.current[F].flatMap { now => - withItems(ctx, limit, now) { items => + withItems(ctx, store, limit, now) { items => withEventContext(ctx, items, limit, now) { eventCtx => withChannel(ctx, notificationOps) { channels => notificationOps.sendMessage(ctx.logger, eventCtx, channels) @@ -51,7 +55,12 @@ object PeriodicDueItemsTask { ): F[Unit] = TaskOperations.withChannel(ctx.logger, ctx.args.channels, ctx.args.account, ops)(cont) - def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)( + def withItems[F[_]: Sync]( + ctx: Context[F, Args], + store: Store[F], + limit: Int, + now: Timestamp + )( cont: Vector[ListItem] => F[Unit] ): F[Unit] = { val rightDate = Date((now + Duration.days(ctx.args.remindDays.toLong)).toMillis) @@ -77,7 +86,7 @@ object PeriodicDueItemsTask { for { res <- - ctx.store + store .transact( QItem .findItems(q, now.toUtcDate, 0, Batch.limit(limit)) diff --git a/modules/joex/src/main/scala/docspell/joex/notify/PeriodicQueryTask.scala b/modules/joex/src/main/scala/docspell/joex/notify/PeriodicQueryTask.scala index 
088f65ed..af1242cd 100644 --- a/modules/joex/src/main/scala/docspell/joex/notify/PeriodicQueryTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/notify/PeriodicQueryTask.scala @@ -13,8 +13,6 @@ import cats.implicits._ import docspell.backend.ops.ONotification import docspell.common._ -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task import docspell.notification.api.EventContext import docspell.notification.api.NotificationChannel import docspell.notification.api.PeriodicQueryArgs @@ -22,6 +20,9 @@ import docspell.query.ItemQuery import docspell.query.ItemQuery.Expr import docspell.query.ItemQuery.Expr.AndExpr import docspell.query.ItemQueryParser +import docspell.scheduler.Context +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.qb.Batch import docspell.store.queries.ListItem import docspell.store.queries.{QItem, Query} @@ -36,11 +37,14 @@ object PeriodicQueryTask { def onCancel[F[_]]: Task[F, Args, Unit] = Task.log(_.warn(s"Cancelling ${taskName.id} task")) - def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] = + def apply[F[_]: Sync]( + store: Store[F], + notificationOps: ONotification[F] + ): Task[F, Args, Unit] = Task { ctx => val limit = 7 Timestamp.current[F].flatMap { now => - withItems(ctx, limit, now) { items => + withItems(ctx, store, limit, now) { items => withEventContext(ctx, items, limit, now) { eventCtx => withChannel(ctx, notificationOps) { channels => notificationOps.sendMessage(ctx.logger, eventCtx, channels) @@ -58,9 +62,11 @@ object PeriodicQueryTask { private def queryString(q: ItemQuery.Expr) = ItemQueryParser.asString(q) - def withQuery[F[_]: Sync](ctx: Context[F, Args])(cont: Query => F[Unit]): F[Unit] = { + def withQuery[F[_]: Sync](ctx: Context[F, Args], store: Store[F])( + cont: Query => F[Unit] + ): F[Unit] = { def fromBookmark(id: String) = - ctx.store + store .transact(RQueryBookmark.findByNameOrId(ctx.args.account, id)) .map(_.map(_.query)) 
.flatTap(q => @@ -68,7 +74,7 @@ object PeriodicQueryTask { ) def fromShare(id: String) = - ctx.store + store .transact(RShare.findOneByCollective(ctx.args.account.collective, Some(true), id)) .map(_.map(_.query)) .flatTap(q => @@ -120,11 +126,16 @@ object PeriodicQueryTask { } } - def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)( + def withItems[F[_]: Sync]( + ctx: Context[F, Args], + store: Store[F], + limit: Int, + now: Timestamp + )( cont: Vector[ListItem] => F[Unit] ): F[Unit] = - withQuery(ctx) { query => - val items = ctx.store + withQuery(ctx, store) { query => + val items = store .transact(QItem.findItems(query, now.toUtcDate, 0, Batch.limit(limit))) .compile .to(Vector) diff --git a/modules/joex/src/main/scala/docspell/joex/pagecount/AllPageCountTask.scala b/modules/joex/src/main/scala/docspell/joex/pagecount/AllPageCountTask.scala index c882d582..f6f91346 100644 --- a/modules/joex/src/main/scala/docspell/joex/pagecount/AllPageCountTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pagecount/AllPageCountTask.scala @@ -13,22 +13,24 @@ import fs2.{Chunk, Stream} import docspell.backend.JobFactory import docspell.backend.ops.OJoex import docspell.common._ -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task -import docspell.store.queue.JobQueue +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records.RAttachment -import docspell.store.records.RJob object AllPageCountTask { val taskName = Ident.unsafe("all-page-count") type Args = Unit - def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = + def apply[F[_]: Sync]( + store: Store[F], + jobStore: JobStore[F], + joex: OJoex[F] + ): Task[F, Args, Unit] = Task { ctx => for { _ <- ctx.logger.info("Generating previews for attachments") - n <- submitConversionJobs(ctx, queue) + n <- submitConversionJobs(ctx, store, jobStore) _ <- ctx.logger.info(s"Submitted $n jobs") _ <- joex.notifyAllNodes } yield 
() @@ -39,14 +41,15 @@ object AllPageCountTask { def submitConversionJobs[F[_]: Sync]( ctx: Context[F, Args], - queue: JobQueue[F] + store: Store[F], + jobStore: JobStore[F] ): F[Int] = - ctx.store + store .transact(findAttachments) .chunks .flatMap(createJobs[F]) .chunks - .evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) + .evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) .compile .foldMonoid @@ -54,28 +57,25 @@ object AllPageCountTask { private def findAttachments[F[_]] = RAttachment.findAllWithoutPageCount(50) - private def createJobs[F[_]: Sync](ras: Chunk[RAttachment]): Stream[F, RJob] = { - def mkJob(ra: RAttachment): F[RJob] = + private def createJobs[F[_]: Sync](ras: Chunk[RAttachment]): Stream[F, Job[String]] = { + def mkJob(ra: RAttachment): F[Job[MakePageCountArgs]] = JobFactory.makePageCount(MakePageCountArgs(ra.id), None) val jobs = ras.traverse(mkJob) - Stream.evalUnChunk(jobs) + Stream.evalUnChunk(jobs).map(_.encode) } - def job[F[_]: Sync]: F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - } yield RJob.newJob( - id, - AllPageCountTask.taskName, - DocspellSystem.taskGroup, - (), - "Create all page-counts", - now, - DocspellSystem.taskGroup, - Priority.Low, - Some(DocspellSystem.allPageCountTaskTracker) - ) + def job[F[_]: Sync]: F[Job[String]] = + Job + .createNew( + AllPageCountTask.taskName, + DocspellSystem.taskGroup, + (), + "Create all page-counts", + DocspellSystem.taskGroup, + Priority.Low, + Some(DocspellSystem.allPageCountTaskTracker) + ) + .map(_.encode) } diff --git a/modules/joex/src/main/scala/docspell/joex/pagecount/PageCountTask.scala b/modules/joex/src/main/scala/docspell/joex/pagecount/MakePageCountTask.scala similarity index 68% rename from modules/joex/src/main/scala/docspell/joex/pagecount/PageCountTask.scala rename to modules/joex/src/main/scala/docspell/joex/pagecount/MakePageCountTask.scala index 
fcbaaa79..2769ac00 100644 --- a/modules/joex/src/main/scala/docspell/joex/pagecount/PageCountTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pagecount/MakePageCountTask.scala @@ -11,8 +11,9 @@ import cats.implicits._ import docspell.common._ import docspell.joex.process.AttachmentPageCount -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task +import docspell.scheduler.Context +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records.RAttachment import docspell.store.records.RAttachmentMeta @@ -20,10 +21,10 @@ object MakePageCountTask { type Args = MakePageCountArgs - def apply[F[_]: Sync](): Task[F, Args, Unit] = + def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] = Task { ctx => for { - exists <- pageCountExists(ctx) + exists <- pageCountExists(ctx, store) _ <- if (exists) ctx.logger.info( @@ -32,7 +33,7 @@ object MakePageCountTask { else ctx.logger.info( s"Reading page-count for attachment ${ctx.args.attachment}" - ) *> generatePageCount(ctx) + ) *> generatePageCount(ctx, store) } yield () } @@ -40,19 +41,20 @@ object MakePageCountTask { Task.log(_.warn("Cancelling make-page-count task")) private def generatePageCount[F[_]: Sync]( - ctx: Context[F, Args] + ctx: Context[F, Args], + store: Store[F] ): F[Unit] = for { - ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment)) + ra <- store.transact(RAttachment.findById(ctx.args.attachment)) _ <- ra - .map(AttachmentPageCount.createPageCount(ctx)) + .map(AttachmentPageCount.createPageCount(ctx, store)) .getOrElse( ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}") ) } yield () - private def pageCountExists[F[_]](ctx: Context[F, Args]): F[Boolean] = - ctx.store.transact( + private def pageCountExists[F[_]](ctx: Context[F, Args], store: Store[F]): F[Boolean] = + store.transact( RAttachmentMeta .findPageCountById(ctx.args.attachment) .map(_.exists(_ > 0)) diff --git 
a/modules/joex/src/main/scala/docspell/joex/pdfconv/ConvertAllPdfTask.scala b/modules/joex/src/main/scala/docspell/joex/pdfconv/ConvertAllPdfTask.scala index 1df9fcf4..84d3687d 100644 --- a/modules/joex/src/main/scala/docspell/joex/pdfconv/ConvertAllPdfTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pdfconv/ConvertAllPdfTask.scala @@ -12,10 +12,9 @@ import fs2.{Chunk, Stream} import docspell.backend.ops.OJoex import docspell.common._ -import docspell.joex.scheduler.{Context, Task} -import docspell.store.queue.JobQueue +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records.RAttachment -import docspell.store.records._ /* A task to find all non-converted pdf files (of a collective, or * all) and converting them using ocrmypdf by submitting a job for @@ -24,11 +23,15 @@ import docspell.store.records._ object ConvertAllPdfTask { type Args = ConvertAllPdfArgs - def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = + def apply[F[_]: Sync]( + jobStore: JobStore[F], + joex: OJoex[F], + store: Store[F] + ): Task[F, Args, Unit] = Task { ctx => for { _ <- ctx.logger.info("Converting pdfs using ocrmypdf") - n <- submitConversionJobs(ctx, queue) + n <- submitConversionJobs(ctx, store, jobStore) _ <- ctx.logger.info(s"Submitted $n file conversion jobs") _ <- joex.notifyAllNodes } yield () @@ -39,40 +42,36 @@ object ConvertAllPdfTask { def submitConversionJobs[F[_]: Sync]( ctx: Context[F, Args], - queue: JobQueue[F] + store: Store[F], + jobStore: JobStore[F] ): F[Int] = - ctx.store + store .transact(RAttachment.findNonConvertedPdf(ctx.args.collective, 50)) .chunks .flatMap(createJobs[F](ctx)) .chunks - .evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) + .evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) .compile .foldMonoid private def createJobs[F[_]: Sync]( ctx: Context[F, Args] - )(ras: 
Chunk[RAttachment]): Stream[F, RJob] = { + )(ras: Chunk[RAttachment]): Stream[F, Job[String]] = { val collectiveOrSystem = ctx.args.collective.getOrElse(DocspellSystem.taskGroup) - def mkJob(ra: RAttachment): F[RJob] = - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - } yield RJob.newJob( - id, + def mkJob(ra: RAttachment): F[Job[PdfConvTask.Args]] = + Job.createNew( PdfConvTask.taskName, collectiveOrSystem, PdfConvTask.Args(ra.id), s"Convert pdf ${ra.id.id}/${ra.name.getOrElse("-")}", - now, collectiveOrSystem, Priority.Low, Some(PdfConvTask.taskName / ra.id) ) val jobs = ras.traverse(mkJob) - Stream.evalUnChunk(jobs) + Stream.evalUnChunk(jobs).map(_.encode) } } diff --git a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala index ca82615a..a1ce38fb 100644 --- a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala @@ -16,7 +16,8 @@ import docspell.common._ import docspell.convert.ConversionResult import docspell.convert.extern.OcrMyPdf import docspell.joex.Config -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.records._ import io.circe.generic.semiauto._ @@ -36,12 +37,12 @@ object PdfConvTask { val taskName = Ident.unsafe("pdf-files-migration") - def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] = + def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, Unit] = Task { ctx => for { _ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf") - meta <- checkInputs(cfg, ctx) - _ <- meta.traverse(fm => convert(cfg, ctx, fm)) + meta <- checkInputs(cfg, ctx, store) + _ <- meta.traverse(fm => convert(cfg, ctx, store, fm)) } yield () } @@ -53,19 +54,20 @@ object PdfConvTask { // check if file exists and if it is pdf and if source id is the same and if ocrmypdf 
is enabled def checkInputs[F[_]: Sync]( cfg: Config, - ctx: Context[F, Args] + ctx: Context[F, Args], + store: Store[F] ): F[Option[RFileMeta]] = { val none: Option[RFileMeta] = None val checkSameFiles = (for { - ra <- OptionT(ctx.store.transact(RAttachment.findById(ctx.args.attachId))) + ra <- OptionT(store.transact(RAttachment.findById(ctx.args.attachId))) isSame <- OptionT.liftF( - ctx.store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId)) + store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId)) ) } yield isSame).getOrElse(false) val existsPdf = for { - meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId)) + meta <- store.transact(RAttachment.findMeta(ctx.args.attachId)) res = meta.filter(_.mimetype.matches(MimeType.pdf)) _ <- if (res.isEmpty) @@ -90,18 +92,19 @@ object PdfConvTask { def convert[F[_]: Async]( cfg: Config, ctx: Context[F, Args], + store: Store[F], in: RFileMeta ): F[Unit] = { - val fs = ctx.store.fileRepo + val fs = store.fileRepo val data = fs.getBytes(in.id) val storeResult: ConversionResult.Handler[F, Unit] = Kleisli { case ConversionResult.SuccessPdf(file) => - storeToAttachment(ctx, in, file) + storeToAttachment(ctx, store, in, file) case ConversionResult.SuccessPdfTxt(file, _) => - storeToAttachment(ctx, in, file) + storeToAttachment(ctx, store, in, file) case ConversionResult.UnsupportedFormat(mime) => ctx.logger.warn( @@ -124,19 +127,20 @@ object PdfConvTask { )(data, storeResult) for { - lang <- getLanguage(ctx) + lang <- getLanguage(ctx, store) _ <- ocrMyPdf(lang) } yield () } - def getLanguage[F[_]: Sync](ctx: Context[F, Args]): F[Language] = + def getLanguage[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[Language] = (for { - coll <- OptionT(ctx.store.transact(RCollective.findByAttachment(ctx.args.attachId))) + coll <- OptionT(store.transact(RCollective.findByAttachment(ctx.args.attachId))) lang = coll.language } yield lang).getOrElse(Language.German) def storeToAttachment[F[_]: Sync]( ctx: 
Context[F, Args], + store: Store[F], meta: RFileMeta, newFile: Stream[F, Byte] ): F[Unit] = { @@ -146,10 +150,10 @@ object PdfConvTask { for { fid <- newFile - .through(ctx.store.fileRepo.save(collective, cat, mimeHint)) + .through(store.fileRepo.save(collective, cat, mimeHint)) .compile .lastOrError - _ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid)) + _ <- store.transact(RAttachment.updateFileId(ctx.args.attachId, fid)) } yield () } } diff --git a/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala b/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala index 4021b67a..b0988305 100644 --- a/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala @@ -14,21 +14,23 @@ import docspell.backend.JobFactory import docspell.backend.ops.OJoex import docspell.common.MakePreviewArgs.StoreMode import docspell.common._ -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task -import docspell.store.queue.JobQueue +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records.RAttachment -import docspell.store.records.RJob object AllPreviewsTask { type Args = AllPreviewsArgs - def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = + def apply[F[_]: Sync]( + jobStore: JobStore[F], + joex: OJoex[F], + store: Store[F] + ): Task[F, Args, Unit] = Task { ctx => for { _ <- ctx.logger.info("Generating previews for attachments") - n <- submitConversionJobs(ctx, queue) + n <- submitConversionJobs(ctx, store, jobStore) _ <- ctx.logger.info(s"Submitted $n jobs") _ <- joex.notifyAllNodes } yield () @@ -39,14 +41,17 @@ object AllPreviewsTask { def submitConversionJobs[F[_]: Sync]( ctx: Context[F, Args], - queue: JobQueue[F] + store: Store[F], + jobStore: JobStore[F] ): F[Int] = - ctx.store + store .transact(findAttachments(ctx)) .chunks .flatMap(createJobs[F](ctx)) 
.chunks - .evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) + .evalMap(jobs => + jobStore.insertAllIfNew(jobs.map(_.encode).toVector).map(_ => jobs.size) + ) .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) .compile .foldMonoid @@ -61,13 +66,13 @@ object AllPreviewsTask { private def createJobs[F[_]: Sync]( ctx: Context[F, Args] - )(ras: Chunk[RAttachment]): Stream[F, RJob] = { + )(ras: Chunk[RAttachment]): Stream[F, Job[MakePreviewArgs]] = { val collectiveOrSystem = { val cid = ctx.args.collective.getOrElse(DocspellSystem.taskGroup) AccountId(cid, DocspellSystem.user) } - def mkJob(ra: RAttachment): F[RJob] = + def mkJob(ra: RAttachment): F[Job[MakePreviewArgs]] = JobFactory.makePreview( MakePreviewArgs(ra.id, ctx.args.storeMode), collectiveOrSystem.some @@ -77,7 +82,10 @@ object AllPreviewsTask { Stream.evalUnChunk(jobs) } - def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] = - JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None) + def job[F[_]: Sync]( + storeMode: MakePreviewArgs.StoreMode, + cid: Option[Ident] + ): F[Job[String]] = + JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None).map(_.encode) } diff --git a/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala b/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala index 14bf18f3..ce807205 100644 --- a/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala @@ -13,8 +13,9 @@ import docspell.common._ import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PreviewConfig import docspell.joex.process.AttachmentPreview -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task +import docspell.scheduler.Context +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records.RAttachment import docspell.store.records.RAttachmentPreview 
@@ -22,10 +23,10 @@ object MakePreviewTask { type Args = MakePreviewArgs - def apply[F[_]: Sync](pcfg: PreviewConfig): Task[F, Args, Unit] = + def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F]): Task[F, Args, Unit] = Task { ctx => for { - exists <- previewExists(ctx) + exists <- previewExists(ctx, store) preview <- PdfboxPreview(pcfg) _ <- if (exists) @@ -35,7 +36,7 @@ object MakePreviewTask { else ctx.logger.info( s"Generating preview image for attachment ${ctx.args.attachment}" - ) *> generatePreview(ctx, preview) + ) *> generatePreview(ctx, store, preview) } yield () } @@ -44,20 +45,24 @@ object MakePreviewTask { private def generatePreview[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], preview: PdfboxPreview[F] ): F[Unit] = for { - ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment)) + ra <- store.transact(RAttachment.findById(ctx.args.attachment)) _ <- ra - .map(AttachmentPreview.createPreview(ctx, preview)) + .map(AttachmentPreview.createPreview(ctx, store, preview)) .getOrElse( ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}") ) } yield () - private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] = + private def previewExists[F[_]: Sync]( + ctx: Context[F, Args], + store: Store[F] + ): F[Boolean] = if (ctx.args.store == MakePreviewArgs.StoreMode.WhenMissing) - ctx.store.transact( + store.transact( RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined) ) else diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala index cce9400d..7c2880aa 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala @@ -15,7 +15,8 @@ import fs2.Stream import docspell.common._ import docspell.extract.pdfbox.PdfMetaData import docspell.extract.pdfbox.PdfboxExtract -import 
docspell.joex.scheduler._ +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records.RAttachment import docspell.store.records._ @@ -24,7 +25,7 @@ import docspell.store.records._ */ object AttachmentPageCount { - def apply[F[_]: Sync]()( + def apply[F[_]: Sync](store: Store[F])( item: ItemData ): Task[F, ProcessItemArgs, ItemData] = Task { ctx => @@ -33,7 +34,7 @@ object AttachmentPageCount { s"Retrieving page count for ${item.attachments.size} files…" ) _ <- item.attachments - .traverse(createPageCount(ctx)) + .traverse(createPageCount(ctx, store)) .attempt .flatMap { case Right(_) => ().pure[F] @@ -46,14 +47,15 @@ object AttachmentPageCount { } def createPageCount[F[_]: Sync]( - ctx: Context[F, _] + ctx: Context[F, _], + store: Store[F] )(ra: RAttachment): F[Option[PdfMetaData]] = - findMime[F](ctx)(ra).flatMap { + findMime[F](store)(ra).flatMap { case MimeType.PdfMatch(_) => - PdfboxExtract.getMetaData(loadFile(ctx)(ra)).flatMap { + PdfboxExtract.getMetaData(loadFile(store)(ra)).flatMap { case Right(md) => ctx.logger.debug(s"Found number of pages: ${md.pageCount}") *> - updatePageCount(ctx, md, ra).map(_.some) + updatePageCount(ctx, store, md, ra).map(_.some) case Left(ex) => ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *> (None: Option[PdfMetaData]).pure[F] @@ -66,6 +68,7 @@ object AttachmentPageCount { private def updatePageCount[F[_]: Sync]( ctx: Context[F, _], + store: Store[F], md: PdfMetaData, ra: RAttachment ): F[PdfMetaData] = @@ -73,12 +76,12 @@ object AttachmentPageCount { _ <- ctx.logger.debug( s"Update attachment ${ra.id.id} with page count ${md.pageCount.some}" ) - n <- ctx.store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some)) + n <- store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some)) m <- if (n == 0) ctx.logger.warn( s"No attachmentmeta record exists for ${ra.id.id}. Creating new." 
- ) *> ctx.store.transact( + ) *> store.transact( RAttachmentMeta.insert( RAttachmentMeta( ra.id, @@ -94,11 +97,11 @@ object AttachmentPageCount { _ <- ctx.logger.debug(s"Stored page count (${n + m}).") } yield md - def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = - OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) + def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] = + OptionT(store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) .getOrElse(MimeType.octetStream) - def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.fileRepo.getBytes(ra.fileId) + def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] = + store.fileRepo.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala index 2a55775d..31c1e007 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala @@ -15,7 +15,8 @@ import fs2.Stream import docspell.common._ import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PreviewConfig -import docspell.joex.scheduler._ +import docspell.scheduler._ +import docspell.store.Store import docspell.store.queries.QAttachment import docspell.store.records.RAttachment import docspell.store.records._ @@ -26,7 +27,7 @@ import docspell.store.records._ */ object AttachmentPreview { - def apply[F[_]: Sync](pcfg: PreviewConfig)( + def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F])( item: ItemData ): Task[F, ProcessItemArgs, ItemData] = Task { ctx => @@ -36,7 +37,7 @@ object AttachmentPreview { ) preview <- PdfboxPreview(pcfg) _ <- item.attachments - .traverse(createPreview(ctx, preview)) + .traverse(createPreview(ctx, store, preview)) .attempt .flatMap { case Right(_) => ().pure[F] @@ -50,16 +51,17 
@@ object AttachmentPreview { def createPreview[F[_]: Sync]( ctx: Context[F, _], + store: Store[F], preview: PdfboxPreview[F] )( ra: RAttachment ): F[Option[RAttachmentPreview]] = - findMime[F](ctx)(ra).flatMap { + findMime[F](store)(ra).flatMap { case MimeType.PdfMatch(_) => - preview.previewPNG(loadFile(ctx)(ra)).flatMap { + preview.previewPNG(loadFile(store)(ra)).flatMap { case Some(out) => ctx.logger.debug("Preview generated, saving to database…") *> - createRecord(ctx, ra.fileId.collective, out, ra).map(_.some) + createRecord(store, ra.fileId.collective, out, ra).map(_.some) case None => ctx.logger .info(s"Preview could not be generated. Maybe the pdf has no pages?") *> @@ -72,7 +74,7 @@ object AttachmentPreview { } private def createRecord[F[_]: Sync]( - ctx: Context[F, _], + store: Store[F], collective: Ident, png: Stream[F, Byte], ra: RAttachment @@ -83,7 +85,7 @@ object AttachmentPreview { for { fileId <- png .through( - ctx.store.fileRepo.save( + store.fileRepo.save( collective, FileCategory.PreviewImage, MimeTypeHint(name.map(_.fullName), Some("image/png")) @@ -93,16 +95,16 @@ object AttachmentPreview { .lastOrError now <- Timestamp.current[F] rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now) - _ <- QAttachment.deletePreview(ctx.store)(ra.id) - _ <- ctx.store.transact(RAttachmentPreview.insert(rp)) + _ <- QAttachment.deletePreview(store)(ra.id) + _ <- store.transact(RAttachmentPreview.insert(rp)) } yield rp } - def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = - OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) + def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] = + OptionT(store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) .getOrElse(MimeType.octetStream) - def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.fileRepo.getBytes(ra.fileId) + def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] = + 
store.fileRepo.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 73754127..648d6b29 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -17,7 +17,8 @@ import docspell.convert.ConversionResult.Handler import docspell.convert.SanitizeHtml import docspell.convert._ import docspell.joex.extract.JsoupSanitizer -import docspell.joex.scheduler._ +import docspell.scheduler._ +import docspell.store.Store import docspell.store.records._ /** Goes through all attachments and creates a PDF version of it where supported. @@ -36,21 +37,22 @@ object ConvertPdf { def apply[F[_]: Async]( cfg: ConvertConfig, + store: Store[F], item: ItemData ): Task[F, Args, ItemData] = Task { ctx => def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = - isConverted(ctx)(ra).flatMap { + isConverted(store)(ra).flatMap { case true if ctx.args.isNormalProcessing => ctx.logger.info( s"Conversion to pdf already done for attachment ${ra.name}." 
) *> - ctx.store + store .transact(RAttachmentMeta.findById(ra.id)) .map(rmOpt => (ra, rmOpt)) case _ => - findMime(ctx)(ra).flatMap(m => - convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m) + findMime(store)(ra).flatMap(m => + convertSafe(cfg, JsoupSanitizer.clean, ctx, store, item)(ra, m) ) } @@ -62,13 +64,15 @@ object ConvertPdf { } - def isConverted[F[_]](ctx: Context[F, Args])( + def isConverted[F[_]](store: Store[F])( ra: RAttachment ): F[Boolean] = - ctx.store.transact(RAttachmentSource.isConverted(ra.id)) + store.transact(RAttachmentSource.isConverted(ra.id)) - def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = - OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) + def findMime[F[_]: Functor](store: Store[F])( + ra: RAttachment + ): F[MimeType] = + OptionT(store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) .getOrElse(MimeType.octetStream) @@ -76,14 +80,15 @@ object ConvertPdf { cfg: ConvertConfig, sanitizeHtml: SanitizeHtml, ctx: Context[F, Args], + store: Store[F], item: ItemData )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = - loadCollectivePasswords(ctx).flatMap(collPass => + loadCollectivePasswords(ctx, store).flatMap(collPass => Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv => mime match { case mt => - val data = ctx.store.fileRepo.getBytes(ra.fileId) - val handler = conversionHandler[F](ctx, cfg, ra, item) + val data = store.fileRepo.getBytes(ra.fileId) + val handler = conversionHandler[F](ctx, store, cfg, ra, item) ctx.logger .info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> conv.toPDF(DataType(mt), ctx.args.meta.language, handler)( @@ -94,14 +99,16 @@ object ConvertPdf { ) private def loadCollectivePasswords[F[_]: Async]( - ctx: Context[F, Args] + ctx: Context[F, Args], + store: Store[F] ): F[List[Password]] = - ctx.store + store .transact(RCollectivePassword.findAll(ctx.args.meta.collective)) 
.map(_.map(_.password).distinct) private def conversionHandler[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], cfg: ConvertConfig, ra: RAttachment, item: ItemData @@ -109,12 +116,12 @@ object ConvertPdf { Kleisli { case ConversionResult.SuccessPdf(pdf) => ctx.logger.info(s"Conversion to pdf successful. Saving file.") *> - storePDF(ctx, cfg, ra, pdf) + storePDF(ctx, store, cfg, ra, pdf) .map(r => (r, None)) case ConversionResult.SuccessPdfTxt(pdf, txt) => ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *> - storePDF(ctx, cfg, ra, pdf) + storePDF(ctx, store, cfg, ra, pdf) .flatMap(r => txt.map(t => ( @@ -148,6 +155,7 @@ object ConvertPdf { private def storePDF[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], cfg: ConvertConfig, ra: RAttachment, pdf: Stream[F, Byte] @@ -162,7 +170,7 @@ object ConvertPdf { pdf .through( - ctx.store.fileRepo.save( + store.fileRepo.save( ctx.args.meta.collective, FileCategory.AttachmentConvert, MimeTypeHint(hint.filename, hint.advertised) @@ -170,32 +178,33 @@ object ConvertPdf { ) .compile .lastOrError - .flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId)) + .flatMap(fmId => updateAttachment[F](ctx, store, ra, fmId, newName).map(_ => fmId)) .map(fmId => ra.copy(fileId = fmId, name = newName)) } private def updateAttachment[F[_]: Sync]( ctx: Context[F, _], + store: Store[F], ra: RAttachment, fmId: FileKey, newName: Option[String] ): F[Unit] = for { - oldFile <- ctx.store.transact(RAttachment.findById(ra.id)) + oldFile <- store.transact(RAttachment.findById(ra.id)) _ <- - ctx.store + store .transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName)) _ <- oldFile match { case Some(raPrev) => for { sameFile <- - ctx.store + store .transact(RAttachmentSource.isSameFile(ra.id, raPrev.fileId)) _ <- if (sameFile) ().pure[F] else ctx.logger.info("Deleting previous attachment file") *> - ctx.store.fileRepo + store.fileRepo .delete(raPrev.fileId) .attempt .flatMap { diff --git 
a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 092c8495..b5ae352b 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -13,7 +13,8 @@ import cats.implicits._ import fs2.Stream import docspell.common._ -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.file.FileMetadata import docspell.store.queries.QItem import docspell.store.records._ @@ -21,13 +22,13 @@ import docspell.store.records._ /** Task that creates the item. */ object CreateItem { - def apply[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = - findExisting[F].flatMap { + def apply[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] = + findExisting[F](store).flatMap { case Some(ri) => Task.pure(ri) - case None => createNew[F] + case None => createNew[F](store) } - def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = + def createNew[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] = Task { ctx => def isValidFile(fm: FileMetadata) = ctx.args.meta.validFileTypes.isEmpty || @@ -36,11 +37,11 @@ object CreateItem { def fileMetas(itemId: Ident, now: Timestamp) = Stream - .eval(ctx.store.transact(RAttachment.nextPosition(itemId))) + .eval(store.transact(RAttachment.nextPosition(itemId))) .flatMap { offset => Stream .emits(ctx.args.files) - .evalMap(f => ctx.store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm))) + .evalMap(f => store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm))) .collect { case (f, Some(fm)) if isValidFile(fm) => f } .zipWithIndex .evalMap { case (f, index) => @@ -67,11 +68,11 @@ object CreateItem { (for { _ <- OptionT.liftF( ctx.logger.info( - s"Loading item with id ${id.id} to ammend" + s"Loading item with id ${id.id} to amend" ) ) item <- OptionT( - ctx.store + 
store .transact(RItem.findByIdAndCollective(id, ctx.args.meta.collective)) ) } yield (1, item)) @@ -88,7 +89,7 @@ object CreateItem { ctx.args.meta.direction.getOrElse(Direction.Incoming), ItemState.Premature ) - n <- ctx.store.transact(RItem.insert(item)) + n <- store.transact(RItem.insert(item)) } yield (n, item) } @@ -98,7 +99,7 @@ object CreateItem { _ <- if (it._1 != 1) storeItemError[F](ctx) else ().pure[F] now <- Timestamp.current[F] fm <- fileMetas(it._2.id, now) - k <- fm.traverse(insertAttachment(ctx)) + k <- fm.traverse(insertAttachment(store)) _ <- logDifferences(ctx, fm, k.sum) dur <- time _ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}") @@ -115,25 +116,27 @@ object CreateItem { ) } - def insertAttachment[F[_]](ctx: Context[F, _])(ra: RAttachment): F[Int] = { + def insertAttachment[F[_]](store: Store[F])(ra: RAttachment): F[Int] = { val rs = RAttachmentSource.of(ra) - ctx.store.transact(for { + store.transact(for { n <- RAttachment.insert(ra) _ <- RAttachmentSource.insert(rs) } yield n) } - private def findExisting[F[_]: Sync]: Task[F, ProcessItemArgs, Option[ItemData]] = + private def findExisting[F[_]: Sync]( + store: Store[F] + ): Task[F, ProcessItemArgs, Option[ItemData]] = Task { ctx => val states = ItemState.invalidStates val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet for { - cand <- ctx.store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states)) + cand <- store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states)) _ <- if (cand.nonEmpty) ctx.logger.warn(s"Found ${cand.size} existing item with these files.") else ().pure[F] - ht <- cand.drop(1).traverse(ri => QItem.delete(ctx.store)(ri.id, ri.cid)) + ht <- cand.drop(1).traverse(ri => QItem.delete(store)(ri.id, ri.cid)) _ <- if (ht.sum > 0) ctx.logger.warn(s"Removed ${ht.sum} items with same attachments") @@ -144,7 +147,7 @@ object CreateItem { OptionT( // load attachments but only those mentioned in the task's arguments cand.headOption.traverse(ri => - 
ctx.store + store .transact(RAttachment.findByItemCollectiveSource(ri.id, ri.cid, fids)) .flatTap(ats => ctx.logger.debug( @@ -156,7 +159,7 @@ object CreateItem { ) .getOrElse(Vector.empty) orig <- rms.traverse(a => - ctx.store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s)) + store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s)) ) origMap = orig diff --git a/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala index 3cd9d3ad..58f24854 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala @@ -12,8 +12,9 @@ import cats.effect.Sync import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.Task import docspell.logging.Logger +import docspell.scheduler.Task +import docspell.store.Store /** After candidates have been determined, the set is reduced by doing some cross checks. 
* For example: if a organization is suggested as correspondent, the correspondent person @@ -22,13 +23,15 @@ import docspell.logging.Logger */ object CrossCheckProposals { - def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def apply[F[_]: Sync]( + store: Store[F] + )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = Task { ctx => val proposals = data.finalProposals val corrOrg = proposals.find(MetaProposalType.CorrOrg) (for { orgRef <- OptionT.fromOption[F](corrOrg) - persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, ctx)) + persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, store)) clProps <- OptionT.liftF( personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef) ) @@ -53,7 +56,7 @@ object CrossCheckProposals { mpl.find(MetaProposalType.CorrPerson) match { case Some(ppl) => val list = ppl.values.filter(c => - persRefs.get(c.ref.id).exists(_.organization == Some(orgId)) + persRefs.get(c.ref.id).exists(_.organization.contains(orgId)) ) if (ppl.values.toList == list) mpl.pure[F] diff --git a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala index 41fcd0e3..ed0035b3 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala @@ -10,7 +10,8 @@ import cats.effect._ import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.queries.QItem import docspell.store.records.RFileMeta import docspell.store.records.RJob @@ -20,46 +21,52 @@ import doobie._ object DuplicateCheck { type Args = ProcessItemArgs - def apply[F[_]: Sync]: Task[F, Args, Args] = + def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Args] = Task { ctx => if (ctx.args.meta.skipDuplicate) for { - 
retries <- getRetryCount(ctx) + retries <- getRetryCount(ctx, store) res <- if (retries == 0) - ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx) + ctx.logger + .debug("Checking for duplicate files") *> removeDuplicates(ctx, store) else ctx.args.pure[F] } yield res else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F] } - def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] = + def removeDuplicates[F[_]: Sync]( + ctx: Context[F, Args], + store: Store[F] + ): F[ProcessItemArgs] = for { - fileMetas <- findDuplicates(ctx) - _ <- fileMetas.traverse(deleteDuplicate(ctx)) + fileMetas <- findDuplicates(ctx, store) + _ <- fileMetas.traverse(deleteDuplicate(ctx, store)) ids = fileMetas.filter(_.exists).map(_.fm.id).toSet } yield ctx.args.copy(files = ctx.args.files.filterNot(f => ids.contains(f.fileMetaId)) ) - private def getRetryCount[F[_]: Sync](ctx: Context[F, Args]): F[Int] = - ctx.store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0)) + private def getRetryCount[F[_]: Sync](ctx: Context[F, _], store: Store[F]): F[Int] = + store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0)) private def deleteDuplicate[F[_]: Sync]( - ctx: Context[F, Args] + ctx: Context[F, Args], + store: Store[F] )(fd: FileMetaDupes): F[Unit] = { val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name) if (fd.exists) ctx.logger - .info(s"Deleting duplicate file $fname!") *> ctx.store.fileRepo + .info(s"Deleting duplicate file $fname!") *> store.fileRepo .delete(fd.fm.id) else ().pure[F] } private def findDuplicates[F[_]]( - ctx: Context[F, Args] + ctx: Context[F, Args], + store: Store[F] ): F[Vector[FileMetaDupes]] = - ctx.store.transact(for { + store.transact(for { fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId)) dupes <- fileMetas.traverse(checkDuplicate(ctx)) } yield dupes) diff --git a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala 
b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala index 38c129b1..521aeb5b 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala @@ -12,25 +12,28 @@ import cats.effect.Sync import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records.{RAttachmentMeta, RPerson} /** Calculate weights for candidates that adds the most likely candidate a lower number. */ object EvalProposals { - def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = - Task { ctx => + def apply[F[_]: Sync]( + store: Store[F] + )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = + Task { _ => for { now <- Timestamp.current[F] - personRefs <- findOrganizationRelation[F](data, ctx) + personRefs <- findOrganizationRelation[F](data, store) metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs)) } yield data.copy(metas = metas) } def findOrganizationRelation[F[_]: Sync]( data: ItemData, - ctx: Context[F, _] + store: Store[F] ): F[Map[Ident, PersonRef]] = { val corrPersIds = data.metas .map(_.proposals) @@ -38,7 +41,7 @@ object EvalProposals { .flatMap(_.find(MetaProposalType.CorrPerson)) .flatMap(_.values.toList.map(_.ref.id)) .toSet - ctx.store + store .transact(RPerson.findOrganization(corrPersIds)) .map(_.map(p => (p.id, p)).toMap) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 17f90b59..5e2d86b0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -18,7 +18,8 @@ import fs2.Stream import docspell.common._ import docspell.files.Zip import docspell.joex.mail._ -import docspell.joex.scheduler._ 
+import docspell.scheduler._ +import docspell.store.Store import docspell.store.records._ import emil.Mail @@ -34,39 +35,41 @@ import emil.Mail object ExtractArchive { type Args = ProcessItemArgs - def apply[F[_]: Async]( + def apply[F[_]: Async](store: Store[F])( item: ItemData ): Task[F, Args, ItemData] = - multiPass(item, None).map(_._2) + multiPass(store, item, None).map(_._2) def multiPass[F[_]: Async]( + store: Store[F], item: ItemData, archive: Option[RAttachmentArchive] ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = - singlePass(item, archive).flatMap { t => + singlePass(store, item, archive).flatMap { t => if (t._1.isEmpty) Task.pure(t) - else multiPass(t._2, t._1) + else multiPass(store, t._2, t._1) } def singlePass[F[_]: Async]( + store: Store[F], item: ItemData, archive: Option[RAttachmentArchive] ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = Task { ctx => def extract(ra: RAttachment, pos: Int): F[Extracted] = - findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, pos, m)) + findMime(store)(ra).flatMap(m => extractSafe(ctx, store, archive)(ra, pos, m)) for { - lastPos <- ctx.store.transact(RAttachment.nextPosition(item.item.id)) + lastPos <- store.transact(RAttachment.nextPosition(item.item.id)) extracts <- item.attachments.zipWithIndex .traverse(t => extract(t._1, lastPos + t._2)) .map(Monoid[Extracted].combineAll) .map(fixPositions) nra = extracts.files - _ <- extracts.files.traverse(storeAttachment(ctx)) + _ <- extracts.files.traverse(storeAttachment(store)) naa = extracts.archives - _ <- naa.traverse(storeArchive(ctx)) + _ <- naa.traverse(storeArchive(store)) } yield naa.headOption -> item.copy( attachments = nra, originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap, @@ -83,25 +86,26 @@ object ExtractArchive { if (extract.archives.isEmpty) extract else extract.updatePositions - def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = - 
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) + def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] = + OptionT(store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) .getOrElse(MimeType.octetStream) def extractSafe[F[_]: Async]( ctx: Context[F, Args], + store: Store[F], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] = mime match { case MimeType.ZipMatch(_) if ra.name.exists(_.toLowerCase.endsWith(".zip")) => ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *> - extractZip(ctx, archive)(ra, pos) - .flatMap(cleanupParents(ctx, ra, archive)) + extractZip(ctx, store, archive)(ra, pos) + .flatMap(cleanupParents(ctx, store, ra, archive)) case MimeType.EmailMatch(_) => ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *> - extractMail(ctx, archive)(ra, pos) - .flatMap(cleanupParents(ctx, ra, archive)) + extractMail(ctx, store, archive)(ra, pos) + .flatMap(cleanupParents(ctx, store, ra, archive)) case _ => ctx.logger.debug(s"Not an archive: ${mime.asString}") *> @@ -110,6 +114,7 @@ object ExtractArchive { def cleanupParents[F[_]: Sync]( ctx: Context[F, _], + store: Store[F], ra: RAttachment, archive: Option[RAttachmentArchive] )(extracted: Extracted): F[Extracted] = @@ -119,30 +124,31 @@ object ExtractArchive { _ <- ctx.logger.debug( s"Extracted inner attachment ${ra.name}. Remove it completely." ) - _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) - _ <- ctx.store.transact(RAttachment.delete(ra.id)) - _ <- ctx.store.fileRepo.delete(ra.fileId) + _ <- store.transact(RAttachmentArchive.delete(ra.id)) + _ <- store.transact(RAttachment.delete(ra.id)) + _ <- store.fileRepo.delete(ra.fileId) } yield extracted case None => for { _ <- ctx.logger.debug( s"Extracted attachment ${ra.name}. Remove it from the item." 
) - _ <- ctx.store.transact(RAttachment.delete(ra.id)) + _ <- store.transact(RAttachment.delete(ra.id)) } yield extracted.copy(files = extracted.files.filter(_.id != ra.id)) } def extractZip[F[_]: Async]( ctx: Context[F, Args], + store: Store[F], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val zipData = ctx.store.fileRepo.getBytes(ra.fileId) + val zipData = store.fileRepo.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> zipData .through(Zip.unzipP[F](8192, glob)) .zipWithIndex - .flatMap(handleEntry(ctx, ra, pos, archive, None)) + .flatMap(handleEntry(ctx, store, ra, pos, archive, None)) .foldMonoid .compile .lastOrError @@ -150,9 +156,10 @@ object ExtractArchive { def extractMail[F[_]: Async]( ctx: Context[F, Args], + store: Store[F], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val email: Stream[F, Byte] = ctx.store.fileRepo.getBytes(ra.fileId) + val email: Stream[F, Byte] = store.fileRepo.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) @@ -170,7 +177,9 @@ object ExtractArchive { ReadMail .mailToEntries(ctx.logger, glob, attachOnly)(mail) .zipWithIndex - .flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta) + .flatMap(handleEntry(ctx, store, ra, pos, archive, mId)) ++ Stream.eval( + givenMeta + ) } .foldMonoid .compile @@ -185,6 +194,7 @@ object ExtractArchive { def handleEntry[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], ra: RAttachment, pos: Int, archive: Option[RAttachmentArchive], @@ -195,7 +205,7 @@ object ExtractArchive { val (entry, subPos) = tentry val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString) val fileId = entry.data.through( - ctx.store.fileRepo + store.fileRepo .save(ctx.args.meta.collective, 
FileCategory.AttachmentSource, mimeHint) ) @@ -217,16 +227,16 @@ object ExtractArchive { } - def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = { - val insert = CreateItem.insertAttachment(ctx)(ra) + def storeAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] = { + val insert = CreateItem.insertAttachment(store)(ra) for { - n1 <- ctx.store.transact(RAttachment.updatePosition(ra.id, ra.position)) + n1 <- store.transact(RAttachment.updatePosition(ra.id, ra.position)) n2 <- if (n1 > 0) 0.pure[F] else insert } yield n1 + n2 } - def storeArchive[F[_]](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] = - ctx.store.transact(RAttachmentArchive.insert(aa)) + def storeArchive[F[_]](store: Store[F])(aa: RAttachmentArchive): F[Int] = + store.transact(RAttachmentArchive.insert(aa)) case class Extracted( files: Vector[RAttachment], diff --git a/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala index a6898f51..b5cf948a 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala @@ -16,7 +16,8 @@ import docspell.analysis.contact._ import docspell.common.MetaProposal.Candidate import docspell.common._ import docspell.joex.Config -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.records._ /** Super simple approach to find corresponding meta data to an item by looking up values @@ -26,7 +27,8 @@ object FindProposal { type Args = ProcessItemArgs def apply[F[_]: Sync]( - cfg: Config.TextAnalysis + cfg: Config.TextAnalysis, + store: Store[F] )(data: ItemData): Task[F, Args, ItemData] = Task { ctx => val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels))) @@ -34,15 +36,16 @@ object FindProposal { _ <- ctx.logger.info("Starting 
find-proposal") rmv <- rmas .traverse(rm => - processAttachment(cfg, rm, data.findDates(rm), ctx) + processAttachment(cfg, rm, data.findDates(rm), ctx, store) .map(ml => rm.copy(proposals = ml)) ) - clp <- lookupClassifierProposals(ctx, data.classifyProposals) + clp <- lookupClassifierProposals(ctx, store, data.classifyProposals) } yield data.copy(metas = rmv, classifyProposals = clp) } def lookupClassifierProposals[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], mpList: MetaProposalList ): F[MetaProposalList] = { val coll = ctx.args.meta.collective @@ -50,7 +53,7 @@ object FindProposal { def lookup(mp: MetaProposal): F[Option[IdRef]] = mp.proposalType match { case MetaProposalType.CorrOrg => - ctx.store + store .transact( ROrganization .findLike(coll, mp.values.head.ref.name.toLowerCase, OrgUse.notDisabled) @@ -60,7 +63,7 @@ object FindProposal { ctx.logger.debug(s"Found classifier organization for $mp: $oref") ) case MetaProposalType.CorrPerson => - ctx.store + store .transact( RPerson .findLike( @@ -74,7 +77,7 @@ object FindProposal { ctx.logger.debug(s"Found classifier corr-person for $mp: $oref") ) case MetaProposalType.ConcPerson => - ctx.store + store .transact( RPerson .findLike( @@ -88,7 +91,7 @@ object FindProposal { ctx.logger.debug(s"Found classifier conc-person for $mp: $oref") ) case MetaProposalType.ConcEquip => - ctx.store + store .transact( REquipment .findLike( @@ -123,9 +126,10 @@ object FindProposal { cfg: Config.TextAnalysis, rm: RAttachmentMeta, rd: Vector[NerDateLabel], - ctx: Context[F, ProcessItemArgs] + ctx: Context[F, Args], + store: Store[F] ): F[MetaProposalList] = { - val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx)) + val finder = Finder.searchExact(ctx, store).next(Finder.searchFuzzy(ctx, store)) List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd)) .traverse(identity) .map(MetaProposalList.flatten) @@ -215,19 +219,24 @@ object FindProposal { def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] 
= _ => value.pure[F] - def searchExact[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] = + def searchExact[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] = labels => - labels.toList.traverse(nl => search(nl, true, ctx)).map(MetaProposalList.flatten) + labels.toList + .traverse(nl => search(nl, true, ctx, store)) + .map(MetaProposalList.flatten) - def searchFuzzy[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] = + def searchFuzzy[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] = labels => - labels.toList.traverse(nl => search(nl, false, ctx)).map(MetaProposalList.flatten) + labels.toList + .traverse(nl => search(nl, false, ctx, store)) + .map(MetaProposalList.flatten) } private def search[F[_]: Sync]( nt: NerLabel, exact: Boolean, - ctx: Context[F, ProcessItemArgs] + ctx: Context[F, ProcessItemArgs], + store: Store[F] ): F[MetaProposalList] = { val value = if (exact) normalizeSearchValue(nt.label) @@ -243,7 +252,7 @@ object FindProposal { nt.tag match { case NerTag.Organization => ctx.logger.debug(s"Looking for organizations: $value") *> - ctx.store + store .transact( ROrganization .findLike(ctx.args.meta.collective, value, OrgUse.notDisabled) @@ -251,20 +260,20 @@ object FindProposal { .map(MetaProposalList.from(MetaProposalType.CorrOrg, nt)) case NerTag.Person => - val s1 = ctx.store + val s1 = store .transact( RPerson .findLike(ctx.args.meta.collective, value, PersonUse.concerningAndBoth) ) .map(MetaProposalList.from(MetaProposalType.ConcPerson, nt)) - val s2 = ctx.store + val s2 = store .transact( RPerson .findLike(ctx.args.meta.collective, value, PersonUse.correspondentAndBoth) ) .map(MetaProposalList.from(MetaProposalType.CorrPerson, nt)) val s3 = - ctx.store + store .transact( ROrganization .findLike(ctx.args.meta.collective, value, OrgUse.notDisabled) @@ -283,7 +292,7 @@ object FindProposal { case NerTag.Misc => ctx.logger.debug(s"Looking for equipments: $value") *> - ctx.store + store .transact( REquipment 
.findLike(ctx.args.meta.collective, value, EquipmentUse.notDisabled) @@ -291,7 +300,7 @@ object FindProposal { .map(MetaProposalList.from(MetaProposalType.ConcEquip, nt)) case NerTag.Email => - searchContact(nt, ContactKind.Email, value, ctx) + searchContact(nt, ContactKind.Email, value, ctx, store) case NerTag.Website => if (!exact) { @@ -301,9 +310,9 @@ object FindProposal { .map(_.toPrimaryDomain.asString) .map(s => s"%$s%") .getOrElse(value) - searchContact(nt, ContactKind.Website, searchString, ctx) + searchContact(nt, ContactKind.Website, searchString, ctx, store) } else - searchContact(nt, ContactKind.Website, value, ctx) + searchContact(nt, ContactKind.Website, value, ctx, store) case NerTag.Date => // There is no database search required for this tag @@ -315,18 +324,19 @@ object FindProposal { nt: NerLabel, kind: ContactKind, value: String, - ctx: Context[F, ProcessItemArgs] + ctx: Context[F, ProcessItemArgs], + store: Store[F] ): F[MetaProposalList] = { - val orgs = ctx.store + val orgs = store .transact(ROrganization.findLike(ctx.args.meta.collective, kind, value)) .map(MetaProposalList.from(MetaProposalType.CorrOrg, nt)) - val corrP = ctx.store + val corrP = store .transact( RPerson .findLike(ctx.args.meta.collective, kind, value, PersonUse.correspondentAndBoth) ) .map(MetaProposalList.from(MetaProposalType.CorrPerson, nt)) - val concP = ctx.store + val concP = store .transact( RPerson .findLike(ctx.args.meta.collective, kind, value, PersonUse.concerningAndBoth) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 4d1c03b5..c96c0189 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -8,6 +8,7 @@ package docspell.joex.process import docspell.common._ import docspell.joex.process.ItemData.AttachmentDates +import docspell.scheduler.JobTaskResultEncoder import 
docspell.store.records.{RAttachment, RAttachmentMeta, RItem} import io.circe.syntax.EncoderOps @@ -118,7 +119,28 @@ object ItemData { ) .asJson, "tags" -> data.tags.asJson, - "assumedTags" -> data.classifyTags.asJson + "assumedTags" -> data.classifyTags.asJson, + "assumedCorrOrg" -> data.finalProposals + .find(MetaProposalType.CorrOrg) + .map(_.values.head.ref) + .asJson ) } + + implicit val jobTaskResultEncoder: JobTaskResultEncoder[ItemData] = + JobTaskResultEncoder.fromJson[ItemData].withMessage { data => + val tags = + if (data.tags.isEmpty && data.classifyTags.isEmpty) "" + else (data.tags ++ data.classifyTags).mkString("[", ", ", "]") + + val corg = + data.finalProposals.find(MetaProposalType.CorrOrg).map(_.values.head.ref.name) + val cpers = + data.finalProposals.find(MetaProposalType.CorrPerson).map(_.values.head.ref.name) + val org = corg match { + case Some(o) => s" by $o" + cpers.map(p => s"/$p").getOrElse("") + case None => cpers.map(p => s" by $p").getOrElse("") + } + s"Processed '${data.item.name}' $tags$org" + } } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index fa1774e0..8d59a969 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -17,59 +17,62 @@ import docspell.common.{ItemState, ProcessItemArgs} import docspell.ftsclient.FtsClient import docspell.joex.Config import docspell.joex.analysis.RegexNerFile -import docspell.joex.scheduler.Task +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.queries.QItem import docspell.store.records.RItem object ItemHandler { type Args = ProcessItemArgs - def onCancel[F[_]: Sync]: Task[F, Args, Unit] = - logWarn("Now cancelling.").flatMap(_ => - markItemCreated.flatMap { + def onCancel[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] = + logWarn[F]("Now 
cancelling.").flatMap(_ => + markItemCreated(store).flatMap { case true => Task.pure(()) case false => - deleteByFileIds[F].flatMap(_ => deleteFiles) + deleteByFileIds[F](store).flatMap(_ => deleteFiles(store)) } ) def newItem[F[_]: Async]( cfg: Config, + store: Store[F], itemOps: OItem[F], fts: FtsClient[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F] ): Task[F, Args, Option[ItemData]] = - logBeginning.flatMap(_ => - DuplicateCheck[F] + logBeginning[F].flatMap(_ => + DuplicateCheck[F](store) .flatMap(args => - if (args.files.isEmpty) logNoFiles.map(_ => None) + if (args.files.isEmpty) logNoFiles[F].map(_ => None) else { val create: Task[F, Args, ItemData] = - CreateItem[F].contramap(_ => args.pure[F]) + CreateItem[F](store).contramap(_ => args.pure[F]) create - .flatMap(itemStateTask(ItemState.Processing)) - .flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer)) + .flatMap(itemStateTask(store, ItemState.Processing)) + .flatMap(safeProcess[F](cfg, store, itemOps, fts, analyser, regexNer)) .map(_.some) } ) ) - def itemStateTask[F[_]: Sync, A]( - state: ItemState - )(data: ItemData): Task[F, A, ItemData] = - Task(ctx => - ctx.store + def itemStateTask[F[_]: Sync, A](store: Store[F], state: ItemState)( + data: ItemData + ): Task[F, A, ItemData] = + Task(_ => + store .transact(RItem.updateState(data.item.id, state, ItemState.invalidStates)) .map(_ => data) ) - def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] = + def isLastRetry[F[_]]: Task[F, Args, Boolean] = Task(_.isLastRetry) def safeProcess[F[_]: Async]( cfg: Config, + store: Store[F], itemOps: OItem[F], fts: FtsClient[F], analyser: TextAnalyser[F], @@ -77,30 +80,31 @@ object ItemHandler { )(data: ItemData): Task[F, Args, ItemData] = isLastRetry[F].flatMap { case true => - ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data).attempt.flatMap { - case Right(d) => - Task.pure(d) - case Left(ex) => - logWarn[F]( - "Processing failed on last retry. Creating item but without proposals." 
- ).flatMap(_ => itemStateTask(ItemState.Created)(data)) - .andThen(_ => Sync[F].raiseError(ex)) - } + ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data).attempt + .flatMap { + case Right(d) => + Task.pure(d) + case Left(ex) => + logWarn[F]( + "Processing failed on last retry. Creating item but without proposals." + ).flatMap(_ => itemStateTask(store, ItemState.Created)(data)) + .andThen(_ => Sync[F].raiseError(ex)) + } case false => - ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data) - .flatMap(itemStateTask(ItemState.Created)) + ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data) + .flatMap(itemStateTask(store, ItemState.Created)) } - private def markItemCreated[F[_]: Sync]: Task[F, Args, Boolean] = + private def markItemCreated[F[_]: Sync](store: Store[F]): Task[F, Args, Boolean] = Task { ctx => val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet (for { - item <- OptionT(ctx.store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq))) + item <- OptionT(store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq))) _ <- OptionT.liftF( ctx.logger.info("Processing cancelled. 
Marking item as created anyways.") ) _ <- OptionT.liftF( - ctx.store + store .transact( RItem.updateState(item.id, ItemState.Created, ItemState.invalidStates) ) @@ -111,11 +115,11 @@ object ItemHandler { ) } - private def deleteByFileIds[F[_]: Sync]: Task[F, Args, Unit] = + private def deleteByFileIds[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] = Task { ctx => val states = ItemState.invalidStates for { - items <- ctx.store.transact( + items <- store.transact( QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states) ) _ <- @@ -124,16 +128,16 @@ object ItemHandler { ctx.logger.info( s"No items found for file ids ${ctx.args.files.map(_.fileMetaId)}" ) - _ <- items.traverse(i => QItem.delete(ctx.store)(i.id, ctx.args.meta.collective)) + _ <- items.traverse(i => QItem.delete(store)(i.id, ctx.args.meta.collective)) } yield () } - private def deleteFiles[F[_]: Sync]: Task[F, Args, Unit] = + private def deleteFiles[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] = Task(ctx => ctx.logger.info("Deleting input files …") *> Stream .emits(ctx.args.files.map(_.fileMetaId)) - .evalMap(id => ctx.store.fileRepo.delete(id).attempt) + .evalMap(id => store.fileRepo.delete(id).attempt) .compile .drain ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index 7f5418fc..a656c230 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -11,20 +11,25 @@ import cats.effect.Sync import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.records.RItem object LinkProposal { - def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def onlyNew[F[_]: Sync]( + store: Store[F] + )(data: ItemData): Task[F, ProcessItemArgs, 
ItemData] = if (data.item.state.isValid) Task .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item")) .map(_ => data) else - LinkProposal[F](data) + LinkProposal[F](store)(data) - def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def apply[F[_]: Sync]( + store: Store[F] + )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = if (data.item.state == ItemState.Confirmed) Task .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item")) @@ -35,7 +40,7 @@ object LinkProposal { ctx.logger.info(s"Starting linking proposals") *> MetaProposalType.all - .traverse(applyValue(data, proposals, ctx)) + .traverse(applyValue(data, proposals, ctx, store)) .map(result => ctx.logger.info(s"Results from proposal processing: $result")) .map(_ => data) } @@ -43,7 +48,8 @@ object LinkProposal { def applyValue[F[_]: Sync]( data: ItemData, proposalList: MetaProposalList, - ctx: Context[F, ProcessItemArgs] + ctx: Context[F, ProcessItemArgs], + store: Store[F] )(mpt: MetaProposalType): F[Result] = data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match { case None => @@ -51,29 +57,30 @@ object LinkProposal { Result.noneFound(mpt).pure[F] case Some(a) if a.isSingleValue => ctx.logger.info(s"Found one candidate for ${a.proposalType}") *> - setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ => - Result.single(mpt) + setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map( + _ => Result.single(mpt) ) case Some(a) => val ids = a.values.map(_.ref.id.id) ctx.logger.info( s"Found many (${a.size}, $ids) candidates for ${a.proposalType}. Setting first." 
) *> - setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ => - Result.multiple(mpt) + setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map( + _ => Result.multiple(mpt) ) } def setItemMeta[F[_]: Sync]( itemId: Ident, ctx: Context[F, ProcessItemArgs], + store: Store[F], mpt: MetaProposalType, value: Ident ): F[Int] = mpt match { case MetaProposalType.CorrOrg => ctx.logger.debug(s"Updating item organization with: ${value.id}") *> - ctx.store.transact( + store.transact( RItem.updateCorrOrg( NonEmptyList.of(itemId), ctx.args.meta.collective, @@ -82,7 +89,7 @@ object LinkProposal { ) case MetaProposalType.ConcPerson => ctx.logger.debug(s"Updating item concerning person with: $value") *> - ctx.store.transact( + store.transact( RItem.updateConcPerson( NonEmptyList.of(itemId), ctx.args.meta.collective, @@ -91,7 +98,7 @@ object LinkProposal { ) case MetaProposalType.CorrPerson => ctx.logger.debug(s"Updating item correspondent person with: $value") *> - ctx.store.transact( + store.transact( RItem.updateCorrPerson( NonEmptyList.of(itemId), ctx.args.meta.collective, @@ -100,7 +107,7 @@ object LinkProposal { ) case MetaProposalType.ConcEquip => ctx.logger.debug(s"Updating item concerning equipment with: $value") *> - ctx.store.transact( + store.transact( RItem.updateConcEquip( NonEmptyList.of(itemId), ctx.args.meta.collective, @@ -112,7 +119,7 @@ object LinkProposal { case Some(ld) => val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC)) ctx.logger.debug(s"Updating item date ${value.id}") *> - ctx.store.transact( + store.transact( RItem.updateDate( NonEmptyList.of(itemId), ctx.args.meta.collective, @@ -128,7 +135,7 @@ object LinkProposal { case Some(ld) => val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC)) ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *> - ctx.store.transact( + store.transact( RItem.updateDueDate( NonEmptyList.of(itemId), ctx.args.meta.collective, diff --git 
a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index 18cb7a31..6087b37f 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -7,6 +7,7 @@ package docspell.joex.process import cats.effect._ +import cats.implicits._ import docspell.analysis.TextAnalyser import docspell.backend.ops.OItem @@ -14,7 +15,8 @@ import docspell.common.ProcessItemArgs import docspell.ftsclient.FtsClient import docspell.joex.Config import docspell.joex.analysis.RegexNerFile -import docspell.joex.scheduler.Task +import docspell.scheduler.Task +import docspell.store.Store object ProcessItem { @@ -23,12 +25,13 @@ object ProcessItem { itemOps: OItem[F], fts: FtsClient[F], analyser: TextAnalyser[F], - regexNer: RegexNerFile[F] + regexNer: RegexNerFile[F], + store: Store[F] )(item: ItemData): Task[F, ProcessItemArgs, ItemData] = - ExtractArchive(item) + ExtractArchive(store)(item) .flatMap(Task.setProgress(20)) - .flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80))) - .flatMap(LinkProposal.onlyNew[F]) + .flatMap(processAttachments0(cfg, fts, analyser, regexNer, store, (40, 60, 80))) + .flatMap(LinkProposal.onlyNew[F](store)) .flatMap(SetGivenData.onlyNew[F](itemOps)) .flatMap(Task.setProgress(99)) .flatMap(RemoveEmptyItem(itemOps)) @@ -37,34 +40,37 @@ object ProcessItem { cfg: Config, fts: FtsClient[F], analyser: TextAnalyser[F], - regexNer: RegexNerFile[F] + regexNer: RegexNerFile[F], + store: Store[F] )(item: ItemData): Task[F, ProcessItemArgs, ItemData] = - processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item) + processAttachments0[F](cfg, fts, analyser, regexNer, store, (30, 60, 90))(item) def analysisOnly[F[_]: Async]( cfg: Config, analyser: TextAnalyser[F], - regexNer: RegexNerFile[F] + regexNer: RegexNerFile[F], + store: Store[F] )(item: ItemData): Task[F, 
ProcessItemArgs, ItemData] = - TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item) - .flatMap(FindProposal[F](cfg.textAnalysis)) - .flatMap(EvalProposals[F]) - .flatMap(CrossCheckProposals[F]) - .flatMap(SaveProposals[F]) + TextAnalysis[F](cfg.textAnalysis, analyser, regexNer, store)(item) + .flatMap(FindProposal[F](cfg.textAnalysis, store)) + .flatMap(EvalProposals[F](store)) + .flatMap(CrossCheckProposals[F](store)) + .flatMap(SaveProposals[F](store)) private def processAttachments0[F[_]: Async]( cfg: Config, fts: FtsClient[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F], + store: Store[F], progress: (Int, Int, Int) )(item: ItemData): Task[F, ProcessItemArgs, ItemData] = - ConvertPdf(cfg.convert, item) + ConvertPdf(cfg.convert, store, item) .flatMap(Task.setProgress(progress._1)) - .flatMap(TextExtraction(cfg.extraction, fts)) - .flatMap(AttachmentPreview(cfg.extraction.preview)) - .flatMap(AttachmentPageCount()) + .flatMap(TextExtraction(cfg.extraction, fts, store)) + .flatMap(AttachmentPreview(cfg.extraction.preview, store)) + .flatMap(AttachmentPageCount(store)) .flatMap(Task.setProgress(progress._2)) - .flatMap(analysisOnly[F](cfg, analyser, regexNer)) + .flatMap(analysisOnly[F](cfg, analyser, regexNer, store)) .flatMap(Task.setProgress(progress._3)) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index 15aa939d..1863d2ef 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -16,8 +16,9 @@ import docspell.common._ import docspell.ftsclient.FtsClient import docspell.joex.Config import docspell.joex.analysis.RegexNerFile -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task +import docspell.scheduler.Context +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.queries.QItem 
import docspell.store.records.RAttachment import docspell.store.records.RAttachmentSource @@ -32,13 +33,14 @@ object ReProcessItem { fts: FtsClient[F], itemOps: OItem[F], analyser: TextAnalyser[F], - regexNer: RegexNerFile[F] + regexNer: RegexNerFile[F], + store: Store[F] ): Task[F, Args, Unit] = Task .log[F, Args](_.info("===== Start reprocessing ======")) .flatMap(_ => - loadItem[F] - .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer)) + loadItem[F](store) + .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer, store)) .map(_ => ()) ) @@ -53,13 +55,13 @@ object ReProcessItem { else ra => selection.contains(ra.id) } - def loadItem[F[_]: Sync]: Task[F, Args, ItemData] = + def loadItem[F[_]: Sync](store: Store[F]): Task[F, Args, ItemData] = Task { ctx => (for { - item <- OptionT(ctx.store.transact(RItem.findById(ctx.args.itemId))) - attach <- OptionT.liftF(ctx.store.transact(RAttachment.findByItem(item.id))) + item <- OptionT(store.transact(RItem.findById(ctx.args.itemId))) + attach <- OptionT.liftF(store.transact(RAttachment.findByItem(item.id))) asrc <- - OptionT.liftF(ctx.store.transact(RAttachmentSource.findByItem(ctx.args.itemId))) + OptionT.liftF(store.transact(RAttachmentSource.findByItem(ctx.args.itemId))) asrcMap = asrc.map(s => s.id -> s).toMap // copy the original files over to attachments to run the default processing task // the processing doesn't touch the original files, only RAttachments @@ -97,6 +99,7 @@ object ReProcessItem { itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F], + store: Store[F], data: ItemData ): Task[F, Args, ItemData] = { @@ -121,27 +124,27 @@ object ReProcessItem { Nil ).pure[F] - getLanguage[F].flatMap { lang => + getLanguage[F](store).flatMap { lang => ProcessItem - .processAttachments[F](cfg, fts, analyser, regexNer)(data) - .flatMap(LinkProposal[F]) + .processAttachments[F](cfg, fts, analyser, regexNer, store)(data) + .flatMap(LinkProposal[F](store)) 
.flatMap(SetGivenData[F](itemOps)) .contramap[Args](convertArgs(lang)) } } - def getLanguage[F[_]: Sync]: Task[F, Args, Language] = + def getLanguage[F[_]: Sync](store: Store[F]): Task[F, Args, Language] = Task { ctx => val lang1 = OptionT( - ctx.store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption) + store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption) ) - val lang2 = OptionT(ctx.store.transact(RCollective.findByItem(ctx.args.itemId))) + val lang2 = OptionT(store.transact(RCollective.findByItem(ctx.args.itemId))) .map(_.language) lang1.orElse(lang2).getOrElse(Language.German) } - def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] = + def isLastRetry[F[_]]: Task[F, Args, Boolean] = Task(_.isLastRetry) def safeProcess[F[_]: Async]( @@ -149,11 +152,12 @@ object ReProcessItem { fts: FtsClient[F], itemOps: OItem[F], analyser: TextAnalyser[F], - regexNer: RegexNerFile[F] + regexNer: RegexNerFile[F], + store: Store[F] )(data: ItemData): Task[F, Args, ItemData] = isLastRetry[F].flatMap { case true => - processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt + processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data).attempt .flatMap { case Right(d) => Task.pure(d) @@ -163,7 +167,7 @@ object ReProcessItem { ).andThen(_ => Sync[F].raiseError(ex)) } case false => - processFiles[F](cfg, fts, itemOps, analyser, regexNer, data) + processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data) } private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala b/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala index fadcb949..3d961937 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala @@ -11,7 +11,7 @@ import cats.implicits._ import docspell.backend.ops.OItem import docspell.common._ -import 
docspell.joex.scheduler.Task +import docspell.scheduler.Task object RemoveEmptyItem { diff --git a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala index 18cbb933..fc17a54f 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala @@ -10,15 +10,15 @@ import cats.effect.Sync import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.{Context, Task} -import docspell.store.AddResult +import docspell.scheduler.{Context, Task} import docspell.store.records._ +import docspell.store.{AddResult, Store} /** Saves the proposals in the database */ object SaveProposals { type Args = ProcessItemArgs - def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] = + def apply[F[_]: Sync](store: Store[F])(data: ItemData): Task[F, Args, ItemData] = Task { ctx => for { _ <- ctx.logger.info("Storing proposals") @@ -26,20 +26,24 @@ object SaveProposals { .traverse(rm => ctx.logger.debug( s"Storing attachment proposals: ${rm.proposals}" - ) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals)) + ) *> store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals)) ) _ <- if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F] - else saveItemProposal(ctx, data) + else saveItemProposal(ctx, store, data) } yield data } - def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = { + def saveItemProposal[F[_]: Sync]( + ctx: Context[F, Args], + store: Store[F], + data: ItemData + ): F[Unit] = { def upsert(v: RItemProposal): F[Int] = - ctx.store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap { + store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap { case AddResult.Success => 1.pure[F] case AddResult.EntityExists(_) => - ctx.store.transact(RItemProposal.update(v)) + 
store.transact(RItemProposal.update(v)) case AddResult.Failure(ex) => ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0 .pure[F] @@ -47,7 +51,7 @@ object SaveProposals { for { _ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}") - tags <- ctx.store.transact( + tags <- store.transact( RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective) ) tagRefs = tags.map(t => IdRef(t.tagId, t.name)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala index 54283101..0734d294 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala @@ -11,7 +11,7 @@ import cats.implicits._ import docspell.backend.ops.OItem import docspell.common._ -import docspell.joex.scheduler.Task +import docspell.scheduler.Task import docspell.store.UpdateResult object SetGivenData { diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala index 00e410da..3a8ab6aa 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala @@ -18,8 +18,9 @@ import docspell.joex.Config import docspell.joex.analysis.RegexNerFile import docspell.joex.learn.{ClassifierName, Classify, LearnClassifierTask} import docspell.joex.process.ItemData.AttachmentDates -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task +import docspell.scheduler.Context +import docspell.scheduler.Task +import docspell.store.Store import docspell.store.records.{RAttachmentMeta, RClassifierSetting} object TextAnalysis { @@ -28,7 +29,8 @@ object TextAnalysis { def apply[F[_]: Async]( cfg: Config.TextAnalysis, analyser: TextAnalyser[F], - nerFile: RegexNerFile[F] + nerFile: 
RegexNerFile[F], + store: Store[F] )(item: ItemData): Task[F, Args, ItemData] = Task { ctx => for { @@ -41,18 +43,19 @@ object TextAnalysis { ) _ <- ctx.logger.debug(s"Storing tags: ${t.map(_._1.copy(content = None))}") _ <- t.traverse(m => - ctx.store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels)) + store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels)) ) v = t.toVector - autoTagEnabled <- getActiveAutoTag(ctx, cfg) + autoTagEnabled <- getActiveAutoTag(ctx, store, cfg) tag <- - if (autoTagEnabled) predictTags(ctx, cfg, item.metas, analyser.classifier) + if (autoTagEnabled) + predictTags(ctx, store, cfg, item.metas, analyser.classifier) else List.empty[String].pure[F] classProposals <- if (cfg.classification.enabled) - predictItemEntities(ctx, cfg, item.metas, analyser.classifier) + predictItemEntities(ctx, store, cfg, item.metas, analyser.classifier) else MetaProposalList.empty.pure[F] e <- s @@ -86,16 +89,17 @@ object TextAnalysis { def predictTags[F[_]: Async]( ctx: Context[F, Args], + store: Store[F], cfg: Config.TextAnalysis, metas: Vector[RAttachmentMeta], classifier: TextClassifier[F] ): F[List[String]] = { val text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep) val classifyWith: ClassifierName => F[Option[String]] = - makeClassify(ctx, cfg, classifier)(text) + makeClassify(ctx, store, cfg, classifier)(text) for { - names <- ctx.store.transact( + names <- store.transact( ClassifierName.findTagClassifiers(ctx.args.meta.collective) ) _ <- ctx.logger.debug(s"Guessing tags for ${names.size} categories") @@ -105,6 +109,7 @@ object TextAnalysis { def predictItemEntities[F[_]: Async]( ctx: Context[F, Args], + store: Store[F], cfg: Config.TextAnalysis, metas: Vector[RAttachmentMeta], classifier: TextClassifier[F] @@ -116,7 +121,7 @@ object TextAnalysis { mtype: MetaProposalType ): F[Option[MetaProposal]] = for { - label <- makeClassify(ctx, cfg, classifier)(text).apply(cname) + label <- makeClassify(ctx, store, cfg, 
classifier)(text).apply(cname) } yield label.map(str => MetaProposal(mtype, Candidate(IdRef(Ident.unsafe(""), str), Set.empty)) ) @@ -136,13 +141,14 @@ object TextAnalysis { private def makeClassify[F[_]: Async]( ctx: Context[F, Args], + store: Store[F], cfg: Config.TextAnalysis, classifier: TextClassifier[F] )(text: String): ClassifierName => F[Option[String]] = Classify[F]( ctx.logger, cfg.workingDir, - ctx.store, + store, classifier, ctx.args.meta.collective, text @@ -150,10 +156,11 @@ object TextAnalysis { private def getActiveAutoTag[F[_]: Sync]( ctx: Context[F, Args], + store: Store[F], cfg: Config.TextAnalysis ): F[Boolean] = if (cfg.classification.enabled) - ctx.store + store .transact(RClassifierSetting.findById(ctx.args.meta.collective)) .map(_.exists(_.autoTagEnabled)) .flatTap(enabled => diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index 3f6be877..ee6b8939 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -13,12 +13,13 @@ import cats.implicits._ import docspell.common._ import docspell.extract.{ExtractConfig, ExtractResult, Extraction} import docspell.ftsclient.{FtsClient, TextData} -import docspell.joex.scheduler.{Context, Task} +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta} object TextExtraction { - def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])( + def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F], store: Store[F])( item: ItemData ): Task[F, ProcessItemArgs, ItemData] = Task { ctx => @@ -30,6 +31,7 @@ object TextExtraction { txt <- item.attachments.traverse( extractTextIfEmpty( ctx, + store, cfg, ctx.args.meta.language, ctx.args.meta.collective, @@ -38,7 +40,7 @@ object TextExtraction { ) _ <- 
ctx.logger.debug("Storing extracted texts …") _ <- - txt.toList.traverse(res => ctx.store.transact(RAttachmentMeta.upsert(res.am))) + txt.toList.traverse(res => store.transact(RAttachmentMeta.upsert(res.am))) _ <- ctx.logger.debug(s"Extracted text stored.") idxItem = TextData.item( item.item.id, @@ -65,6 +67,7 @@ object TextExtraction { def extractTextIfEmpty[F[_]: Async]( ctx: Context[F, ProcessItemArgs], + store: Store[F], cfg: ExtractConfig, lang: Language, collective: Ident, @@ -91,13 +94,14 @@ object TextExtraction { ctx.logger.info("TextExtraction skipped, since text is already available.") *> makeTextData((rm, Nil)).pure[F] case _ => - extractTextToMeta[F](ctx, cfg, lang, item)(ra) + extractTextToMeta[F](ctx, store, cfg, lang, item)(ra) .map(makeTextData) } } def extractTextToMeta[F[_]: Async]( ctx: Context[F, _], + store: Store[F], cfg: ExtractConfig, lang: Language, item: ItemData @@ -105,8 +109,8 @@ object TextExtraction { for { _ <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}") dst <- Duration.stopTime[F] - fids <- filesToExtract(ctx)(item, ra) - res <- extractTextFallback(ctx, cfg, ra, lang)(fids) + fids <- filesToExtract(store)(item, ra) + res <- extractTextFallback(ctx, store, cfg, ra, lang)(fids) meta = item.changeMeta( ra.id, lang, @@ -123,14 +127,14 @@ object TextExtraction { } yield (meta, tags) def extractText[F[_]: Sync]( - ctx: Context[F, _], + store: Store[F], extr: Extraction[F], lang: Language )(fileId: FileKey): F[ExtractResult] = { - val data = ctx.store.fileRepo.getBytes(fileId) + val data = store.fileRepo.getBytes(fileId) def findMime: F[MimeType] = - OptionT(ctx.store.fileRepo.findMeta(fileId)) + OptionT(store.fileRepo.findMeta(fileId)) .map(_.mimetype) .getOrElse(MimeType.octetStream) @@ -140,6 +144,7 @@ object TextExtraction { private def extractTextFallback[F[_]: Async]( ctx: Context[F, _], + store: Store[F], cfg: ExtractConfig, ra: RAttachment, lang: Language @@ -151,7 +156,7 @@ object TextExtraction 
{ case id :: rest => val extr = Extraction.create[F](ctx.logger, cfg) - extractText[F](ctx, extr, lang)(id) + extractText[F](store, extr, lang)(id) .flatMap { case res @ ExtractResult.Success(_, _) => res.some.pure[F] @@ -161,12 +166,12 @@ object TextExtraction { .warn( s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file." ) - .flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest)) + .flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest)) case ExtractResult.Failure(ex) => ctx.logger .warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file") - .flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest)) + .flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest)) } } @@ -176,13 +181,13 @@ object TextExtraction { * If the source file is a PDF, then use the converted file. This may then already * contain the text if ocrmypdf is enabled. If it is disabled, both files are the same. 
*/ - private def filesToExtract[F[_]: Sync](ctx: Context[F, _])( + private def filesToExtract[F[_]: Sync](store: Store[F])( item: ItemData, ra: RAttachment ): F[List[FileKey]] = item.originFile.get(ra.id) match { case Some(sid) => - ctx.store.transact(RFileMeta.findMime(sid)).map { + store.transact(RFileMeta.findMime(sid)).map { case Some(MimeType.PdfMatch(_)) => List(ra.fileId) case _ => diff --git a/modules/joex/src/main/scala/docspell/joex/routes/JoexRoutes.scala b/modules/joex/src/main/scala/docspell/joex/routes/JoexRoutes.scala index 1a582b98..6810f3d9 100644 --- a/modules/joex/src/main/scala/docspell/joex/routes/JoexRoutes.scala +++ b/modules/joex/src/main/scala/docspell/joex/routes/JoexRoutes.scala @@ -12,7 +12,7 @@ import cats.implicits._ import docspell.common.{Duration, Ident, Timestamp} import docspell.joex.JoexApp import docspell.joexapi.model._ -import docspell.store.records.{RJob, RJobLog} +import docspell.store.records.RJobLog import org.http4s.HttpRoutes import org.http4s.circe.CirceEntityEncoder._ @@ -67,17 +67,19 @@ object JoexRoutes { } } - def mkJob(j: RJob): Job = + // TODO !! 
+ + def mkJob(j: docspell.scheduler.Job[String]): Job = Job( j.id, j.subject, - j.submitted, + Timestamp.Epoch, j.priority, - j.retries, - j.progress, - j.started.getOrElse(Timestamp.Epoch) + -1, + -1, + Timestamp.Epoch ) - def mkJobLog(j: RJob, jl: Vector[RJobLog]): JobAndLog = + def mkJobLog(j: docspell.scheduler.Job[String], jl: Vector[RJobLog]): JobAndLog = JobAndLog(mkJob(j), jl.map(r => JobLogEvent(r.created, r.level, r.message)).toList) } diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala index 869362e3..039dee08 100644 --- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala @@ -16,8 +16,9 @@ import fs2._ import docspell.backend.ops.{OJoex, OUpload} import docspell.common._ import docspell.joex.Config -import docspell.joex.scheduler.{Context, Task} import docspell.logging.Logger +import docspell.scheduler.{Context, Task} +import docspell.store.Store import docspell.store.queries.QOrganization import docspell.store.records._ @@ -32,6 +33,7 @@ object ScanMailboxTask { def apply[F[_]: Sync]( cfg: Config.ScanMailbox, + store: Store[F], emil: Emil[F], upload: OUpload[F], joex: OJoex[F] @@ -42,22 +44,22 @@ object ScanMailboxTask { s"=== Start importing mails for user ${ctx.args.account.user.id}" ) _ <- ctx.logger.debug(s"Settings: ${ctx.args.asJson.noSpaces}") - mailCfg <- getMailSettings(ctx) + mailCfg <- getMailSettings(ctx, store) folders = ctx.args.folders.mkString(", ") userId = ctx.args.account.user imapConn = ctx.args.imapConnection _ <- ctx.logger.info( s"Reading mails for user ${userId.id} from ${imapConn.id}/$folders" ) - _ <- importMails(cfg, mailCfg, emil, upload, joex, ctx) + _ <- importMails(cfg, mailCfg, emil, upload, joex, ctx, store) } yield () } def onCancel[F[_]]: Task[F, ScanMailboxArgs, Unit] = Task.log(_.warn("Cancelling 
scan-mailbox task")) - def getMailSettings[F[_]: Sync](ctx: Context[F, Args]): F[RUserImap] = - ctx.store + def getMailSettings[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[RUserImap] = + store .transact(RUserImap.getByName(ctx.args.account, ctx.args.imapConnection)) .flatMap { case Some(c) => c.pure[F] @@ -75,10 +77,11 @@ object ScanMailboxTask { theEmil: Emil[F], upload: OUpload[F], joex: OJoex[F], - ctx: Context[F, Args] + ctx: Context[F, Args], + store: Store[F] ): F[Unit] = { val mailer = theEmil(mailCfg.toMailConfig) - val impl = new Impl[F](cfg, ctx) + val impl = new Impl[F](cfg, ctx, store) val inFolders = ctx.args.folders.take(cfg.maxFolders) val getInitialInput = @@ -142,7 +145,11 @@ object ScanMailboxTask { ScanResult(List(folder -> left), processed) } - final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) { + final private class Impl[F[_]: Sync]( + cfg: Config.ScanMailbox, + ctx: Context[F, Args], + store: Store[F] + ) { private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] = MailOp(_ => f(ctx.logger)) @@ -213,7 +220,7 @@ object ScanMailboxTask { NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match { case Some(nl) => for { - archives <- ctx.store.transact( + archives <- store.transact( RAttachmentArchive .findByMessageIdAndCollective(nl, ctx.args.account.collective) ) @@ -237,7 +244,7 @@ object ScanMailboxTask { for { from <- OptionT.fromOption[F](mh.from) _ <- OptionT( - ctx.store.transact( + store.transact( QOrganization .findPersonByContact( ctx.args.account.collective, diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicScheduler.scala b/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicScheduler.scala deleted file mode 100644 index 13f61705..00000000 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicScheduler.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2020 Eike K. 
& Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.joex.scheduler - -import cats.effect._ -import fs2._ -import fs2.concurrent.SignallingRef - -import docspell.backend.ops.OJoex -import docspell.store.queue._ - -/** A periodic scheduler takes care to submit periodic tasks to the job queue. - * - * It is run in the background to regularily find a periodic task to execute. If the task - * is due, it will be submitted into the job queue where it will be picked up by the - * scheduler from some joex instance. If it is due in the future, a notification is - * scheduled to be received at that time so the task can be looked up again. - */ -trait PeriodicScheduler[F[_]] { - - def config: PeriodicSchedulerConfig - - def start: Stream[F, Nothing] - - def shutdown: F[Unit] - - def periodicAwake: F[Fiber[F, Throwable, Unit]] - - def notifyChange: F[Unit] -} - -object PeriodicScheduler { - - def create[F[_]: Async]( - cfg: PeriodicSchedulerConfig, - sch: Scheduler[F], - queue: JobQueue[F], - store: PeriodicTaskStore[F], - joex: OJoex[F] - ): Resource[F, PeriodicScheduler[F]] = - for { - waiter <- Resource.eval(SignallingRef(true)) - state <- Resource.eval(SignallingRef(PeriodicSchedulerImpl.emptyState[F])) - psch = new PeriodicSchedulerImpl[F]( - cfg, - sch, - queue, - store, - joex, - waiter, - state - ) - _ <- Resource.eval(psch.init) - } yield psch - -} diff --git a/modules/joex/src/main/scala/docspell/joex/updatecheck/UpdateCheckTask.scala b/modules/joex/src/main/scala/docspell/joex/updatecheck/UpdateCheckTask.scala index c89b9fde..39dfbbbc 100644 --- a/modules/joex/src/main/scala/docspell/joex/updatecheck/UpdateCheckTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/updatecheck/UpdateCheckTask.scala @@ -11,12 +11,10 @@ import cats.effect._ import cats.implicits._ import docspell.common._ -import docspell.joex.scheduler.Context -import docspell.joex.scheduler.Task -import docspell.store.records.RPeriodicTask +import 
docspell.scheduler.Task +import docspell.scheduler.usertask.UserTask +import docspell.store.Store import docspell.store.records.RUserEmail -import docspell.store.usertask.UserTask -import docspell.store.usertask.UserTaskScope import emil._ @@ -28,22 +26,20 @@ object UpdateCheckTask { def onCancel[F[_]]: Task[F, Args, Unit] = Task.log(_.warn("Cancelling update-check task")) - def periodicTask[F[_]: Sync](cfg: UpdateCheckConfig): F[RPeriodicTask] = + def periodicTask[F[_]: Sync](cfg: UpdateCheckConfig): F[UserTask[Unit]] = UserTask( Ident.unsafe("docspell-update-check"), taskName, cfg.enabled, cfg.schedule, - None, + "Docspell Update Check".some, () - ).encode.toPeriodicTask( - UserTaskScope(cfg.senderAccount.collective), - "Docspell Update Check".some - ) + ).pure[F] def apply[F[_]: Async]( cfg: UpdateCheckConfig, sendCfg: MailSendConfig, + store: Store[F], emil: Emil[F], updateCheck: UpdateCheck[F], thisVersion: ThisVersion @@ -57,7 +53,7 @@ object UpdateCheckTask { _ <- ctx.logger.debug( s"Get SMTP connection for ${cfg.senderAccount.asString} and ${cfg.smtpId}" ) - smtpCfg <- findConnection(ctx, cfg) + smtpCfg <- findConnection(store, cfg) _ <- ctx.logger.debug("Checking for latest release at GitHub") latest <- updateCheck.latestRelease _ <- ctx.logger.debug(s"Got latest release: $latest.") @@ -84,10 +80,10 @@ object UpdateCheckTask { Task.pure(()) def findConnection[F[_]: Sync]( - ctx: Context[F, _], + store: Store[F], cfg: UpdateCheckConfig ): F[RUserEmail] = - OptionT(ctx.store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId))) + OptionT(store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId))) .getOrElseF( Sync[F].raiseError( new Exception( diff --git a/modules/logging/scribe/src/main/scala/docspell/logging/impl/ScribeConfigure.scala b/modules/logging/scribe/src/main/scala/docspell/logging/impl/ScribeConfigure.scala index 6bf3ad8d..81e6e662 100644 --- a/modules/logging/scribe/src/main/scala/docspell/logging/impl/ScribeConfigure.scala +++ 
b/modules/logging/scribe/src/main/scala/docspell/logging/impl/ScribeConfigure.scala @@ -22,10 +22,12 @@ object ScribeConfigure { Sync[F].delay { replaceJUL() val docspellLogger = scribe.Logger("docspell") - val flywayLogger = scribe.Logger("org.flywaydb") + unsafeConfigure(scribe.Logger.root, cfg.copy(minimumLevel = getRootMinimumLevel)) unsafeConfigure(docspellLogger, cfg) - unsafeConfigure(flywayLogger, cfg) + unsafeConfigure(scribe.Logger("org.flywaydb"), cfg) + unsafeConfigure(scribe.Logger("binny"), cfg) + unsafeConfigure(scribe.Logger("org.http4s"), cfg) } private[this] def getRootMinimumLevel: Level = diff --git a/modules/notification/api/src/main/scala/docspell/notification/api/Event.scala b/modules/notification/api/src/main/scala/docspell/notification/api/Event.scala index 3da27455..83f320bb 100644 --- a/modules/notification/api/src/main/scala/docspell/notification/api/Event.scala +++ b/modules/notification/api/src/main/scala/docspell/notification/api/Event.scala @@ -204,7 +204,8 @@ object Event { state: JobState, subject: String, submitter: Ident, - result: Json + resultData: Json, + resultMsg: Option[String] ) extends Event { val eventType = JobDone val baseUrl = None @@ -222,7 +223,8 @@ object Event { JobState.running, "Process 3 files", account.user, - Json.Null + Json.Null, + None ) } yield ev } diff --git a/modules/notification/api/src/main/scala/docspell/notification/api/EventContext.scala b/modules/notification/api/src/main/scala/docspell/notification/api/EventContext.scala index fc3f2984..e1bd8d27 100644 --- a/modules/notification/api/src/main/scala/docspell/notification/api/EventContext.scala +++ b/modules/notification/api/src/main/scala/docspell/notification/api/EventContext.scala @@ -31,30 +31,25 @@ trait EventContext { "content" -> content ) - def defaultTitle: Either[String, String] - def defaultTitleHtml: Either[String, String] - - def defaultBody: Either[String, String] - def defaultBodyHtml: Either[String, String] + def defaultMessage: 
Either[String, EventMessage] + def defaultMessageHtml: Either[String, EventMessage] def defaultBoth: Either[String, String] def defaultBothHtml: Either[String, String] lazy val asJsonWithMessage: Either[String, Json] = for { - tt1 <- defaultTitle - tb1 <- defaultBody - tt2 <- defaultTitleHtml - tb2 <- defaultBodyHtml + dm1 <- defaultMessage + dm2 <- defaultMessageHtml data = asJson msg = Json.obj( "message" -> Json.obj( - "title" -> tt1.asJson, - "body" -> tb1.asJson + "title" -> dm1.title.asJson, + "body" -> dm1.body.asJson ), "messageHtml" -> Json.obj( - "title" -> tt2.asJson, - "body" -> tb2.asJson + "title" -> dm2.title.asJson, + "body" -> dm2.body.asJson ) ) } yield data.withObject(o1 => msg.withObject(o2 => o1.deepMerge(o2).asJson)) @@ -65,10 +60,8 @@ object EventContext { new EventContext { val event = ev def content = Json.obj() - def defaultTitle = Right("") - def defaultTitleHtml = Right("") - def defaultBody = Right("") - def defaultBodyHtml = Right("") + def defaultMessage = Right(EventMessage.empty) + def defaultMessageHtml = Right(EventMessage.empty) def defaultBoth = Right("") def defaultBothHtml = Right("") } diff --git a/modules/notification/api/src/main/scala/docspell/notification/api/EventMessage.scala b/modules/notification/api/src/main/scala/docspell/notification/api/EventMessage.scala new file mode 100644 index 00000000..eb341cf7 --- /dev/null +++ b/modules/notification/api/src/main/scala/docspell/notification/api/EventMessage.scala @@ -0,0 +1,13 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.notification.api + +final case class EventMessage(title: String, body: String) + +object EventMessage { + val empty: EventMessage = EventMessage("", "") +} diff --git a/modules/notification/impl/src/main/scala/docspell/notification/impl/AbstractEventContext.scala b/modules/notification/impl/src/main/scala/docspell/notification/impl/AbstractEventContext.scala index 04dc3990..416c0760 100644 --- a/modules/notification/impl/src/main/scala/docspell/notification/impl/AbstractEventContext.scala +++ b/modules/notification/impl/src/main/scala/docspell/notification/impl/AbstractEventContext.scala @@ -6,7 +6,7 @@ package docspell.notification.impl -import docspell.notification.api.EventContext +import docspell.notification.api.{EventContext, EventMessage} import yamusca.circe._ import yamusca.implicits._ @@ -24,17 +24,17 @@ abstract class AbstractEventContext extends EventContext { def renderHtml(template: Template): String = Markdown.toHtml(render(template)) - lazy val defaultTitle: Either[String, String] = - titleTemplate.map(render) + lazy val defaultMessage: Either[String, EventMessage] = + for { + title <- titleTemplate.map(render) + body <- bodyTemplate.map(render) + } yield EventMessage(title, body) - lazy val defaultTitleHtml: Either[String, String] = - titleTemplate.map(renderHtml) - - lazy val defaultBody: Either[String, String] = - bodyTemplate.map(render) - - lazy val defaultBodyHtml: Either[String, String] = - bodyTemplate.map(renderHtml) + lazy val defaultMessageHtml: Either[String, EventMessage] = + for { + title <- titleTemplate.map(renderHtml) + body <- bodyTemplate.map(renderHtml) + } yield EventMessage(title, body) lazy val defaultBoth: Either[String, String] = for { diff --git a/modules/notification/impl/src/main/scala/docspell/notification/impl/EventContextSyntax.scala b/modules/notification/impl/src/main/scala/docspell/notification/impl/EventContextSyntax.scala 
index 1609105c..adf5b486 100644 --- a/modules/notification/impl/src/main/scala/docspell/notification/impl/EventContextSyntax.scala +++ b/modules/notification/impl/src/main/scala/docspell/notification/impl/EventContextSyntax.scala @@ -18,8 +18,9 @@ trait EventContextSyntax { implicit final class EventContextOps(self: EventContext) { def withDefault[F[_]](logger: Logger[F])(f: (String, String) => F[Unit]): F[Unit] = (for { - tt <- self.defaultTitle - tb <- self.defaultBody + dm <- self.defaultMessage + tt = dm.title + tb = dm.body } yield f(tt, tb)).fold(logError(logger), identity) def withJsonMessage[F[_]](logger: Logger[F])(f: Json => F[Unit]): F[Unit] = diff --git a/modules/notification/impl/src/main/scala/docspell/notification/impl/context/JobDoneCtx.scala b/modules/notification/impl/src/main/scala/docspell/notification/impl/context/JobDoneCtx.scala index 8ef19754..b23161fc 100644 --- a/modules/notification/impl/src/main/scala/docspell/notification/impl/context/JobDoneCtx.scala +++ b/modules/notification/impl/src/main/scala/docspell/notification/impl/context/JobDoneCtx.scala @@ -23,9 +23,14 @@ final case class JobDoneCtx(event: Event.JobDone, data: JobDoneCtx.Data) val content = data.asJson val titleTemplate = Right(mustache"{{eventType}} (by *{{account.user}}*)") - val bodyTemplate = Right( - mustache"""{{#content}}_'{{subject}}'_ finished {{/content}}""" - ) + val bodyTemplate = + data.resultMsg match { + case None => + Right(mustache"""{{#content}}_'{{subject}}'_ finished {{/content}}""") + case Some(msg) => + val tpl = s"""{{#content}}$msg{{/content}}""" + yamusca.imports.mustache.parse(tpl).left.map(_._2) + } } object JobDoneCtx { @@ -46,7 +51,8 @@ object JobDoneCtx { state: JobState, subject: String, submitter: Ident, - result: Json + resultData: Json, + resultMsg: Option[String] ) object Data { implicit val jsonEncoder: Encoder[Data] = @@ -61,7 +67,8 @@ object JobDoneCtx { ev.state, ev.subject, ev.submitter, - ev.result + ev.resultData, + ev.resultMsg ) } 
} diff --git a/modules/notification/impl/src/test/scala/docspell/notification/impl/context/TagsChangedCtxTest.scala b/modules/notification/impl/src/test/scala/docspell/notification/impl/context/TagsChangedCtxTest.scala index 094ae368..fe72545b 100644 --- a/modules/notification/impl/src/test/scala/docspell/notification/impl/context/TagsChangedCtxTest.scala +++ b/modules/notification/impl/src/test/scala/docspell/notification/impl/context/TagsChangedCtxTest.scala @@ -46,9 +46,10 @@ class TagsChangedCtxTest extends FunSuite { TagsChangedCtx.Data(account, List(item), List(tag), Nil, url.some.map(_.asString)) ) - assertEquals(ctx.defaultTitle.toOption.get, "TagsChanged (by *user2*)") + val dm = ctx.defaultMessage.toOption.get + assertEquals(dm.title, "TagsChanged (by *user2*)") assertEquals( - ctx.defaultBody.toOption.get, + dm.body, "Adding *tag-red* on [`Report 2`](http://test/item-1)." ) } @@ -65,9 +66,10 @@ class TagsChangedCtxTest extends FunSuite { ) ) - assertEquals(ctx.defaultTitle.toOption.get, "TagsChanged (by *user2*)") + val dm = ctx.defaultMessage.toOption.get + assertEquals(dm.title, "TagsChanged (by *user2*)") assertEquals( - ctx.defaultBody.toOption.get, + dm.body, "Adding *tag-red*; Removing *tag-blue* on [`Report 2`](http://test/item-1)." ) } diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 50dc715d..669825e4 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -2487,6 +2487,59 @@ paths: schema: $ref: "#/components/schemas/BasicResult" + /admin/files/cloneFileRepository: + post: + operationId: "admin-files-cloneFileRepository" + tags: [Admin] + summary: Copy all files into a new repository + description: | + Submits a task that will copy all files of the application + (from the default file repository) into another file + repository as specified in the request. 
The request may define + ids of file repository configurations that must be present in + the config file. An empty list means to copy to all enabled + file repositories from the default file repository. + security: + - adminHeader: [] + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/FileRepositoryCloneRequest" + responses: + 422: + description: BadRequest + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/BasicResult" + + /admin/files/integrityCheck: + post: + operationId: "admin-files-integrityCheck" + tags: [ Admin ] + summary: Verifies the stored checksum + description: | + Submits a task that goes through the files and compares the + stored checksum (at the time of inserting) against a newly + calculated one. + security: + - adminHeader: [] + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/FileIntegrityCheckRequest" + responses: + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/BasicResult" + /sec/source: get: operationId: "sec-source-get-all" @@ -5433,6 +5486,26 @@ paths: components: schemas: + FileIntegrityCheckRequest: + description: | + Data for running a file integrity check + properties: + collective: + type: string + format: ident + + FileRepositoryCloneRequest: + description: | + Clone the file repository to a new location. + required: + - targetRepositories + properties: + targetRepositories: + type: array + items: + type: string + format: ident + BookmarkedQuery: description: | A query bookmark. diff --git a/modules/restserver/src/main/resources/reference.conf b/modules/restserver/src/main/resources/reference.conf index 14b21372..d3306679 100644 --- a/modules/restserver/src/main/resources/reference.conf +++ b/modules/restserver/src/main/resources/reference.conf @@ -358,6 +358,41 @@ docspell.server { # restrict file types that should be handed over to processing.
# By default all files are allowed. valid-mime-types = [ ] + + # The id of an enabled store from the `stores` array that should + # be used. + # + # IMPORTANT NOTE: All nodes must have the exact same file store + # configuration! + default-store = "database" + + # A list of possible file stores. Each entry must have a unique + # id. The `type` is one of: default-database, file-system, s3. + # + # The enabled property serves currently to define target stores + # for the "copy files" task. All stores with enabled=false are + # removed from the list. The `default-store` must be enabled. + stores = { + database = + { enabled = true + type = "default-database" + } + + filesystem = + { enabled = false + type = "file-system" + directory = "/some/directory" + } + + minio = + { enabled = false + type = "s3" + endpoint = "http://localhost:9000" + access-key = "username" + secret-key = "password" + bucket = "docspell" + } + } } } } \ No newline at end of file diff --git a/modules/restserver/src/main/scala/docspell/restserver/ConfigFile.scala b/modules/restserver/src/main/scala/docspell/restserver/ConfigFile.scala index 5e72a1d4..2e225cae 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/ConfigFile.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/ConfigFile.scala @@ -24,12 +24,18 @@ import scodec.bits.ByteVector object ConfigFile { private[this] val unsafeLogger = docspell.logging.unsafeLogger + // IntelliJ is wrong, this is required import Implicits._ def loadConfig[F[_]: Async](args: List[String]): F[Config] = { val logger = docspell.logging.getLogger[F] val validate = - Validation.of(generateSecretIfEmpty, duplicateOpenIdProvider, signKeyVsUserUrl) + Validation.of( + generateSecretIfEmpty, + duplicateOpenIdProvider, + signKeyVsUserUrl, + filesValidate + ) ConfigFactory .default[F, Config](logger, "docspell.server")(args, validate) } @@ -97,4 +103,7 @@ object ConfigFile { .map(checkProvider) ) } + + def filesValidate: Validation[Config] = +
Validation(cfg => cfg.backend.files.validate.map(_ => cfg)) } diff --git a/modules/restserver/src/main/scala/docspell/restserver/Main.scala b/modules/restserver/src/main/scala/docspell/restserver/Main.scala index c8a6a003..106052a1 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/Main.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/Main.scala @@ -28,7 +28,8 @@ object Main extends IOApp { Option(System.getProperty("config.file")), cfg.appId, cfg.baseUrl, - Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled) + Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled), + cfg.backend.files.defaultStoreConfig ) _ <- logger.info(s"\n${banner.render("***>")}") _ <- diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestApp.scala b/modules/restserver/src/main/scala/docspell/restserver/RestApp.scala index 44c80eaa..b7288f60 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/RestApp.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/RestApp.scala @@ -10,6 +10,9 @@ import fs2.Stream import docspell.backend.BackendApp +import org.http4s.HttpRoutes +import org.http4s.server.websocket.WebSocketBuilder2 + trait RestApp[F[_]] { /** Access to the configuration used to build backend services. */ @@ -25,4 +28,7 @@ trait RestApp[F[_]] { * via websocket. */ def subscriptions: Stream[F, Nothing] + + /** Http4s endpoint definitions. 
*/ + def routes(wsb: WebSocketBuilder2[F]): HttpRoutes[F] } diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala b/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala index ce14ecd3..6b0f87b8 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala @@ -11,20 +11,31 @@ import fs2.Stream import fs2.concurrent.Topic import docspell.backend.BackendApp +import docspell.backend.auth.{AuthToken, ShareToken} import docspell.ftsclient.FtsClient import docspell.ftssolr.SolrFtsClient import docspell.notification.api.NotificationModule import docspell.notification.impl.NotificationModuleImpl +import docspell.oidc.CodeFlowRoutes import docspell.pubsub.api.{PubSub, PubSubT} -import docspell.restserver.ws.OutputEvent +import docspell.restserver.auth.OpenId +import docspell.restserver.http4s.EnvMiddleware +import docspell.restserver.routes._ +import docspell.restserver.webapp.{TemplateRoutes, Templates, WebjarRoutes} +import docspell.restserver.ws.{OutputEvent, WebSocketRoutes} +import docspell.scheduler.impl.JobStoreModuleBuilder import docspell.store.Store import emil.javamail.JavaMailEmil +import org.http4s.HttpRoutes import org.http4s.client.Client +import org.http4s.server.Router +import org.http4s.server.websocket.WebSocketBuilder2 final class RestAppImpl[F[_]: Async]( val config: Config, val backend: BackendApp[F], + httpClient: Client[F], notificationMod: NotificationModule[F], wsTopic: Topic[F, OutputEvent], pubSub: PubSubT[F] @@ -35,6 +46,108 @@ final class RestAppImpl[F[_]: Async]( def subscriptions: Stream[F, Nothing] = Subscriptions[F](wsTopic, pubSub) + + def routes(wsb: WebSocketBuilder2[F]): HttpRoutes[F] = + createHttpApp(wsb) + + val templates = TemplateRoutes[F](config, Templates[F]) + + def createHttpApp( + wsB: WebSocketBuilder2[F] + ) = + Router( + "/api/info" -> InfoRoutes(), + "/api/v1/open/" -> 
openRoutes(httpClient), + "/api/v1/sec/" -> Authenticate(backend.login, config.auth) { token => + securedRoutes(wsB, token) + }, + "/api/v1/admin" -> AdminAuth(config.adminEndpoint) { + adminRoutes + }, + "/api/v1/share" -> ShareAuth(backend.share, config.auth) { token => + shareRoutes(token) + }, + "/api/doc" -> templates.doc, + "/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]), + "/app" -> EnvMiddleware(templates.app), + "/sw.js" -> EnvMiddleware(templates.serviceWorker) + ) + + def adminRoutes: HttpRoutes[F] = + Router( + "fts" -> FullTextIndexRoutes.admin(config, backend), + "user/otp" -> TotpRoutes.admin(backend), + "user" -> UserRoutes.admin(backend), + "info" -> InfoRoutes.admin(config), + "attachments" -> AttachmentRoutes.admin(backend), + "files" -> FileRepositoryRoutes.admin(backend) + ) + + def shareRoutes( + token: ShareToken + ): HttpRoutes[F] = + Router( + "search" -> ShareSearchRoutes(backend, config, token), + "attachment" -> ShareAttachmentRoutes(backend, token), + "item" -> ShareItemRoutes(backend, token), + "clientSettings" -> ClientSettingsRoutes.share(backend, token) + ) + def openRoutes( + client: Client[F] + ): HttpRoutes[F] = + Router( + "auth/openid" -> CodeFlowRoutes( + config.openIdEnabled, + OpenId.handle[F](backend, config), + OpenId.codeFlowConfig(config), + client + ), + "auth" -> LoginRoutes.login(backend.login, config), + "signup" -> RegisterRoutes(backend, config), + "upload" -> UploadRoutes.open(backend, config), + "checkfile" -> CheckFileRoutes.open(backend), + "integration" -> IntegrationEndpointRoutes.open(backend, config), + "share" -> ShareRoutes.verify(backend, config) + ) + + def securedRoutes( + wsB: WebSocketBuilder2[F], + token: AuthToken + ): HttpRoutes[F] = + Router( + "ws" -> WebSocketRoutes(token, backend, wsTopic, wsB), + "auth" -> LoginRoutes.session(backend.login, config, token), + "tag" -> TagRoutes(backend, token), + "equipment" -> EquipmentRoutes(backend, token), + "organization" -> 
OrganizationRoutes(backend, token), + "person" -> PersonRoutes(backend, token), + "source" -> SourceRoutes(backend, token), + "user/otp" -> TotpRoutes(backend, config, token), + "user" -> UserRoutes(backend, token), + "collective" -> CollectiveRoutes(backend, token), + "queue" -> JobQueueRoutes(backend, token), + "item" -> ItemRoutes(config, backend, token), + "items" -> ItemMultiRoutes(config, backend, token), + "attachment" -> AttachmentRoutes(backend, token), + "attachments" -> AttachmentMultiRoutes(backend, token), + "upload" -> UploadRoutes.secured(backend, config, token), + "checkfile" -> CheckFileRoutes.secured(backend, token), + "email/send" -> MailSendRoutes(backend, token), + "email/settings" -> MailSettingsRoutes(backend, token), + "email/sent" -> SentMailRoutes(backend, token), + "share" -> ShareRoutes.manage(backend, token), + "usertask/notifydueitems" -> NotifyDueItemsRoutes(config, backend, token), + "usertask/scanmailbox" -> ScanMailboxRoutes(backend, token), + "usertask/periodicquery" -> PeriodicQueryRoutes(config, backend, token), + "calevent/check" -> CalEventCheckRoutes(), + "fts" -> FullTextIndexRoutes.secured(config, backend, token), + "folder" -> FolderRoutes(backend, token), + "customfield" -> CustomFieldRoutes(backend, token), + "clientSettings" -> ClientSettingsRoutes(backend, token), + "notification" -> NotificationRoutes(config, backend, token), + "querybookmark" -> BookmarkRoutes(backend, token) + ) + } object RestAppImpl { @@ -55,10 +168,21 @@ object RestAppImpl { notificationMod <- Resource.eval( NotificationModuleImpl[F](store, javaEmil, httpClient, 200) ) + schedulerMod = JobStoreModuleBuilder(store) + .withPubsub(pubSubT) + .withEventSink(notificationMod) + .build backend <- BackendApp - .create[F](store, javaEmil, ftsClient, pubSubT, notificationMod) + .create[F](store, javaEmil, ftsClient, pubSubT, schedulerMod, notificationMod) - app = new RestAppImpl[F](cfg, backend, notificationMod, wsTopic, pubSubT) + app = new 
RestAppImpl[F]( + cfg, + backend, + httpClient, + notificationMod, + wsTopic, + pubSubT + ) } yield app } diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala index 891d177e..a4d4fb6d 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala @@ -13,24 +13,18 @@ import cats.implicits._ import fs2.Stream import fs2.concurrent.Topic -import docspell.backend.auth.{AuthToken, ShareToken} import docspell.backend.msg.Topics import docspell.common._ -import docspell.oidc.CodeFlowRoutes import docspell.pubsub.naive.NaivePubSub -import docspell.restserver.auth.OpenId -import docspell.restserver.http4s.{EnvMiddleware, InternalHeader} -import docspell.restserver.routes._ -import docspell.restserver.webapp._ +import docspell.restserver.http4s.InternalHeader +import docspell.restserver.ws.OutputEvent import docspell.restserver.ws.OutputEvent.KeepAlive -import docspell.restserver.ws.{OutputEvent, WebSocketRoutes} import docspell.store.Store import docspell.store.records.RInternalSetting import org.http4s._ import org.http4s.blaze.client.BlazeClientBuilder import org.http4s.blaze.server.BlazeServerBuilder -import org.http4s.client.Client import org.http4s.dsl.Http4sDsl import org.http4s.headers.Location import org.http4s.implicits._ @@ -51,7 +45,7 @@ object RestServer { server = Stream .resource(createApp(cfg, pools, wsTopic)) - .flatMap { case (restApp, pubSub, httpClient, setting) => + .flatMap { case (restApp, pubSub, setting) => Stream( restApp.subscriptions, restApp.eventConsume(2), @@ -59,7 +53,7 @@ object RestServer { .bindHttp(cfg.bind.port, cfg.bind.address) .withoutBanner .withHttpWebSocketApp( - createHttpApp(cfg, setting, httpClient, pubSub, restApp, wsTopic) + createHttpApp(setting, pubSub, restApp) ) .serve .drain @@ -76,13 +70,13 @@ object RestServer { wsTopic: 
Topic[F, OutputEvent] ): Resource[ F, - (RestApp[F], NaivePubSub[F], Client[F], RInternalSetting) + (RestApp[F], NaivePubSub[F], RInternalSetting) ] = for { httpClient <- BlazeClientBuilder[F].resource store <- Store.create[F]( cfg.backend.jdbc, - cfg.backend.files.chunkSize, + cfg.backend.files.defaultFileRepositoryConfig, pools.connectEC ) setting <- Resource.eval(store.transact(RInternalSetting.create)) @@ -92,41 +86,22 @@ object RestServer { httpClient )(Topics.all.map(_.topic)) restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic) - } yield (restApp, pubSub, httpClient, setting) + } yield (restApp, pubSub, setting) def createHttpApp[F[_]: Async]( - cfg: Config, internSettings: RInternalSetting, - httpClient: Client[F], pubSub: NaivePubSub[F], - restApp: RestApp[F], - topic: Topic[F, OutputEvent] + restApp: RestApp[F] )( wsB: WebSocketBuilder2[F] ) = { - val templates = TemplateRoutes[F](cfg, Templates[F]) - val httpApp = Router( + val internal = Router( + "/" -> redirectTo("/app"), "/internal" -> InternalHeader(internSettings.internalRouteKey) { internalRoutes(pubSub) - }, - "/api/info" -> routes.InfoRoutes(), - "/api/v1/open/" -> openRoutes(cfg, httpClient, restApp), - "/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token => - securedRoutes(cfg, restApp, wsB, topic, token) - }, - "/api/v1/admin" -> AdminAuth(cfg.adminEndpoint) { - adminRoutes(cfg, restApp) - }, - "/api/v1/share" -> ShareAuth(restApp.backend.share, cfg.auth) { token => - shareRoutes(cfg, restApp, token) - }, - "/api/doc" -> templates.doc, - "/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]), - "/app" -> EnvMiddleware(templates.app), - "/sw.js" -> EnvMiddleware(templates.serviceWorker), - "/" -> redirectTo("/app") - ).orNotFound - + } + ) + val httpApp = (internal <+> restApp.routes(wsB)).orNotFound Logger.httpApp(logHeaders = false, logBody = false)(httpApp) } @@ -135,88 +110,6 @@ object RestServer { "pubsub" -> pubSub.receiveRoute ) - def 
securedRoutes[F[_]: Async]( - cfg: Config, - restApp: RestApp[F], - wsB: WebSocketBuilder2[F], - topic: Topic[F, OutputEvent], - token: AuthToken - ): HttpRoutes[F] = - Router( - "ws" -> WebSocketRoutes(token, restApp.backend, topic, wsB), - "auth" -> LoginRoutes.session(restApp.backend.login, cfg, token), - "tag" -> TagRoutes(restApp.backend, token), - "equipment" -> EquipmentRoutes(restApp.backend, token), - "organization" -> OrganizationRoutes(restApp.backend, token), - "person" -> PersonRoutes(restApp.backend, token), - "source" -> SourceRoutes(restApp.backend, token), - "user/otp" -> TotpRoutes(restApp.backend, cfg, token), - "user" -> UserRoutes(restApp.backend, token), - "collective" -> CollectiveRoutes(restApp.backend, token), - "queue" -> JobQueueRoutes(restApp.backend, token), - "item" -> ItemRoutes(cfg, restApp.backend, token), - "items" -> ItemMultiRoutes(cfg, restApp.backend, token), - "attachment" -> AttachmentRoutes(restApp.backend, token), - "attachments" -> AttachmentMultiRoutes(restApp.backend, token), - "upload" -> UploadRoutes.secured(restApp.backend, cfg, token), - "checkfile" -> CheckFileRoutes.secured(restApp.backend, token), - "email/send" -> MailSendRoutes(restApp.backend, token), - "email/settings" -> MailSettingsRoutes(restApp.backend, token), - "email/sent" -> SentMailRoutes(restApp.backend, token), - "share" -> ShareRoutes.manage(restApp.backend, token), - "usertask/notifydueitems" -> NotifyDueItemsRoutes(cfg, restApp.backend, token), - "usertask/scanmailbox" -> ScanMailboxRoutes(restApp.backend, token), - "usertask/periodicquery" -> PeriodicQueryRoutes(cfg, restApp.backend, token), - "calevent/check" -> CalEventCheckRoutes(), - "fts" -> FullTextIndexRoutes.secured(cfg, restApp.backend, token), - "folder" -> FolderRoutes(restApp.backend, token), - "customfield" -> CustomFieldRoutes(restApp.backend, token), - "clientSettings" -> ClientSettingsRoutes(restApp.backend, token), - "notification" -> NotificationRoutes(cfg, restApp.backend, 
token), - "querybookmark" -> BookmarkRoutes(restApp.backend, token) - ) - - def openRoutes[F[_]: Async]( - cfg: Config, - client: Client[F], - restApp: RestApp[F] - ): HttpRoutes[F] = - Router( - "auth/openid" -> CodeFlowRoutes( - cfg.openIdEnabled, - OpenId.handle[F](restApp.backend, cfg), - OpenId.codeFlowConfig(cfg), - client - ), - "auth" -> LoginRoutes.login(restApp.backend.login, cfg), - "signup" -> RegisterRoutes(restApp.backend, cfg), - "upload" -> UploadRoutes.open(restApp.backend, cfg), - "checkfile" -> CheckFileRoutes.open(restApp.backend), - "integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg), - "share" -> ShareRoutes.verify(restApp.backend, cfg) - ) - - def adminRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] = - Router( - "fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend), - "user/otp" -> TotpRoutes.admin(restApp.backend), - "user" -> UserRoutes.admin(restApp.backend), - "info" -> InfoRoutes.admin(cfg), - "attachments" -> AttachmentRoutes.admin(restApp.backend) - ) - - def shareRoutes[F[_]: Async]( - cfg: Config, - restApp: RestApp[F], - token: ShareToken - ): HttpRoutes[F] = - Router( - "search" -> ShareSearchRoutes(restApp.backend, cfg, token), - "attachment" -> ShareAttachmentRoutes(restApp.backend, token), - "item" -> ShareItemRoutes(restApp.backend, token), - "clientSettings" -> ClientSettingsRoutes.share(restApp.backend, token) - ) - def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = { val dsl = new Http4sDsl[F] {} import dsl._ diff --git a/modules/restserver/src/main/scala/docspell/restserver/Subscriptions.scala b/modules/restserver/src/main/scala/docspell/restserver/Subscriptions.scala index faecac11..8ffa7045 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/Subscriptions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/Subscriptions.scala @@ -10,9 +10,9 @@ import cats.effect.Async import fs2.Stream import fs2.concurrent.Topic -import 
docspell.backend.msg.{JobDone, JobSubmitted} import docspell.pubsub.api.PubSubT import docspell.restserver.ws.OutputEvent +import docspell.scheduler.msg.{JobDone, JobSubmitted} /** Subscribes to those events from docspell that are forwarded to the websocket endpoints */ diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/FileRepositoryRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/FileRepositoryRoutes.scala new file mode 100644 index 00000000..3d4592c2 --- /dev/null +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/FileRepositoryRoutes.scala @@ -0,0 +1,69 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.restserver.routes + +import cats.data.NonEmptyList +import cats.effect._ +import cats.implicits._ + +import docspell.backend.BackendApp +import docspell.common.FileCopyTaskArgs.Selection +import docspell.common.{FileCopyTaskArgs, FileIntegrityCheckArgs, FileKeyPart} +import docspell.restapi.model._ + +import org.http4s._ +import org.http4s.circe.CirceEntityDecoder._ +import org.http4s.circe.CirceEntityEncoder._ +import org.http4s.dsl.Http4sDsl + +object FileRepositoryRoutes { + + def admin[F[_]: Async](backend: BackendApp[F]): HttpRoutes[F] = { + val dsl = Http4sDsl[F] + import dsl._ + val logger = docspell.logging.getLogger[F] + + HttpRoutes.of { + case req @ POST -> Root / "cloneFileRepository" => + for { + input <- req.as[FileRepositoryCloneRequest] + args = makeTaskArgs(input) + job <- backend.fileRepository.cloneFileRepository(args, true) + result = BasicResult( + job.isDefined, + job.fold(s"Job for '${FileCopyTaskArgs.taskName.id}' already running")(j => + s"Job for '${FileCopyTaskArgs.taskName.id}' submitted: ${j.id.id}" + ) + ) + _ <- logger.info(result.message) + resp <- Ok(result) + } yield resp + + case req @ POST -> Root / "integrityCheck" => + for { + input <- req.as[FileKeyPart] + job <- 
backend.fileRepository.checkIntegrityAll(input, true) + result = BasicResult( + job.isDefined, + job.fold(s"Job for '${FileIntegrityCheckArgs.taskName.id}' already running")(j => + s"Job for '${FileIntegrityCheckArgs.taskName.id}' submitted: ${j.id.id}" + ) + ) + _ <- logger.info(result.message) + resp <- Ok(result) + } yield resp + } + } + + def makeTaskArgs(input: FileRepositoryCloneRequest): FileCopyTaskArgs = + NonEmptyList.fromList(input.targetRepositories) match { + case Some(nel) => + FileCopyTaskArgs(None, Selection.Stores(nel)) + case None => + FileCopyTaskArgs(None, Selection.All) + } +} diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/NotifyDueItemsRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/NotifyDueItemsRoutes.scala index eae528e2..dc180ba0 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/NotifyDueItemsRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/NotifyDueItemsRoutes.scala @@ -19,7 +19,7 @@ import docspell.restapi.model._ import docspell.restserver.Config import docspell.restserver.conv.{Conversions, NonEmptyListSupport} import docspell.restserver.http4s.ClientRequestInfo -import docspell.store.usertask._ +import docspell.scheduler.usertask._ import org.http4s._ import org.http4s.circe.CirceEntityDecoder._ diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/PeriodicQueryRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/PeriodicQueryRoutes.scala index 496b069e..0a300130 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/PeriodicQueryRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/PeriodicQueryRoutes.scala @@ -20,7 +20,7 @@ import docspell.restapi.model._ import docspell.restserver.Config import docspell.restserver.conv.{Conversions, NonEmptyListSupport} import docspell.restserver.http4s.ClientRequestInfo -import docspell.store.usertask._
+import docspell.scheduler.usertask._ import org.http4s._ import org.http4s.circe.CirceEntityDecoder._ diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala index 3912d872..df271180 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala @@ -15,7 +15,7 @@ import docspell.backend.auth.AuthToken import docspell.common._ import docspell.restapi.model._ import docspell.restserver.conv.Conversions -import docspell.store.usertask._ +import docspell.scheduler.usertask._ import org.http4s._ import org.http4s.circe.CirceEntityDecoder._ diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/Context.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/Context.scala new file mode 100644 index 00000000..0594dcf8 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/Context.scala @@ -0,0 +1,28 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +import docspell.common._ +import docspell.logging.Logger + +trait Context[F[_], A] { self => + + def jobId: Ident + + def args: A + + def config: SchedulerConfig + + def logger: Logger[F] + + def setProgress(percent: Int): F[Unit] + + def isLastRetry: F[Boolean] + + def map[C](f: A => C): Context[F, C] + +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/CountingScheme.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/CountingScheme.scala similarity index 97% rename from modules/joex/src/main/scala/docspell/joex/scheduler/CountingScheme.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/CountingScheme.scala index 68890f57..86aecd1a 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/CountingScheme.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/CountingScheme.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import cats.implicits._ diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/Job.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/Job.scala new file mode 100644 index 00000000..a6f4fc82 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/Job.scala @@ -0,0 +1,44 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +import cats.effect.Sync +import cats.syntax.functor._ + +import docspell.common._ + +import io.circe.Encoder + +final case class Job[A]( + id: Ident, + task: Ident, + group: Ident, + args: A, + subject: String, + submitter: Ident, + priority: Priority, + tracker: Option[Ident] +) { + + def encode(implicit E: Encoder[A]): Job[String] = + Job(id, task, group, E.apply(args).noSpaces, subject, submitter, priority, tracker) +} + +object Job { + def createNew[F[_]: Sync, A]( + task: Ident, + group: Ident, + args: A, + subject: String, + submitter: Ident, + priority: Priority, + tracker: Option[Ident] + ): F[Job[A]] = + Ident.randomId[F].map { id => + Job(id, task, group, args, subject, submitter, priority, tracker) + } +} diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/JobStore.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobStore.scala new file mode 100644 index 00000000..de9eee9d --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobStore.scala @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +trait JobStore[F[_]] { + + /** Inserts the job into the queue to get picked up as soon as possible. The job must + * have a new unique id. + */ + def insert(job: Job[String]): F[Unit] + + /** Inserts the job into the queue only, if there is no job with the same tracker-id + * running at the moment. The job id must be a new unique id. + * + * If the job has no tracker defined, it is simply inserted. 
+ */ + def insertIfNew(job: Job[String]): F[Boolean] + + def insertAll(jobs: Seq[Job[String]]): F[List[Boolean]] + + def insertAllIfNew(jobs: Seq[Job[String]]): F[List[Boolean]] + +} diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/JobStoreModule.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobStoreModule.scala new file mode 100644 index 00000000..729bf645 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobStoreModule.scala @@ -0,0 +1,15 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +import docspell.scheduler.usertask.UserTaskStore + +trait JobStoreModule[F[_]] { + + def userTasks: UserTaskStore[F] + def jobs: JobStore[F] +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/JobTask.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTask.scala similarity index 82% rename from modules/joex/src/main/scala/docspell/joex/scheduler/JobTask.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/JobTask.scala index fd291d0b..a7198378 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/JobTask.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTask.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import cats.effect.Sync import cats.implicits._ @@ -12,7 +12,7 @@ import cats.implicits._ import docspell.common.Ident import docspell.common.syntax.all._ -import io.circe.{Decoder, Encoder, Json} +import io.circe.Decoder /** Binds a Task to a name. 
This is required to lookup the code based on the taskName in * the RJob data and to execute it given the arguments that have to be read from a @@ -24,7 +24,7 @@ import io.circe.{Decoder, Encoder, Json} */ case class JobTask[F[_]]( name: Ident, - task: Task[F, String, Json], + task: Task[F, String, JobTaskResult], onCancel: Task[F, String, Unit] ) @@ -36,16 +36,16 @@ object JobTask { onCancel: Task[F, A, Unit] )(implicit D: Decoder[A], - E: Encoder[B] + E: JobTaskResultEncoder[B] ): JobTask[F] = { val convert: String => F[A] = str => str.parseJsonAs[A] match { case Right(a) => a.pure[F] case Left(ex) => - Sync[F].raiseError(new Exception(s"Cannot parse task arguments: $str", ex)) + Sync[F].raiseError(new Exception(s"Cannot parse task arguments: '$str'", ex)) } - JobTask(name, task.contramap(convert).map(E.apply), onCancel.contramap(convert)) + JobTask(name, task.contramap(convert).map(E.encode), onCancel.contramap(convert)) } } diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/JobTaskRegistry.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskRegistry.scala similarity index 95% rename from modules/joex/src/main/scala/docspell/joex/scheduler/JobTaskRegistry.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskRegistry.scala index 0efb37b7..baa12150 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/JobTaskRegistry.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskRegistry.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import docspell.common.Ident diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskResult.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskResult.scala new file mode 100644 index 00000000..c121fc47 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskResult.scala @@ -0,0 +1,27 @@ +/* + * Copyright 
2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +import io.circe.Json + +final case class JobTaskResult(message: Option[String], json: Option[Json]) { + + def withMessage(m: String): JobTaskResult = + copy(message = Some(m)) + + def withJson(json: Json): JobTaskResult = + copy(json = Some(json)) +} + +object JobTaskResult { + + val empty: JobTaskResult = JobTaskResult(None, None) + + def message(msg: String): JobTaskResult = JobTaskResult(Some(msg), None) + + def json(json: Json): JobTaskResult = JobTaskResult(None, Some(json)) +} diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskResultEncoder.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskResultEncoder.scala new file mode 100644 index 00000000..a0ad96a5 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/JobTaskResultEncoder.scala @@ -0,0 +1,49 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +import docspell.scheduler.JobTaskResultEncoder.instance + +import io.circe.Encoder + +trait JobTaskResultEncoder[A] { self => + def encode(a: A): JobTaskResult + + final def contramap[B](f: B => A): JobTaskResultEncoder[B] = + JobTaskResultEncoder.instance(b => self.encode(f(b))) + + final def map(f: JobTaskResult => JobTaskResult): JobTaskResultEncoder[A] = + instance(a => f(self.encode(a))) + + final def modify(f: (A, JobTaskResult) => JobTaskResult): JobTaskResultEncoder[A] = + instance(a => f(a, self.encode(a))) + + final def withMessage(f: A => String): JobTaskResultEncoder[A] = + modify((a, r) => r.withMessage(f(a))) +} + +object JobTaskResultEncoder { + + def apply[A](implicit v: JobTaskResultEncoder[A]): JobTaskResultEncoder[A] = v + + def instance[A](f: A => JobTaskResult): JobTaskResultEncoder[A] = + (a: A) => f(a) + + def fromJson[A: Encoder]: JobTaskResultEncoder[A] = + instance(a => 
JobTaskResult.json(Encoder[A].apply(a))) + + implicit val unitJobTaskResultEncoder: JobTaskResultEncoder[Unit] = + instance(_ => JobTaskResult.empty) + + implicit def optionJobTaskResultEncoder[A](implicit + ea: JobTaskResultEncoder[A] + ): JobTaskResultEncoder[Option[A]] = + instance { + case Some(a) => ea.encode(a) + case None => JobTaskResult.empty + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/LogEvent.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/LogEvent.scala similarity index 95% rename from modules/joex/src/main/scala/docspell/joex/scheduler/LogEvent.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/LogEvent.scala index 8edc966d..29a91631 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/LogEvent.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/LogEvent.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import cats.effect.Sync import cats.implicits._ diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/PeriodicScheduler.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/PeriodicScheduler.scala new file mode 100644 index 00000000..3a633a88 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/PeriodicScheduler.scala @@ -0,0 +1,33 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +import cats.effect._ +import fs2._ + +/** A periodic scheduler takes care to submit periodic tasks to the job queue. + * + * It is run in the background to regularly find a periodic task to execute. If the task + * is due, it will be submitted into the job queue where it will be picked up by the + * scheduler from some joex instance. If it is due in the future, a notification is + * scheduled to be received at that time so the task can be looked up again. 
+ */ +trait PeriodicScheduler[F[_]] { + + def config: PeriodicSchedulerConfig + + def start: Stream[F, Nothing] + + def shutdown: F[Unit] + + def periodicAwake: F[Fiber[F, Throwable, Unit]] + + def notifyChange: F[Unit] + + /** Starts listening for notify messages in the background. */ + def startSubscriptions: F[Unit] +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicSchedulerConfig.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/PeriodicSchedulerConfig.scala similarity index 55% rename from modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicSchedulerConfig.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/PeriodicSchedulerConfig.scala index c960f4d0..6fa82069 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicSchedulerConfig.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/PeriodicSchedulerConfig.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import docspell.common._ @@ -12,3 +12,8 @@ case class PeriodicSchedulerConfig( name: Ident, wakeupPeriod: Duration ) + +object PeriodicSchedulerConfig { + def default(id: Ident): PeriodicSchedulerConfig = + PeriodicSchedulerConfig(id, Duration.minutes(10)) +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/Scheduler.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/Scheduler.scala similarity index 79% rename from modules/joex/src/main/scala/docspell/joex/scheduler/Scheduler.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/Scheduler.scala index 730b616b..e3f1193e 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/Scheduler.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/Scheduler.scala @@ -4,24 +4,27 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import cats.effect._ import 
fs2.Stream import docspell.common.Ident -import docspell.store.records.RJob trait Scheduler[F[_]] { def config: SchedulerConfig - def getRunning: F[Vector[RJob]] + def getRunning: F[Vector[Job[String]]] def requestCancel(jobId: Ident): F[Boolean] def notifyChange: F[Unit] + /** Starts reacting on notify and cancel messages. */ + def startSubscriptions: F[Unit] + + /** Starts the schedulers main loop. */ def start: Stream[F, Nothing] /** Requests to shutdown the scheduler. diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerConfig.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/SchedulerConfig.scala similarity index 66% rename from modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerConfig.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/SchedulerConfig.scala index fc67dbe7..25fad892 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerConfig.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/SchedulerConfig.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import docspell.common._ @@ -20,11 +20,10 @@ case class SchedulerConfig( object SchedulerConfig { - val default = SchedulerConfig( - name = Ident.unsafe("default-scheduler"), - poolSize = 2 // math.max(2, Runtime.getRuntime.availableProcessors / 2) - , - countingScheme = CountingScheme(2, 1), + def default(id: Ident) = SchedulerConfig( + name = id, + poolSize = 1, + countingScheme = CountingScheme(3, 1), retries = 5, retryDelay = Duration.seconds(30), logBufferSize = 500, diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/SchedulerModule.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/SchedulerModule.scala new file mode 100644 index 00000000..a8e5f53e --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/SchedulerModule.scala @@ -0,0 +1,12 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler + +trait SchedulerModule[F[_]] { + def scheduler: Scheduler[F] + def periodicScheduler: PeriodicScheduler[F] +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/Task.scala similarity index 73% rename from modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/Task.scala index d211d5a0..d6868a08 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/Task.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import cats._ import cats.data.Kleisli @@ -18,12 +18,6 @@ trait Task[F[_], A, B] { def run(ctx: Context[F, A]): F[B] - def map[C](f: B => C)(implicit F: Functor[F]): Task[F, A, C] = - Task(Task.toKleisli(this).map(f)) - - def flatMap[C](f: B => Task[F, A, C])(implicit F: FlatMap[F]): Task[F, A, C] = - Task(Task.toKleisli(this).flatMap(a => Task.toKleisli(f(a)))) - def andThen[C](f: B => F[C])(implicit F: FlatMap[F]): Task[F, A, C] = Task(Task.toKleisli(this).andThen(f)) @@ -62,4 +56,17 @@ object Task { def log[F[_], A](f: Logger[F] => F[Unit]): Task[F, A, Unit] = Task(ctx => f(ctx.logger)) + + implicit def taskMonad[F[_]: Monad, T]: Monad[Task[F, T, *]] = + new Monad[Task[F, T, *]] { + def pure[A](x: A) = Task(_ => Monad[F].pure(x)) + def flatMap[A, B](fa: Task[F, T, A])(f: A => Task[F, T, B]) = + Task(Task.toKleisli(fa).flatMap(a => Task.toKleisli(f(a)))) + + def tailRecM[A, B](a: A)(f: A => Task[F, T, Either[A, B]]) = { + val monadK = Monad[Kleisli[F, Context[F, T], *]] + val r = monadK.tailRecM(a)(x => Task.toKleisli(f(x))) + Task(r) + } + } } diff --git a/modules/backend/src/main/scala/docspell/backend/msg/CancelJob.scala 
b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/CancelJob.scala similarity index 95% rename from modules/backend/src/main/scala/docspell/backend/msg/CancelJob.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/msg/CancelJob.scala index 677f1fba..1c9bae36 100644 --- a/modules/backend/src/main/scala/docspell/backend/msg/CancelJob.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/CancelJob.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.backend.msg +package docspell.scheduler.msg import docspell.common._ import docspell.pubsub.api.{Topic, TypedTopic} diff --git a/modules/backend/src/main/scala/docspell/backend/msg/JobDone.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobDone.scala similarity index 95% rename from modules/backend/src/main/scala/docspell/backend/msg/JobDone.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobDone.scala index 0ac701c3..61eff3f9 100644 --- a/modules/backend/src/main/scala/docspell/backend/msg/JobDone.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobDone.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.backend.msg +package docspell.scheduler.msg import docspell.common._ import docspell.pubsub.api.{Topic, TypedTopic} diff --git a/modules/backend/src/main/scala/docspell/backend/msg/JobSubmitted.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobSubmitted.scala similarity index 94% rename from modules/backend/src/main/scala/docspell/backend/msg/JobSubmitted.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobSubmitted.scala index 2cecdde8..c798b9d7 100644 --- a/modules/backend/src/main/scala/docspell/backend/msg/JobSubmitted.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobSubmitted.scala @@ -4,7 +4,8 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package 
docspell.backend.msg +package docspell.scheduler.msg + import docspell.common._ import docspell.pubsub.api.{Topic, TypedTopic} diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobsNotify.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobsNotify.scala new file mode 100644 index 00000000..1f4331d2 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/JobsNotify.scala @@ -0,0 +1,15 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.msg + +import docspell.pubsub.api.{Topic, TypedTopic} + +/** A generic notification to the job executors to look for new work. */ +object JobsNotify { + def apply(): TypedTopic[Unit] = + TypedTopic[Unit](Topic("jobs-notify")) +} diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/PeriodicTaskNotify.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/PeriodicTaskNotify.scala new file mode 100644 index 00000000..2ff4e055 --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/msg/PeriodicTaskNotify.scala @@ -0,0 +1,15 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.msg + +import docspell.pubsub.api.{Topic, TypedTopic} + +/** A generic notification to the periodic task scheduler to look for new work. */ +object PeriodicTaskNotify { + def apply(): TypedTopic[Unit] = + TypedTopic[Unit](Topic("periodic-task-notify")) +} diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTask.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTask.scala new file mode 100644 index 00000000..b799a20c --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTask.scala @@ -0,0 +1,31 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.usertask + +import docspell.common._ + +import com.github.eikek.calev.CalEvent +import io.circe.Encoder + +final case class UserTask[A]( + id: Ident, + name: Ident, + enabled: Boolean, + timer: CalEvent, + summary: Option[String], + args: A +) { + + def encode(implicit E: Encoder[A]): UserTask[String] = + copy(args = E(args).noSpaces) + + def withArgs[B](newArgs: B): UserTask[B] = + copy(args = newArgs) + + def mapArgs[B](f: A => B): UserTask[B] = + withArgs(f(args)) +} diff --git a/modules/store/src/main/scala/docspell/store/usertask/UserTaskScope.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTaskScope.scala similarity index 88% rename from modules/store/src/main/scala/docspell/store/usertask/UserTaskScope.scala rename to modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTaskScope.scala index 6b269d1d..236c7ee6 100644 --- a/modules/store/src/main/scala/docspell/store/usertask/UserTaskScope.scala +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTaskScope.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.store.usertask +package docspell.scheduler.usertask import docspell.common._ @@ -20,7 +20,7 @@ sealed trait UserTaskScope { self: Product => /** Maps to the account or uses the collective for both parts if the scope is collective * wide. 
*/ - private[usertask] def toAccountId: AccountId = + private[scheduler] def toAccountId: AccountId = AccountId(collective, fold(_.user, identity)) } @@ -49,4 +49,7 @@ object UserTaskScope { def apply(collective: Ident): UserTaskScope = UserTaskScope.collective(collective) + + def system: UserTaskScope = + collective(DocspellSystem.taskGroup) } diff --git a/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTaskStore.scala b/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTaskStore.scala new file mode 100644 index 00000000..8d8b594d --- /dev/null +++ b/modules/scheduler/api/src/main/scala/docspell/scheduler/usertask/UserTaskStore.scala @@ -0,0 +1,97 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.usertask + +import cats.data.OptionT +import fs2.Stream + +import docspell.common._ + +import io.circe._ + +/** User tasks are `RPeriodicTask`s that can be managed by the user. The user can change + * arguments, enable/disable it or run it just once. + * + * This class defines methods at a higher level, dealing with `UserTask` and + * `UserTaskScope` instead of directly using `RPeriodicTask`. A user task is associated + * to a specific user (not just the collective). But it can be associated to the whole + * collective by using the collective as submitter, too. This is abstracted in + * `UserTaskScope`. + * + * implNote: The mapping is as follows: The collective is the task group. The submitter + * property contains the username. Once a task is saved to the database, it can only be + * referenced uniquely by its id. A user may submit multiple same tasks (with different + * properties). + */ +trait UserTaskStore[F[_]] { + + /** Return all tasks of the given user. */ + def getAll(scope: UserTaskScope): Stream[F, UserTask[String]] + + /** Return all tasks of the given name and user. The task's arguments are returned as + * stored in the database. 
+ */ + def getByNameRaw(scope: UserTaskScope, name: Ident): Stream[F, UserTask[String]] + + /** Return all tasks of the given name and user. The task's arguments are decoded using + * the given json decoder. + */ + def getByName[A](scope: UserTaskScope, name: Ident)(implicit + D: Decoder[A] + ): Stream[F, UserTask[A]] + + /** Return a user-task with the given id. */ + def getByIdRaw(scope: UserTaskScope, id: Ident): OptionT[F, UserTask[String]] + + /** Updates or inserts the given task. + * + * The task is identified by its id. If no task with this id exists, a new one is + * created. Otherwise the existing task is updated. + */ + def updateTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])( + implicit E: Encoder[A] + ): F[Int] + + /** Delete the task with the given id of the given user. */ + def deleteTask(scope: UserTaskScope, id: Ident): F[Int] + + /** Return the task of the given user and name. If multiple exists, an error is + * returned. The task's arguments are returned as stored in the database. + */ + def getOneByNameRaw(scope: UserTaskScope, name: Ident): OptionT[F, UserTask[String]] + + /** Return the task of the given user and name. If multiple exists, an error is + * returned. The task's arguments are decoded using the given json decoder. + */ + def getOneByName[A](scope: UserTaskScope, name: Ident)(implicit + D: Decoder[A] + ): OptionT[F, UserTask[A]] + + /** Updates or inserts the given task. + * + * Unlike `updateTask`, this ensures that there is at most one task of some name in the + * db. Multiple same tasks (task with same name) may not be allowed to run, depending + * on what they do. This is not ensured by the database, though. The task is identified + * by task name, submitter and group. + * + * If there are currently multiple tasks with same name as `ut` for the user `account`, + * they will all be removed and the given task inserted! 
+ */ + def updateOneTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])( + implicit E: Encoder[A] + ): F[UserTask[String]] + + /** Delete all tasks of the given user that have name `name`. */ + def deleteAll(scope: UserTaskScope, name: Ident): F[Int] + + /** Discards the schedule and immediately submits the task to the job executor's queue. + * It will not update the corresponding periodic task. + */ + def executeNow[A](scope: UserTaskScope, subject: Option[String], task: UserTask[A])( + implicit E: Encoder[A] + ): F[Unit] +} diff --git a/modules/joex/src/test/scala/docspell/joex/scheduler/CountingSchemeSpec.scala b/modules/scheduler/api/src/test/scala/docspell/scheduler/CountingSchemeSpec.scala similarity index 93% rename from modules/joex/src/test/scala/docspell/joex/scheduler/CountingSchemeSpec.scala rename to modules/scheduler/api/src/test/scala/docspell/scheduler/CountingSchemeSpec.scala index cf819f2f..e396127e 100644 --- a/modules/joex/src/test/scala/docspell/joex/scheduler/CountingSchemeSpec.scala +++ b/modules/scheduler/api/src/test/scala/docspell/scheduler/CountingSchemeSpec.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler import docspell.common.Priority diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/Context.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/ContextImpl.scala similarity index 55% rename from modules/joex/src/main/scala/docspell/joex/scheduler/Context.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/ContextImpl.scala index 2fb2a529..d801a169 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/Context.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/ContextImpl.scala @@ -4,43 +4,42 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler.impl +import cats._ import cats.effect._ 
-import cats.implicits._ -import cats.{Applicative, Functor} +import cats.syntax.all._ import docspell.common._ import docspell.logging.Logger +import docspell.scheduler._ import docspell.store.Store import docspell.store.records.RJob -trait Context[F[_], A] { self => +class ContextImpl[F[_]: Functor, A]( + val args: A, + val logger: Logger[F], + store: Store[F], + val config: SchedulerConfig, + val jobId: Ident +) extends Context[F, A] { - def jobId: Ident + def setProgress(percent: Int): F[Unit] = { + val pval = math.min(100, math.max(0, percent)) + store.transact(RJob.setProgress(jobId, pval)).map(_ => ()) + } - def args: A - - def config: SchedulerConfig - - def logger: Logger[F] - - def setProgress(percent: Int): F[Unit] - - def store: Store[F] - - final def isLastRetry(implicit ev: Applicative[F]): F[Boolean] = + def isLastRetry: F[Boolean] = for { current <- store.transact(RJob.getRetries(jobId)) last = config.retries == current.getOrElse(0) } yield last - def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] = - new Context.ContextImpl[F, C](f(args), logger, store, config, jobId) + def map[C](f: A => C) = + new ContextImpl[F, C](f(args), logger, store, config, jobId) } -object Context { - +object ContextImpl { def create[F[_]: Async, A]( jobId: Ident, arg: A, @@ -65,18 +64,4 @@ object Context { ctx = create[F, A](job.id, arg, config, logger, store) } yield ctx } - - final private class ContextImpl[F[_]: Functor, A]( - val args: A, - val logger: Logger[F], - val store: Store[F], - val config: SchedulerConfig, - val jobId: Ident - ) extends Context[F, A] { - - def setProgress(percent: Int): F[Unit] = { - val pval = math.min(100, math.max(0, percent)) - store.transact(RJob.setProgress(jobId, pval)).map(_ => ()) - } - } } diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobQueue.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobQueue.scala new file mode 100644 index 00000000..5317b7dc --- /dev/null +++ 
b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobQueue.scala @@ -0,0 +1,38 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.effect._ +import cats.implicits._ + +import docspell.common._ +import docspell.store.Store +import docspell.store.queries.QJob +import docspell.store.records.RJob + +trait JobQueue[F[_]] { + def nextJob( + prio: Ident => F[Priority], + worker: Ident, + retryPause: Duration + ): F[Option[RJob]] +} + +object JobQueue { + private[scheduler] def apply[F[_]: Async](store: Store[F]): JobQueue[F] = + new JobQueue[F] { + private[this] val logger = docspell.logging.getLogger[F] + + def nextJob( + prio: Ident => F[Priority], + worker: Ident, + retryPause: Duration + ): F[Option[RJob]] = + logger + .trace("Select next job") *> QJob.takeNextJob(store)(prio, worker, retryPause) + } +} diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStoreImpl.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStoreImpl.scala new file mode 100644 index 00000000..38b0b067 --- /dev/null +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStoreImpl.scala @@ -0,0 +1,92 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.effect.Sync +import cats.syntax.all._ + +import docspell.common.Timestamp +import docspell.scheduler._ +import docspell.store.Store +import docspell.store.records.RJob + +final class JobStoreImpl[F[_]: Sync](store: Store[F]) extends JobStore[F] { + private[this] val logger = docspell.logging.getLogger[F] + + def insert(job: Job[String]): F[Unit] = + for { + now <- Timestamp.current[F] + _ <- insert0(job, now) + } yield () + + def insert0(job: Job[String], submitted: Timestamp): F[Unit] = + store + .transact(RJob.insert(toRecord(job, submitted))) + .flatMap { n => + if (n != 1) + Sync[F] + .raiseError(new Exception(s"Inserting job failed. Update count: $n")) + else ().pure[F] + } + + def insertIfNew(job: Job[String]): F[Boolean] = + Timestamp.current[F].flatMap(now => insertIfNew0(job, now)) + + def insertIfNew0(job: Job[String], submitted: Timestamp): F[Boolean] = + for { + rj <- job.tracker match { + case Some(tid) => + store.transact(RJob.findNonFinalByTracker(tid)) + case None => + None.pure[F] + } + ret <- + if (rj.isDefined) false.pure[F] + else insert0(job, submitted).as(true) + } yield ret + + def insertAll(jobs: Seq[Job[String]]): F[List[Boolean]] = + Timestamp.current[F].flatMap { now => + jobs.toList + .traverse(j => insert0(j, now).attempt) + .flatMap(_.traverse { + case Right(()) => true.pure[F] + case Left(ex) => + logger.error(ex)("Could not insert job. Skipping it.").as(false) + }) + } + + def insertAllIfNew(jobs: Seq[Job[String]]) = + Timestamp.current[F].flatMap { now => + jobs.toList + .traverse(j => insertIfNew0(j, now).attempt) + .flatMap(_.traverse { + case Right(true) => true.pure[F] + case Right(false) => false.pure[F] + case Left(ex) => + logger.error(ex)("Could not insert job. 
Skipping it.").as(false) + }) + } + + def toRecord(job: Job[String], timestamp: Timestamp): RJob = + RJob.newJob( + job.id, + job.task, + job.group, + job.args, + job.subject, + timestamp, + job.submitter, + job.priority, + job.tracker + ) +} + +object JobStoreImpl { + def apply[F[_]: Sync](store: Store[F]): JobStore[F] = + new JobStoreImpl[F](store) +} diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStoreModuleBuilder.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStoreModuleBuilder.scala new file mode 100644 index 00000000..0cab5e30 --- /dev/null +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStoreModuleBuilder.scala @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.effect.Async + +import docspell.notification.api.EventSink +import docspell.pubsub.api.PubSubT +import docspell.scheduler._ +import docspell.scheduler.usertask.UserTaskStore +import docspell.store.Store + +case class JobStoreModuleBuilder[F[_]: Async]( + store: Store[F], + pubsub: PubSubT[F], + eventSink: EventSink[F] +) { + def withPubsub(ps: PubSubT[F]): JobStoreModuleBuilder[F] = + copy(pubsub = ps) + + def withEventSink(es: EventSink[F]): JobStoreModuleBuilder[F] = + copy(eventSink = es) + + def build: JobStoreModuleBuilder.Module[F] = { + val jobStore = JobStorePublish(store, pubsub, eventSink) + val periodicTaskStore = PeriodicTaskStore(store, jobStore) + val userTaskStore = UserTaskStoreImpl(store, periodicTaskStore) + new JobStoreModuleBuilder.Module( + userTaskStore, + periodicTaskStore, + jobStore, + store, + eventSink, + pubsub + ) + } +} + +object JobStoreModuleBuilder { + + def apply[F[_]: Async](store: Store[F]): JobStoreModuleBuilder[F] = + JobStoreModuleBuilder(store, PubSubT.noop[F], EventSink.silent[F]) + + final class Module[F[_]]( + val userTasks: UserTaskStore[F], + val 
periodicTaskStore: PeriodicTaskStore[F], + val jobs: JobStore[F], + val store: Store[F], + val eventSink: EventSink[F], + val pubSubT: PubSubT[F] + ) extends JobStoreModule[F] {} +} diff --git a/modules/backend/src/main/scala/docspell/backend/msg/JobQueuePublish.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStorePublish.scala similarity index 57% rename from modules/backend/src/main/scala/docspell/backend/msg/JobQueuePublish.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStorePublish.scala index d4011943..f9ced949 100644 --- a/modules/backend/src/main/scala/docspell/backend/msg/JobQueuePublish.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/JobStorePublish.scala @@ -4,53 +4,52 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.backend.msg +package docspell.scheduler.impl import cats.effect._ import cats.implicits._ -import docspell.common.{Duration, Ident, Priority} -import docspell.notification.api.Event -import docspell.notification.api.EventSink +import docspell.common.JobState +import docspell.notification.api.{Event, EventSink} import docspell.pubsub.api.PubSubT +import docspell.scheduler._ +import docspell.scheduler.msg.JobSubmitted import docspell.store.Store -import docspell.store.queue.JobQueue -import docspell.store.records.RJob -final class JobQueuePublish[F[_]: Sync]( - delegate: JobQueue[F], +final class JobStorePublish[F[_]: Sync]( + delegate: JobStore[F], pubsub: PubSubT[F], eventSink: EventSink[F] -) extends JobQueue[F] { +) extends JobStore[F] { - private def msg(job: RJob): JobSubmitted = + private def msg(job: Job[String]): JobSubmitted = JobSubmitted(job.id, job.group, job.task, job.args) - private def event(job: RJob): Event.JobSubmitted = + private def event(job: Job[String]): Event.JobSubmitted = Event.JobSubmitted( job.id, job.group, job.task, job.args, - job.state, + JobState.waiting, job.subject, job.submitter ) - private def 
publish(job: RJob): F[Unit] = + private def publish(job: Job[String]): F[Unit] = pubsub.publish1(JobSubmitted.topic, msg(job)).as(()) *> eventSink.offer(event(job)) - def insert(job: RJob) = + def insert(job: Job[String]) = delegate.insert(job).flatTap(_ => publish(job)) - def insertIfNew(job: RJob) = + def insertIfNew(job: Job[String]) = delegate.insertIfNew(job).flatTap { case true => publish(job) case false => ().pure[F] } - def insertAll(jobs: Seq[RJob]) = + def insertAll(jobs: Seq[Job[String]]) = delegate.insertAll(jobs).flatTap { results => results.zip(jobs).traverse { case (res, job) => if (res) publish(job) @@ -58,23 +57,20 @@ final class JobQueuePublish[F[_]: Sync]( } } - def insertAllIfNew(jobs: Seq[RJob]) = + def insertAllIfNew(jobs: Seq[Job[String]]) = delegate.insertAllIfNew(jobs).flatTap { results => results.zip(jobs).traverse { case (res, job) => if (res) publish(job) else ().pure[F] } } - - def nextJob(prio: Ident => F[Priority], worker: Ident, retryPause: Duration) = - delegate.nextJob(prio, worker, retryPause) } -object JobQueuePublish { +object JobStorePublish { def apply[F[_]: Async]( store: Store[F], pubSub: PubSubT[F], eventSink: EventSink[F] - ): Resource[F, JobQueue[F]] = - JobQueue(store).map(q => new JobQueuePublish[F](q, pubSub, eventSink)) + ): JobStore[F] = + new JobStorePublish[F](JobStoreImpl(store), pubSub, eventSink) } diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/LogSink.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/LogSink.scala similarity index 96% rename from modules/joex/src/main/scala/docspell/joex/scheduler/LogSink.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/LogSink.scala index bf01a050..c59b45ce 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/LogSink.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/LogSink.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler 
+package docspell.scheduler.impl import cats.effect._ import cats.implicits._ @@ -12,6 +12,7 @@ import fs2.Pipe import docspell.common._ import docspell.logging +import docspell.scheduler.LogEvent import docspell.store.Store import docspell.store.records.RJobLog diff --git a/modules/store/src/main/scala/docspell/store/queue/Marked.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/Marked.scala similarity index 93% rename from modules/store/src/main/scala/docspell/store/queue/Marked.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/Marked.scala index af7f32db..27e9eb7d 100644 --- a/modules/store/src/main/scala/docspell/store/queue/Marked.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/Marked.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.store.queue +package docspell.scheduler.impl sealed trait Marked[+A] {} diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicSchedulerBuilder.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicSchedulerBuilder.scala new file mode 100644 index 00000000..0933105e --- /dev/null +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicSchedulerBuilder.scala @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.effect._ +import fs2.concurrent.SignallingRef + +import docspell.pubsub.api.PubSubT +import docspell.scheduler._ + +object PeriodicSchedulerBuilder { + + def resource[F[_]: Async]( + cfg: PeriodicSchedulerConfig, + store: PeriodicTaskStore[F], + pubsub: PubSubT[F] + ): Resource[F, PeriodicScheduler[F]] = + for { + waiter <- Resource.eval(SignallingRef(true)) + state <- Resource.eval(SignallingRef(PeriodicSchedulerImpl.emptyState[F])) + psch = new PeriodicSchedulerImpl[F]( + cfg, + store, + pubsub, + waiter, + state + ) + _ <- Resource.eval(psch.init) + } yield psch +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicSchedulerImpl.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicSchedulerImpl.scala similarity index 89% rename from modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicSchedulerImpl.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicSchedulerImpl.scala index 39761a74..2e7ac515 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/PeriodicSchedulerImpl.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicSchedulerImpl.scala @@ -4,27 +4,26 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler.impl import cats.effect._ import cats.implicits._ import fs2._ import fs2.concurrent.SignallingRef -import docspell.backend.ops.OJoex import docspell.common._ -import docspell.joex.scheduler.PeriodicSchedulerImpl.State -import docspell.store.queue._ +import docspell.pubsub.api.PubSubT +import docspell.scheduler._ +import docspell.scheduler.impl.PeriodicSchedulerImpl.State +import docspell.scheduler.msg.{JobsNotify, PeriodicTaskNotify} import docspell.store.records.RPeriodicTask import eu.timepit.fs2cron.calev.CalevScheduler final class 
PeriodicSchedulerImpl[F[_]: Async]( val config: PeriodicSchedulerConfig, - sch: Scheduler[F], - queue: JobQueue[F], store: PeriodicTaskStore[F], - joex: OJoex[F], + pubSub: PubSubT[F], waiter: SignallingRef[F, Boolean], state: SignallingRef[F, State[F]] ) extends PeriodicScheduler[F] { @@ -49,6 +48,13 @@ final class PeriodicSchedulerImpl[F[_]: Async]( def notifyChange: F[Unit] = waiter.update(b => !b) + def startSubscriptions: F[Unit] = + for { + _ <- Async[F].start(pubSub.subscribeSink(PeriodicTaskNotify()) { _ => + logger.info("Notify periodic scheduler from message") *> notifyChange + }) + } yield () + // internal /** On startup, get all periodic jobs from this scheduler and remove the mark, so they @@ -113,11 +119,11 @@ final class PeriodicSchedulerImpl[F[_]: Async]( case None => logger.info(s"Submitting job for periodic task '${pj.task.id}'") *> - pj.toJob.flatMap(queue.insert) *> notifyJoex *> true.pure[F] + store.submit(pj) *> notifyJoex *> true.pure[F] } def notifyJoex: F[Unit] = - sch.notifyChange *> joex.notifyAllNodes + pubSub.publish1IgnoreErrors(JobsNotify(), ()).void def scheduleNotify(pj: RPeriodicTask): F[Unit] = Timestamp diff --git a/modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicTaskStore.scala similarity index 81% rename from modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicTaskStore.scala index f1fad91f..bd922cb2 100644 --- a/modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/PeriodicTaskStore.scala @@ -4,12 +4,13 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.store.queue +package docspell.scheduler.impl import cats.effect._ import cats.implicits._ import docspell.common._ +import docspell.scheduler.{Job, JobStore} import 
docspell.store.queries.QPeriodicTask import docspell.store.records._ import docspell.store.{AddResult, Store} @@ -38,12 +39,18 @@ trait PeriodicTaskStore[F[_]] { /** Find all joex nodes as registered in the database. */ def findJoexNodes: F[Vector[RNode]] + + /** Creates a job from the given task and submits it into the job queue */ + def submit(task: RPeriodicTask): F[Unit] } object PeriodicTaskStore { - def create[F[_]: Sync](store: Store[F]): Resource[F, PeriodicTaskStore[F]] = - Resource.pure[F, PeriodicTaskStore[F]](new PeriodicTaskStore[F] { + def apply[F[_]: Sync]( + store: Store[F], + jobStore: JobStore[F] + ): PeriodicTaskStore[F] = + new PeriodicTaskStore[F] { private[this] val logger = docspell.logging.getLogger[F] def takeNext( worker: Ident, @@ -57,7 +64,7 @@ object PeriodicTaskStore { case false => Marked.notMarkable } case None => - Marked.notFound.pure[F] + Marked.notFound[RPeriodicTask].pure[F] } Resource.make(chooseNext) { @@ -117,5 +124,22 @@ object PeriodicTaskStore { def findJoexNodes: F[Vector[RNode]] = store.transact(RNode.findAll(NodeType.Joex)) - }) + def submit(task: RPeriodicTask) = + makeJob(task).flatMap(jobStore.insert) + + def makeJob(rt: RPeriodicTask): F[Job[String]] = + Ident.randomId[F].map { id => + Job( + id, + rt.task, + rt.group, + rt.args, + rt.subject, + rt.submitter, + rt.priority, + Some(id) + ) + } + + } } diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/QUserTask.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/QUserTask.scala new file mode 100644 index 00000000..058ed421 --- /dev/null +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/QUserTask.scala @@ -0,0 +1,196 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.effect.Sync +import cats.syntax.all._ +import fs2.Stream + +import docspell.common._ +import docspell.scheduler.usertask.{UserTask, UserTaskScope} +import docspell.store.qb.DML +import docspell.store.qb.DSL._ +import docspell.store.records.RPeriodicTask + +import com.github.eikek.calev.CalEvent +import doobie._ +import io.circe.{Decoder, Encoder} + +object QUserTask { + private val RT = RPeriodicTask.T + + implicit final class UserTaskCodec(ut: UserTask[String]) { + import docspell.common.syntax.all._ + + def decode[A](implicit D: Decoder[A]): Either[String, UserTask[A]] = + ut.args + .parseJsonAs[A] + .left + .map(_.getMessage) + .map(a => ut.copy(args = a)) + + def toPeriodicTask[F[_]: Sync]( + scope: UserTaskScope, + subject: Option[String] + ): F[RPeriodicTask] = + QUserTask + .create[F]( + ut.enabled, + scope, + ut.name, + ut.args, + subject.getOrElse(s"${scope.fold(_.user.id, _.id)}: ${ut.name.id}"), + Priority.Low, + ut.timer, + ut.summary + ) + .map(r => r.copy(id = ut.id)) + } + + def findAll(account: AccountId): Stream[ConnectionIO, UserTask[String]] = + run( + select(RT.all), + from(RT), + RT.group === account.collective && RT.submitter === account.user + ).query[RPeriodicTask].stream.map(makeUserTask) + + def findByName( + account: AccountId, + name: Ident + ): Stream[ConnectionIO, UserTask[String]] = + run( + select(RT.all), + from(RT), + where( + RT.group === account.collective, + RT.submitter === account.user, + RT.task === name + ) + ).query[RPeriodicTask].stream.map(makeUserTask) + + def findById( + account: AccountId, + id: Ident + ): ConnectionIO[Option[UserTask[String]]] = + run( + select(RT.all), + from(RT), + where( + RT.group === account.collective, + RT.submitter === account.user, + RT.id === id + ) + ).query[RPeriodicTask].option.map(_.map(makeUserTask)) + + def insert( + scope: UserTaskScope, + subject: Option[String], + 
task: UserTask[String] + ): ConnectionIO[Int] = + for { + r <- task.toPeriodicTask[ConnectionIO](scope, subject) + n <- RPeriodicTask.insert(r) + } yield n + + def update( + scope: UserTaskScope, + subject: Option[String], + task: UserTask[String] + ): ConnectionIO[Int] = + for { + r <- task.toPeriodicTask[ConnectionIO](scope, subject) + n <- RPeriodicTask.update(r) + } yield n + + def exists(id: Ident): ConnectionIO[Boolean] = + RPeriodicTask.exists(id) + + def delete(account: AccountId, id: Ident): ConnectionIO[Int] = + DML + .delete( + RT, + where( + RT.group === account.collective, + RT.submitter === account.user, + RT.id === id + ) + ) + + def deleteAll(account: AccountId, name: Ident): ConnectionIO[Int] = + DML.delete( + RT, + where( + RT.group === account.collective, + RT.submitter === account.user, + RT.task === name + ) + ) + + def makeUserTask(r: RPeriodicTask): UserTask[String] = + UserTask(r.id, r.task, r.enabled, r.timer, r.summary, r.args) + + def create[F[_]: Sync]( + enabled: Boolean, + scope: UserTaskScope, + task: Ident, + args: String, + subject: String, + priority: Priority, + timer: CalEvent, + summary: Option[String] + ): F[RPeriodicTask] = + Ident + .randomId[F] + .flatMap(id => + Timestamp + .current[F] + .map { now => + RPeriodicTask( + id, + enabled, + task, + scope.collective, + args, + subject, + scope.fold(_.user, identity), + priority, + None, + None, + timer, + timer + .nextElapse(now.atZone(Timestamp.UTC)) + .map(_.toInstant) + .map(Timestamp.apply) + .getOrElse(Timestamp.Epoch), + now, + summary + ) + } + ) + + def createJson[F[_]: Sync, A]( + enabled: Boolean, + scope: UserTaskScope, + task: Ident, + args: A, + subject: String, + priority: Priority, + timer: CalEvent, + summary: Option[String] + )(implicit E: Encoder[A]): F[RPeriodicTask] = + create[F]( + enabled, + scope, + task, + E(args).noSpaces, + subject, + priority, + timer, + summary + ) + +} diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/QueueLogger.scala 
b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/QueueLogger.scala similarity index 88% rename from modules/joex/src/main/scala/docspell/joex/scheduler/QueueLogger.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/QueueLogger.scala index 357a1e83..8a4a0824 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/QueueLogger.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/QueueLogger.scala @@ -4,16 +4,17 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler.impl -import cats.effect._ import cats.effect.std.Queue -import cats.implicits._ +import cats.effect.{Async, Sync} +import cats.syntax.all._ import fs2.Stream -import docspell.common._ +import docspell.common.{Ident, LogLevel} import docspell.logging import docspell.logging.{Level, Logger} +import docspell.scheduler.LogEvent object QueueLogger { diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerBuilder.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerBuilder.scala similarity index 76% rename from modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerBuilder.scala rename to modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerBuilder.scala index cc5cef12..6a6e0c2a 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerBuilder.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerBuilder.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler.impl import cats.effect._ import cats.effect.std.Semaphore @@ -13,14 +13,14 @@ import fs2.concurrent.SignallingRef import docspell.notification.api.EventSink import docspell.pubsub.api.PubSubT +import docspell.scheduler._ import docspell.store.Store -import docspell.store.queue.JobQueue case class SchedulerBuilder[F[_]: Async]( config: 
SchedulerConfig, tasks: JobTaskRegistry[F], store: Store[F], - queue: Resource[F, JobQueue[F]], + queue: JobQueue[F], logSink: LogSink[F], pubSub: PubSubT[F], eventSink: EventSink[F] @@ -35,14 +35,11 @@ case class SchedulerBuilder[F[_]: Async]( def withTask[A](task: JobTask[F]): SchedulerBuilder[F] = withTaskRegistry(tasks.withTask(task)) - def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] = - copy(queue = queue) - def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] = copy(logSink = sink) def withQueue(queue: JobQueue[F]): SchedulerBuilder[F] = - copy(queue = Resource.pure[F, JobQueue[F]](queue)) + copy(queue = queue) def withPubSub(pubSubT: PubSubT[F]): SchedulerBuilder[F] = copy(pubSub = pubSubT) @@ -54,14 +51,13 @@ case class SchedulerBuilder[F[_]: Async]( resource.evalMap(sch => Async[F].start(sch.start.compile.drain).map(_ => sch)) def resource: Resource[F, Scheduler[F]] = { - val scheduler: Resource[F, SchedulerImpl[F]] = for { - jq <- queue - waiter <- Resource.eval(SignallingRef(true)) - state <- Resource.eval(SignallingRef(SchedulerImpl.emptyState[F])) - perms <- Resource.eval(Semaphore(config.poolSize.toLong)) + val scheduler: F[SchedulerImpl[F]] = for { + waiter <- SignallingRef(true) + state <- SignallingRef(SchedulerImpl.emptyState[F]) + perms <- Semaphore(config.poolSize.toLong) } yield new SchedulerImpl[F]( config, - jq, + queue, pubSub, eventSink, tasks, @@ -72,7 +68,7 @@ case class SchedulerBuilder[F[_]: Async]( perms ) - scheduler.evalTap(_.init).map(s => s: Scheduler[F]) + Resource.eval(scheduler.flatTap(_.init)).map(s => s: Scheduler[F]) } } @@ -92,5 +88,4 @@ object SchedulerBuilder { PubSubT.noop[F], EventSink.silent[F] ) - } diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerImpl.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerImpl.scala similarity index 86% rename from modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerImpl.scala rename to 
modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerImpl.scala index be83f9d6..955a3b59 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/SchedulerImpl.scala +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerImpl.scala @@ -4,7 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.joex.scheduler +package docspell.scheduler.impl import cats.data.OptionT import cats.effect._ @@ -13,15 +13,15 @@ import cats.implicits._ import fs2.Stream import fs2.concurrent.SignallingRef -import docspell.backend.msg.JobDone import docspell.common._ -import docspell.joex.scheduler.SchedulerImpl._ import docspell.notification.api.Event import docspell.notification.api.EventSink import docspell.pubsub.api.PubSubT +import docspell.scheduler._ +import docspell.scheduler.impl.SchedulerImpl._ +import docspell.scheduler.msg.{CancelJob, JobDone, JobsNotify} import docspell.store.Store import docspell.store.queries.QJob -import docspell.store.queue.JobQueue import docspell.store.records.RJob import io.circe.Json @@ -41,6 +41,16 @@ final class SchedulerImpl[F[_]: Async]( private[this] val logger = docspell.logging.getLogger[F] + def startSubscriptions = + for { + _ <- Async[F].start(pubSub.subscribeSink(JobsNotify()) { _ => + notifyChange + }) + _ <- Async[F].start(pubSub.subscribeSink(CancelJob.topic) { msg => + requestCancel(msg.body.jobId).void + }) + } yield () + /** On startup, get all jobs in state running from this scheduler and put them into * waiting state, so they get picked up again. 
*/ @@ -56,8 +66,23 @@ final class SchedulerImpl[F[_]: Async]( .drain ) - def getRunning: F[Vector[RJob]] = - state.get.flatMap(s => QJob.findAll(s.getRunning, store)) + def getRunning: F[Vector[Job[String]]] = + state.get + .flatMap(s => QJob.findAll(s.getRunning, store)) + .map( + _.map(rj => + Job( + rj.id, + rj.task, + rj.group, + rj.args, + rj.subject, + rj.submitter, + rj.priority, + rj.tracker + ) + ) + ) def requestCancel(jobId: Ident): F[Boolean] = logger.info(s"Scheduler requested to cancel job: ${jobId.id}") *> @@ -164,10 +189,10 @@ final class SchedulerImpl[F[_]: Async]( for { _ <- logger.debug(s"Creating context for job ${job.info} to run cancellation $t") - ctx <- Context[F, String](job, job.args, config, logSink, store) + ctx <- ContextImpl[F, String](job, job.args, config, logSink, store) _ <- t.onCancel.run(ctx) _ <- state.modify(_.markCancelled(job)) - _ <- onFinish(job, Json.Null, JobState.Cancelled) + _ <- onFinish(job, JobTaskResult.empty, JobState.Cancelled) _ <- ctx.logger.warn("Job has been cancelled.") _ <- logger.debug(s"Job ${job.info} has been cancelled.") } yield () @@ -188,7 +213,7 @@ final class SchedulerImpl[F[_]: Async]( case Right(t) => for { _ <- logger.debug(s"Creating context for job ${job.info} to run $t") - ctx <- Context[F, String](job, job.args, config, logSink, store) + ctx <- ContextImpl[F, String](job, job.args, config, logSink, store) jot = wrapTask(job, t.task, ctx) tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx) _ <- state.modify(_.addRunning(job, tok)) @@ -196,7 +221,7 @@ final class SchedulerImpl[F[_]: Async]( } } - def onFinish(job: RJob, result: Json, finishState: JobState): F[Unit] = + def onFinish(job: RJob, result: JobTaskResult, finishState: JobState): F[Unit] = for { _ <- logger.debug(s"Job ${job.info} done $finishState. 
Releasing resources.") _ <- permits.release *> permits.available.flatMap(a => @@ -220,7 +245,8 @@ final class SchedulerImpl[F[_]: Async]( job.state, job.subject, job.submitter, - result + result.json.getOrElse(Json.Null), + result.message ) ) ) @@ -235,7 +261,7 @@ final class SchedulerImpl[F[_]: Async]( def wrapTask( job: RJob, - task: Task[F, String, Json], + task: Task[F, String, JobTaskResult], ctx: Context[F, String] ): Task[F, String, Unit] = task @@ -250,19 +276,19 @@ final class SchedulerImpl[F[_]: Async]( case true => logger.error(ex)(s"Job ${job.info} execution failed (cancel = true)") ctx.logger.error(ex)("Job execution failed (cancel = true)") *> - (JobState.Cancelled: JobState, Json.Null).pure[F] + (JobState.Cancelled: JobState, JobTaskResult.empty).pure[F] case false => QJob.exceedsRetries(job.id, config.retries, store).flatMap { case true => logger.error(ex)(s"Job ${job.info} execution failed. Retries exceeded.") ctx.logger .error(ex)(s"Job ${job.info} execution failed. Retries exceeded.") - .map(_ => (JobState.Failed: JobState, Json.Null)) + .map(_ => (JobState.Failed: JobState, JobTaskResult.empty)) case false => logger.error(ex)(s"Job ${job.info} execution failed. Retrying later.") ctx.logger .error(ex)(s"Job ${job.info} execution failed. Retrying later.") - .map(_ => (JobState.Stuck: JobState, Json.Null)) + .map(_ => (JobState.Stuck: JobState, JobTaskResult.empty)) } } }) @@ -273,7 +299,7 @@ final class SchedulerImpl[F[_]: Async]( logger.error(ex)(s"Error happened during post-processing of ${job.info}!") // we don't know the real outcome here… // since tasks should be idempotent, set it to stuck. 
if above has failed, this might fail anyways - onFinish(job, Json.Null, JobState.Stuck) + onFinish(job, JobTaskResult.empty, JobState.Stuck) }) def forkRun( @@ -295,7 +321,7 @@ final class SchedulerImpl[F[_]: Async]( () } *> state.modify(_.markCancelled(job)) *> - onFinish(job, Json.Null, JobState.Cancelled) *> + onFinish(job, JobTaskResult.empty, JobState.Cancelled) *> ctx.logger.warn("Job has been cancelled.") *> logger.debug(s"Job ${job.info} has been cancelled.") ) diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerModuleBuilder.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerModuleBuilder.scala new file mode 100644 index 00000000..e8d68d77 --- /dev/null +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/SchedulerModuleBuilder.scala @@ -0,0 +1,70 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.effect._ + +import docspell.common.Ident +import docspell.scheduler._ + +case class SchedulerModuleBuilder[F[_]: Async] private ( + periodicSchedulerConfig: PeriodicSchedulerConfig, + schedulerBuilder: SchedulerBuilder[F], + jobStoreModule: JobStoreModuleBuilder.Module[F] +) { + + private def configureScheduler( + f: SchedulerBuilder[F] => SchedulerBuilder[F] + ): SchedulerModuleBuilder[F] = + copy(schedulerBuilder = f(schedulerBuilder)) + + def withTaskRegistry(reg: JobTaskRegistry[F]): SchedulerModuleBuilder[F] = + configureScheduler(_.withTaskRegistry(reg)) + + def withSchedulerConfig(cfg: SchedulerConfig): SchedulerModuleBuilder[F] = + configureScheduler(_.withConfig(cfg)) + + def withPeriodicSchedulerConfig( + cfg: PeriodicSchedulerConfig + ): SchedulerModuleBuilder[F] = + copy(periodicSchedulerConfig = cfg) + + def resource: Resource[F, SchedulerModule[F]] = { + val queue = JobQueue(jobStoreModule.store) + for { + schedulerR <- schedulerBuilder + .withPubSub(jobStoreModule.pubSubT) 
+ .withEventSink(jobStoreModule.eventSink) + .withQueue(queue) + .resource + + periodicTaskSchedulerR <- + PeriodicSchedulerBuilder.resource( + periodicSchedulerConfig, + jobStoreModule.periodicTaskStore, + jobStoreModule.pubSubT + ) + } yield new SchedulerModule[F] { + val scheduler = schedulerR + val periodicScheduler = periodicTaskSchedulerR + } + } +} + +object SchedulerModuleBuilder { + + def apply[F[_]: Async]( + jobStoreModule: JobStoreModuleBuilder.Module[F] + ): SchedulerModuleBuilder[F] = { + val id = Ident.unsafe("default-node-id") + new SchedulerModuleBuilder( + PeriodicSchedulerConfig.default(id), + SchedulerBuilder(SchedulerConfig.default(id), jobStoreModule.store), + jobStoreModule + ) + } +} diff --git a/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/UserTaskStoreImpl.scala b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/UserTaskStoreImpl.scala new file mode 100644 index 00000000..21d14b75 --- /dev/null +++ b/modules/scheduler/impl/src/main/scala/docspell/scheduler/impl/UserTaskStoreImpl.scala @@ -0,0 +1,125 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.scheduler.impl + +import cats.data.OptionT +import cats.effect._ +import cats.implicits._ +import fs2.Stream + +import docspell.common._ +import docspell.scheduler.impl.QUserTask.UserTaskCodec +import docspell.scheduler.usertask.UserTaskStore +import docspell.scheduler.usertask._ +import docspell.store.{AddResult, Store} + +import io.circe._ + +final class UserTaskStoreImpl[F[_]: Sync]( + store: Store[F], + periodicTaskStore: PeriodicTaskStore[F] +) extends UserTaskStore[F] { + def getAll(scope: UserTaskScope): Stream[F, UserTask[String]] = + store.transact(QUserTask.findAll(scope.toAccountId)) + + def getByNameRaw(scope: UserTaskScope, name: Ident): Stream[F, UserTask[String]] = + store.transact(QUserTask.findByName(scope.toAccountId, name)) + + def getByIdRaw(scope: UserTaskScope, id: Ident): OptionT[F, UserTask[String]] = + OptionT(store.transact(QUserTask.findById(scope.toAccountId, id))) + + def getByName[A](scope: UserTaskScope, name: Ident)(implicit + D: Decoder[A] + ): Stream[F, UserTask[A]] = + getByNameRaw(scope, name).flatMap(_.decode match { + case Right(ua) => Stream.emit(ua) + case Left(err) => Stream.raiseError[F](new Exception(err)) + }) + + def updateTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])( + implicit E: Encoder[A] + ): F[Int] = { + val exists = QUserTask.exists(ut.id) + val insert = QUserTask.insert(scope, subject, ut.encode) + store.add(insert, exists).flatMap { + case AddResult.Success => + 1.pure[F] + case AddResult.EntityExists(_) => + store.transact(QUserTask.update(scope, subject, ut.encode)) + case AddResult.Failure(ex) => + Sync[F].raiseError(ex) + } + } + + def deleteTask(scope: UserTaskScope, id: Ident): F[Int] = + store.transact(QUserTask.delete(scope.toAccountId, id)) + + def getOneByNameRaw( + scope: UserTaskScope, + name: Ident + ): OptionT[F, UserTask[String]] = + OptionT( + getByNameRaw(scope, name) + .take(2) 
+ .compile + .toList + .flatMap { + case Nil => (None: Option[UserTask[String]]).pure[F] + case ut :: Nil => ut.some.pure[F] + case _ => Sync[F].raiseError(new Exception("More than one result found")) + } + ) + + def getOneByName[A](scope: UserTaskScope, name: Ident)(implicit + D: Decoder[A] + ): OptionT[F, UserTask[A]] = + getOneByNameRaw(scope, name) + .semiflatMap(_.decode match { + case Right(ua) => ua.pure[F] + case Left(err) => Sync[F].raiseError(new Exception(err)) + }) + + def updateOneTask[A]( + scope: UserTaskScope, + subject: Option[String], + ut: UserTask[A] + )(implicit + E: Encoder[A] + ): F[UserTask[String]] = + getByNameRaw(scope, ut.name).compile.toList.flatMap { + case a :: rest => + val task = ut.copy(id = a.id).encode + for { + _ <- store.transact(QUserTask.update(scope, subject, task)) + _ <- store.transact( + rest.traverse(t => QUserTask.delete(scope.toAccountId, t.id)) + ) + } yield task + case Nil => + val task = ut.encode + store.transact(QUserTask.insert(scope, subject, task)).map(_ => task) + } + + def deleteAll(scope: UserTaskScope, name: Ident): F[Int] = + store.transact(QUserTask.deleteAll(scope.toAccountId, name)) + + def executeNow[A](scope: UserTaskScope, subject: Option[String], task: UserTask[A])( + implicit E: Encoder[A] + ): F[Unit] = + for { + ptask <- task.encode.toPeriodicTask(scope, subject) + _ <- periodicTaskStore.submit(ptask) + } yield () +} + +object UserTaskStoreImpl { + def apply[F[_]: Sync]( + store: Store[F], + periodicTaskStore: PeriodicTaskStore[F] + ): UserTaskStore[F] = + new UserTaskStoreImpl[F](store, periodicTaskStore) +} diff --git a/modules/store/src/main/scala/db/migration/MigrationTasks.scala b/modules/store/src/main/scala/db/migration/MigrationTasks.scala index f8e2bf55..39f53f58 100644 --- a/modules/store/src/main/scala/db/migration/MigrationTasks.scala +++ b/modules/store/src/main/scala/db/migration/MigrationTasks.scala @@ -15,7 +15,7 @@ import docspell.common.syntax.StringSyntax._ import 
docspell.notification.api._ import docspell.store.records._ -import db.migration.data.{PeriodicDueItemsArgsOld, PeriodicQueryArgsOld} +import db.migration.data._ import doobie._ import doobie.implicits._ import doobie.util.transactor.Strategy diff --git a/modules/store/src/main/scala/db/migration/data/NotifyDueItemsArgs.scala b/modules/store/src/main/scala/db/migration/data/NotifyDueItemsArgs.scala index e664b58f..0e88df9f 100644 --- a/modules/store/src/main/scala/db/migration/data/NotifyDueItemsArgs.scala +++ b/modules/store/src/main/scala/db/migration/data/NotifyDueItemsArgs.scala @@ -4,8 +4,9 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -package docspell.common +package db.migration.data +import docspell.common._ import docspell.common.syntax.all._ import io.circe._ diff --git a/modules/store/src/main/scala/docspell/store/Store.scala b/modules/store/src/main/scala/docspell/store/Store.scala index 3e54a7a2..b7f611e3 100644 --- a/modules/store/src/main/scala/docspell/store/Store.scala +++ b/modules/store/src/main/scala/docspell/store/Store.scala @@ -12,7 +12,7 @@ import cats.effect._ import cats.~> import fs2._ -import docspell.store.file.FileRepository +import docspell.store.file.{FileRepository, FileRepositoryConfig} import docspell.store.impl.StoreImpl import com.zaxxer.hikari.HikariDataSource @@ -28,6 +28,11 @@ trait Store[F[_]] { def fileRepo: FileRepository[F] + def createFileRepository( + cfg: FileRepositoryConfig, + withAttributeStore: Boolean + ): FileRepository[F] + def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] } @@ -35,7 +40,7 @@ object Store { def create[F[_]: Async]( jdbc: JdbcConfig, - chunkSize: Int, + fileRepoConfig: FileRepositoryConfig, connectEC: ExecutionContext ): Resource[F, Store[F]] = { val acquire = Sync[F].delay(new HikariDataSource()) @@ -50,8 +55,8 @@ object Store { ds.setDriverClassName(jdbc.driverClass) } xa = HikariTransactor(ds, connectEC) - fr = FileRepository.genericJDBC(xa, ds, chunkSize) - 
st = new StoreImpl[F](fr, jdbc, xa) + fr = FileRepository.apply(xa, ds, fileRepoConfig, true) + st = new StoreImpl[F](fr, jdbc, ds, xa) _ <- Resource.eval(st.migrate) } yield st } diff --git a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala index 4814a9dc..127788b2 100644 --- a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala +++ b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala @@ -6,6 +6,7 @@ package docspell.store.file +import cats.Applicative import cats.data.OptionT import cats.effect._ import cats.implicits._ @@ -17,40 +18,71 @@ import binny._ import doobie._ import doobie.implicits._ -final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) - extends BinaryAttributeStore[F] { - def saveAttr(id: BinaryId, attrs: F[BinaryAttributes]): F[Unit] = - for { - now <- Timestamp.current[F] - a <- attrs - fileKey <- makeFileKey(id) - fm = RFileMeta( - fileKey, - now, - MimeType.parse(a.contentType.contentType).getOrElse(MimeType.octetStream), - ByteSize(a.length), - a.sha256 - ) - _ <- RFileMeta.insert(fm).transact(xa) - } yield () - - def deleteAttr(id: BinaryId): F[Boolean] = - makeFileKey(id).flatMap(fileKey => RFileMeta.delete(fileKey).transact(xa).map(_ > 0)) - - def findAttr(id: BinaryId): OptionT[F, BinaryAttributes] = - findMeta(id).map(fm => - BinaryAttributes( - fm.checksum, - SimpleContentType(fm.mimetype.asString), - fm.length.bytes - ) - ) - - def findMeta(id: BinaryId): OptionT[F, RFileMeta] = - OptionT(makeFileKey(id).flatMap(fileKey => RFileMeta.findById(fileKey).transact(xa))) - - private def makeFileKey(binaryId: BinaryId): F[FileKey] = - Sync[F] - .pure(BinnyUtils.binaryIdToFileKey(binaryId).left.map(new IllegalStateException(_))) - .rethrow +private[file] trait AttributeStore[F[_]] extends BinaryAttributeStore[F] { + def findMeta(id: BinaryId): OptionT[F, RFileMeta] +} + +private[file] object 
AttributeStore { + def empty[F[_]: Applicative]: AttributeStore[F] = + new AttributeStore[F] { + val delegate = BinaryAttributeStore.empty[F] + + def findMeta(id: BinaryId) = + OptionT.none + + def saveAttr(id: BinaryId, attrs: F[BinaryAttributes]) = + delegate.saveAttr(id, attrs) + + def deleteAttr(id: BinaryId) = + delegate.deleteAttr(id) + + def findAttr(id: BinaryId) = + delegate.findAttr(id) + } + + def apply[F[_]: Sync](xa: Transactor[F]): AttributeStore[F] = + new Impl[F](xa) + + final private class Impl[F[_]: Sync](xa: Transactor[F]) extends AttributeStore[F] { + def saveAttr(id: BinaryId, attrs: F[BinaryAttributes]): F[Unit] = + for { + now <- Timestamp.current[F] + a <- attrs + fileKey <- makeFileKey(id) + fm = RFileMeta( + fileKey, + now, + MimeType.parse(a.contentType.contentType).getOrElse(MimeType.octetStream), + ByteSize(a.length), + a.sha256 + ) + _ <- RFileMeta.insert(fm).transact(xa) + } yield () + + def deleteAttr(id: BinaryId): F[Boolean] = + makeFileKey(id).flatMap(fileKey => + RFileMeta.delete(fileKey).transact(xa).map(_ > 0) + ) + + def findAttr(id: BinaryId): OptionT[F, BinaryAttributes] = + findMeta(id).map(fm => + BinaryAttributes( + fm.checksum, + SimpleContentType(fm.mimetype.asString), + fm.length.bytes + ) + ) + + def findMeta(id: BinaryId): OptionT[F, RFileMeta] = + OptionT( + makeFileKey(id).flatMap(fileKey => RFileMeta.findById(fileKey).transact(xa)) + ) + + private def makeFileKey(binaryId: BinaryId): F[FileKey] = + Sync[F] + .pure( + BinnyUtils.binaryIdToFileKey(binaryId).left.map(new IllegalStateException(_)) + ) + .rethrow + } } diff --git a/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala b/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala index eef07da3..e87d9d3d 100644 --- a/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala +++ b/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala @@ -6,19 +6,34 @@ package docspell.store.file -import docspell.common +import 
javax.sql.DataSource + +import cats.effect._ +import fs2.io.file.Path + import docspell.common._ import docspell.files.TikaMimetype import docspell.logging.Logger import binny._ +import binny.fs.{FsBinaryStore, FsStoreConfig, PathMapping} +import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} +import binny.minio.{MinioBinaryStore, MinioConfig, S3KeyMapping} import scodec.bits.ByteVector -private[store] object BinnyUtils { +object BinnyUtils { def fileKeyToBinaryId(fk: FileKey): BinaryId = BinaryId(s"${fk.collective.id}/${fk.category.id.id}/${fk.id.id}") + def fileKeyPartToPrefix(fkp: FileKeyPart): Option[String] = + fkp match { + case FileKeyPart.Empty => None + case FileKeyPart.Collective(cid) => Some(s"${cid.id}/%") + case FileKeyPart.Category(cid, cat) => Some(s"${cid.id}/${cat.id.id}/%") + case FileKeyPart.Key(key) => Some(fileKeyToBinaryId(key).id) + } + def binaryIdToFileKey(bid: BinaryId): Either[String, FileKey] = bid.id.split('/').toList match { case cId :: catId :: fId :: Nil => @@ -26,7 +41,7 @@ private[store] object BinnyUtils { coll <- Ident.fromString(cId) cat <- FileCategory.fromString(catId) file <- Ident.fromString(fId) - } yield common.FileKey(coll, cat, file) + } yield FileKey(coll, cat, file) case _ => Left(s"Invalid format for file-key: $bid") } @@ -57,4 +72,55 @@ private[store] object BinnyUtils { .asString ) } + + val pathMapping: PathMapping = { + import binny.fs.PathMapping.syntax._ + + def toPath(base: Path, binaryId: BinaryId): Path = { + val fkey = unsafeBinaryIdToFileKey(binaryId) + base / fkey.collective.id / fkey.category.id.id / fkey.id.id / "file" + } + + def toId(file: Path): Option[BinaryId] = + for { + id <- file.parent + cat <- id.parent + fcat <- FileCategory.fromString(cat.asId.id).toOption + coll <- cat.parent + fkey = FileKey(Ident.unsafe(coll.asId.id), fcat, Ident.unsafe(id.asId.id)) + } yield fileKeyToBinaryId(fkey) + + PathMapping(toPath)(toId) + } + + def binaryStore[F[_]: Async]( + cfg: FileRepositoryConfig, + 
attrStore: AttributeStore[F], + ds: DataSource, + logger: Logger[F] + ): BinaryStore[F] = + cfg match { + case FileRepositoryConfig.Database(chunkSize) => + val jdbcConfig = + JdbcStoreConfig("filechunk", chunkSize, BinnyUtils.TikaContentTypeDetect) + GenericJdbcStore[F](ds, LoggerAdapter(logger), jdbcConfig, attrStore) + + case FileRepositoryConfig.S3(endpoint, accessKey, secretKey, bucket, chunkSize) => + val keyMapping = S3KeyMapping.constant(bucket) + val minioCfg = MinioConfig + .default(endpoint, accessKey, secretKey, keyMapping) + .copy(chunkSize = chunkSize, detect = BinnyUtils.TikaContentTypeDetect) + + MinioBinaryStore[F](minioCfg, attrStore, LoggerAdapter(logger)) + + case FileRepositoryConfig.Directory(path, chunkSize) => + val fsConfig = FsStoreConfig( + path, + BinnyUtils.TikaContentTypeDetect, + FsStoreConfig.OverwriteMode.Fail, + BinnyUtils.pathMapping, + chunkSize + ) + FsBinaryStore[F](fsConfig, LoggerAdapter(logger), attrStore) + } } diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepository.scala b/modules/store/src/main/scala/docspell/store/file/FileRepository.scala index b3da6da3..48b30b71 100644 --- a/modules/store/src/main/scala/docspell/store/file/FileRepository.scala +++ b/modules/store/src/main/scala/docspell/store/file/FileRepository.scala @@ -13,11 +13,12 @@ import fs2._ import docspell.common._ -import binny.BinaryId -import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} +import binny.{BinaryAttributeStore, BinaryId, BinaryStore} import doobie.Transactor trait FileRepository[F[_]] { + def config: FileRepositoryConfig + def getBytes(key: FileKey): Stream[F, Byte] def findMeta(key: FileKey): F[Option[FileMetadata]] @@ -33,17 +34,30 @@ trait FileRepository[F[_]] { object FileRepository { - def genericJDBC[F[_]: Sync]( + def apply[F[_]: Async]( xa: Transactor[F], ds: DataSource, - chunkSize: Int + cfg: FileRepositoryConfig, + withAttributeStore: Boolean ): FileRepository[F] = { - val attrStore = new AttributeStore[F](xa) 
- val cfg = JdbcStoreConfig("filechunk", chunkSize, BinnyUtils.TikaContentTypeDetect) + val attrStore = + if (withAttributeStore) AttributeStore[F](xa) + else AttributeStore.empty[F] val log = docspell.logging.getLogger[F] - val binStore = GenericJdbcStore[F](ds, BinnyUtils.LoggerAdapter(log), cfg, attrStore) val keyFun: FileKey => BinaryId = BinnyUtils.fileKeyToBinaryId + val binStore: BinaryStore[F] = BinnyUtils.binaryStore(cfg, attrStore, ds, log) - new FileRepositoryImpl[F](binStore, attrStore, keyFun) + new FileRepositoryImpl[F](cfg, binStore, attrStore, keyFun) } + + def getDelegate[F[_]]( + repo: FileRepository[F] + ): Option[(BinaryStore[F], BinaryAttributeStore[F])] = + repo match { + case n: FileRepositoryImpl[F] => + Some((n.bs, n.attrStore)) + + case _ => + None + } } diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepositoryConfig.scala b/modules/store/src/main/scala/docspell/store/file/FileRepositoryConfig.scala new file mode 100644 index 00000000..41aec6da --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileRepositoryConfig.scala @@ -0,0 +1,38 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import fs2.io.file.Path + +import docspell.common.FileStoreConfig + +sealed trait FileRepositoryConfig {} + +object FileRepositoryConfig { + + final case class Database(chunkSize: Int) extends FileRepositoryConfig + + final case class S3( + endpoint: String, + accessKey: String, + secretKey: String, + bucketName: String, + chunkSize: Int + ) extends FileRepositoryConfig + + final case class Directory(path: Path, chunkSize: Int) extends FileRepositoryConfig + + def fromFileStoreConfig(chunkSize: Int, cfg: FileStoreConfig): FileRepositoryConfig = + cfg match { + case FileStoreConfig.DefaultDatabase(_) => + FileRepositoryConfig.Database(chunkSize) + case FileStoreConfig.S3(_, endpoint, accessKey, secretKey, bucket) => + FileRepositoryConfig.S3(endpoint, accessKey, secretKey, bucket, chunkSize) + case FileStoreConfig.FileSystem(_, directory) => + FileRepositoryConfig.Directory(directory, chunkSize) + } +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala b/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala index 7f820244..e605d79c 100644 --- a/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala +++ b/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala @@ -16,8 +16,9 @@ import docspell.common._ import binny._ final class FileRepositoryImpl[F[_]: Sync]( - bs: BinaryStore[F], - attrStore: AttributeStore[F], + val config: FileRepositoryConfig, + val bs: BinaryStore[F], + val attrStore: AttributeStore[F], keyFun: FileKey => BinaryId ) extends FileRepository[F] { diff --git a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala index 54505649..1c774956 100644 --- a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala +++ 
b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala @@ -6,12 +6,14 @@ package docspell.store.impl +import javax.sql.DataSource + import cats.arrow.FunctionK import cats.effect.Async import cats.implicits._ import cats.~> -import docspell.store.file.FileRepository +import docspell.store.file.{FileRepository, FileRepositoryConfig} import docspell.store.migrate.FlywayMigrate import docspell.store.{AddResult, JdbcConfig, Store} @@ -21,9 +23,16 @@ import doobie.implicits._ final class StoreImpl[F[_]: Async]( val fileRepo: FileRepository[F], jdbc: JdbcConfig, + ds: DataSource, xa: Transactor[F] ) extends Store[F] { + def createFileRepository( + cfg: FileRepositoryConfig, + withAttributeStore: Boolean + ): FileRepository[F] = + FileRepository(xa, ds, cfg, withAttributeStore) + def transform: ConnectionIO ~> F = FunctionK.lift(transact) diff --git a/modules/store/src/main/scala/docspell/store/queries/QUserTask.scala b/modules/store/src/main/scala/docspell/store/queries/QUserTask.scala deleted file mode 100644 index fdf1f220..00000000 --- a/modules/store/src/main/scala/docspell/store/queries/QUserTask.scala +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2020 Eike K. 
& Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.store.queries - -import fs2._ - -import docspell.common._ -import docspell.store.qb.DSL._ -import docspell.store.qb._ -import docspell.store.records._ -import docspell.store.usertask.{UserTask, UserTaskScope} - -import doobie._ - -object QUserTask { - private val RT = RPeriodicTask.T - - def findAll(account: AccountId): Stream[ConnectionIO, UserTask[String]] = - run( - select(RT.all), - from(RT), - RT.group === account.collective && RT.submitter === account.user - ).query[RPeriodicTask].stream.map(makeUserTask) - - def findByName( - account: AccountId, - name: Ident - ): Stream[ConnectionIO, UserTask[String]] = - run( - select(RT.all), - from(RT), - where( - RT.group === account.collective, - RT.submitter === account.user, - RT.task === name - ) - ).query[RPeriodicTask].stream.map(makeUserTask) - - def findById( - account: AccountId, - id: Ident - ): ConnectionIO[Option[UserTask[String]]] = - run( - select(RT.all), - from(RT), - where( - RT.group === account.collective, - RT.submitter === account.user, - RT.id === id - ) - ).query[RPeriodicTask].option.map(_.map(makeUserTask)) - - def insert( - scope: UserTaskScope, - subject: Option[String], - task: UserTask[String] - ): ConnectionIO[Int] = - for { - r <- task.toPeriodicTask[ConnectionIO](scope, subject) - n <- RPeriodicTask.insert(r) - } yield n - - def update( - scope: UserTaskScope, - subject: Option[String], - task: UserTask[String] - ): ConnectionIO[Int] = - for { - r <- task.toPeriodicTask[ConnectionIO](scope, subject) - n <- RPeriodicTask.update(r) - } yield n - - def exists(id: Ident): ConnectionIO[Boolean] = - RPeriodicTask.exists(id) - - def delete(account: AccountId, id: Ident): ConnectionIO[Int] = - DML - .delete( - RT, - where( - RT.group === account.collective, - RT.submitter === account.user, - RT.id === id - ) - ) - - def deleteAll(account: AccountId, name: Ident): ConnectionIO[Int] = - DML.delete( - RT, - 
where( - RT.group === account.collective, - RT.submitter === account.user, - RT.task === name - ) - ) - - def makeUserTask(r: RPeriodicTask): UserTask[String] = - UserTask(r.id, r.task, r.enabled, r.timer, r.summary, r.args) - -} diff --git a/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala b/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala deleted file mode 100644 index 9b777086..00000000 --- a/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2020 Eike K. & Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.store.queue - -import cats.effect._ -import cats.implicits._ - -import docspell.common._ -import docspell.store.Store -import docspell.store.queries.QJob -import docspell.store.records.RJob - -trait JobQueue[F[_]] { - - /** Inserts the job into the queue to get picked up as soon as possible. The job must - * have a new unique id. - */ - def insert(job: RJob): F[Unit] - - /** Inserts the job into the queue only, if there is no job with the same tracker-id - * running at the moment. The job id must be a new unique id. - * - * If the job has no tracker defined, it is simply inserted. 
- */ - def insertIfNew(job: RJob): F[Boolean] - - def insertAll(jobs: Seq[RJob]): F[List[Boolean]] - - def insertAllIfNew(jobs: Seq[RJob]): F[List[Boolean]] - - def nextJob( - prio: Ident => F[Priority], - worker: Ident, - retryPause: Duration - ): F[Option[RJob]] -} - -object JobQueue { - def apply[F[_]: Async](store: Store[F]): Resource[F, JobQueue[F]] = - Resource.pure[F, JobQueue[F]](new JobQueue[F] { - private[this] val logger = docspell.logging.getLogger[F] - - def nextJob( - prio: Ident => F[Priority], - worker: Ident, - retryPause: Duration - ): F[Option[RJob]] = - logger - .trace("Select next job") *> QJob.takeNextJob(store)(prio, worker, retryPause) - - def insert(job: RJob): F[Unit] = - store - .transact(RJob.insert(job)) - .flatMap { n => - if (n != 1) - Async[F] - .raiseError(new Exception(s"Inserting job failed. Update count: $n")) - else ().pure[F] - } - - def insertIfNew(job: RJob): F[Boolean] = - for { - rj <- job.tracker match { - case Some(tid) => - store.transact(RJob.findNonFinalByTracker(tid)) - case None => - None.pure[F] - } - ret <- - if (rj.isDefined) false.pure[F] - else insert(job).as(true) - } yield ret - - def insertAll(jobs: Seq[RJob]): F[List[Boolean]] = - jobs.toList - .traverse(j => insert(j).attempt) - .flatMap(_.traverse { - case Right(()) => true.pure[F] - case Left(ex) => - logger.error(ex)("Could not insert job. Skipping it.").as(false) - - }) - - def insertAllIfNew(jobs: Seq[RJob]): F[List[Boolean]] = - jobs.toList - .traverse(j => insertIfNew(j).attempt) - .flatMap(_.traverse { - case Right(true) => true.pure[F] - case Right(false) => false.pure[F] - case Left(ex) => - logger.error(ex)("Could not insert job. 
Skipping it.").as(false) - }) - }) -} diff --git a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala index 8263e875..fb26272d 100644 --- a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala +++ b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala @@ -8,8 +8,10 @@ package docspell.store.records import cats.data.NonEmptyList import cats.implicits._ +import fs2.Stream import docspell.common.{FileKey, _} +import docspell.store.file.BinnyUtils import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -44,6 +46,18 @@ object RFileMeta { def as(alias: String): Table = Table(Some(alias)) + def findAll(part: FileKeyPart, chunkSize: Int): Stream[ConnectionIO, RFileMeta] = { + val cond = BinnyUtils + .fileKeyPartToPrefix(part) + .map(prefix => T.id.cast[String].like(prefix)) + + Select( + select(T.all), + from(T), + cond.getOrElse(Condition.unit) + ).build.query[RFileMeta].streamWithChunkSize(chunkSize) + } + def insert(r: RFileMeta): ConnectionIO[Int] = DML.insert(T, T.all, fr"${r.id},${r.created},${r.mimetype},${r.length},${r.checksum}") diff --git a/modules/store/src/main/scala/docspell/store/records/RJob.scala b/modules/store/src/main/scala/docspell/store/records/RJob.scala index 89852623..4804232b 100644 --- a/modules/store/src/main/scala/docspell/store/records/RJob.scala +++ b/modules/store/src/main/scala/docspell/store/records/RJob.scala @@ -48,22 +48,22 @@ case class RJob( object RJob { - def newJob[A]( + def newJob( id: Ident, task: Ident, group: Ident, - args: A, + args: String, subject: String, submitted: Timestamp, submitter: Ident, priority: Priority, tracker: Option[Ident] - )(implicit E: Encoder[A]): RJob = + ): RJob = RJob( id, task, group, - E(args).noSpaces, + args, subject, submitted, submitter, @@ -77,6 +77,29 @@ object RJob { None ) + def fromJson[A]( + id: Ident, + task: Ident, + group: Ident, + args: A, + subject: String, + 
submitted: Timestamp, + submitter: Ident, + priority: Priority, + tracker: Option[Ident] + )(implicit E: Encoder[A]): RJob = + newJob( + id, + task, + group, + E(args).noSpaces, + subject, + submitted, + submitter, + priority, + tracker + ) + final case class Table(alias: Option[String]) extends TableDef { val tableName = "job" diff --git a/modules/store/src/main/scala/docspell/store/records/RPeriodicTask.scala b/modules/store/src/main/scala/docspell/store/records/RPeriodicTask.scala index d0e27f07..5be5ca08 100644 --- a/modules/store/src/main/scala/docspell/store/records/RPeriodicTask.scala +++ b/modules/store/src/main/scala/docspell/store/records/RPeriodicTask.scala @@ -13,12 +13,10 @@ import cats.implicits._ import docspell.common._ import docspell.store.qb.DSL._ import docspell.store.qb._ -import docspell.store.usertask.UserTaskScope import com.github.eikek.calev.CalEvent import doobie._ import doobie.implicits._ -import io.circe.Encoder /** A periodic task is a special job description, that shares a few properties of a * `RJob`. It must provide all information to create a `RJob` value eventually. 
@@ -65,66 +63,6 @@ case class RPeriodicTask( object RPeriodicTask { - def create[F[_]: Sync]( - enabled: Boolean, - scope: UserTaskScope, - task: Ident, - args: String, - subject: String, - priority: Priority, - timer: CalEvent, - summary: Option[String] - ): F[RPeriodicTask] = - Ident - .randomId[F] - .flatMap(id => - Timestamp - .current[F] - .map { now => - RPeriodicTask( - id, - enabled, - task, - scope.collective, - args, - subject, - scope.fold(_.user, identity), - priority, - None, - None, - timer, - timer - .nextElapse(now.atZone(Timestamp.UTC)) - .map(_.toInstant) - .map(Timestamp.apply) - .getOrElse(Timestamp.Epoch), - now, - summary - ) - } - ) - - def createJson[F[_]: Sync, A]( - enabled: Boolean, - scope: UserTaskScope, - task: Ident, - args: A, - subject: String, - priority: Priority, - timer: CalEvent, - summary: Option[String] - )(implicit E: Encoder[A]): F[RPeriodicTask] = - create[F]( - enabled, - scope, - task, - E(args).noSpaces, - subject, - priority, - timer, - summary - ) - final case class Table(alias: Option[String]) extends TableDef { val tableName = "periodic_task" diff --git a/modules/store/src/main/scala/docspell/store/usertask/UserTask.scala b/modules/store/src/main/scala/docspell/store/usertask/UserTask.scala deleted file mode 100644 index 13685aca..00000000 --- a/modules/store/src/main/scala/docspell/store/usertask/UserTask.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2020 Eike K. 
& Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.store.usertask - -import cats.effect._ -import cats.implicits._ - -import docspell.common._ -import docspell.common.syntax.all._ -import docspell.store.records.RPeriodicTask - -import com.github.eikek.calev.CalEvent -import io.circe.Decoder -import io.circe.Encoder - -case class UserTask[A]( - id: Ident, - name: Ident, - enabled: Boolean, - timer: CalEvent, - summary: Option[String], - args: A -) { - - def encode(implicit E: Encoder[A]): UserTask[String] = - copy(args = E(args).noSpaces) - - def withArgs[B](newArgs: B): UserTask[B] = - copy(args = newArgs) - - def mapArgs[B](f: A => B): UserTask[B] = - withArgs(f(args)) -} - -object UserTask { - - implicit final class UserTaskCodec(ut: UserTask[String]) { - - def decode[A](implicit D: Decoder[A]): Either[String, UserTask[A]] = - ut.args - .parseJsonAs[A] - .left - .map(_.getMessage) - .map(a => ut.copy(args = a)) - - def toPeriodicTask[F[_]: Sync]( - scope: UserTaskScope, - subject: Option[String] - ): F[RPeriodicTask] = - RPeriodicTask - .create[F]( - ut.enabled, - scope, - ut.name, - ut.args, - subject.getOrElse(s"${scope.fold(_.user.id, _.id)}: ${ut.name.id}"), - Priority.Low, - ut.timer, - ut.summary - ) - .map(r => r.copy(id = ut.id)) - } -} diff --git a/modules/store/src/main/scala/docspell/store/usertask/UserTaskStore.scala b/modules/store/src/main/scala/docspell/store/usertask/UserTaskStore.scala deleted file mode 100644 index 96a08351..00000000 --- a/modules/store/src/main/scala/docspell/store/usertask/UserTaskStore.scala +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright 2020 Eike K. 
& Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.store.usertask - -import cats.data.OptionT -import cats.effect._ -import cats.implicits._ -import fs2.Stream - -import docspell.common._ -import docspell.store.queries.QUserTask -import docspell.store.{AddResult, Store} - -import io.circe._ - -/** User tasks are `RPeriodicTask`s that can be managed by the user. The user can change - * arguments, enable/disable it or run it just once. - * - * This class defines methods at a higher level, dealing with `UserTask` and - * `UserTaskScope` instead of directly using `RPeriodicTask`. A user task is associated - * to a specific user (not just the collective). But it can be associated to the whole - * collective by using the collective as submitter, too. This is abstracted in - * `UserTaskScope`. - * - * implNote: The mapping is as follows: The collective is the task group. The submitter - * property contains the username. Once a task is saved to the database, it can only be - * referenced uniquely by its id. A user may submit multiple same tasks (with different - * properties). - */ -trait UserTaskStore[F[_]] { - - /** Return all tasks of the given user. */ - def getAll(scope: UserTaskScope): Stream[F, UserTask[String]] - - /** Return all tasks of the given name and user. The task's arguments are returned as - * stored in the database. - */ - def getByNameRaw(scope: UserTaskScope, name: Ident): Stream[F, UserTask[String]] - - /** Return all tasks of the given name and user. The task's arguments are decoded using - * the given json decoder. - */ - def getByName[A](scope: UserTaskScope, name: Ident)(implicit - D: Decoder[A] - ): Stream[F, UserTask[A]] - - /** Return a user-task with the given id. */ - def getByIdRaw(scope: UserTaskScope, id: Ident): OptionT[F, UserTask[String]] - - /** Updates or inserts the given task. - * - * The task is identified by its id. If no task with this id exists, a new one is - * created. 
Otherwise the existing task is updated. - */ - def updateTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])( - implicit E: Encoder[A] - ): F[Int] - - /** Delete the task with the given id of the given user. */ - def deleteTask(scope: UserTaskScope, id: Ident): F[Int] - - /** Return the task of the given user and name. If multiple exists, an error is - * returned. The task's arguments are returned as stored in the database. - */ - def getOneByNameRaw(scope: UserTaskScope, name: Ident): OptionT[F, UserTask[String]] - - /** Return the task of the given user and name. If multiple exists, an error is - * returned. The task's arguments are decoded using the given json decoder. - */ - def getOneByName[A](scope: UserTaskScope, name: Ident)(implicit - D: Decoder[A] - ): OptionT[F, UserTask[A]] - - /** Updates or inserts the given task. - * - * Unlike `updateTask`, this ensures that there is at most one task of some name in the - * db. Multiple same tasks (task with same name) may not be allowed to run, depending - * on what they do. This is not ensured by the database, though. The task is identified - * by task name, submitter and group. - * - * If there are currently multiple tasks with same name as `ut` for the user `account`, - * they will all be removed and the given task inserted! - */ - def updateOneTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])( - implicit E: Encoder[A] - ): F[UserTask[String]] - - /** Delete all tasks of the given user that have name `name'. 
*/ - def deleteAll(scope: UserTaskScope, name: Ident): F[Int] -} - -object UserTaskStore { - - def apply[F[_]: Async](store: Store[F]): Resource[F, UserTaskStore[F]] = - Resource.pure[F, UserTaskStore[F]](new UserTaskStore[F] { - - def getAll(scope: UserTaskScope): Stream[F, UserTask[String]] = - store.transact(QUserTask.findAll(scope.toAccountId)) - - def getByNameRaw(scope: UserTaskScope, name: Ident): Stream[F, UserTask[String]] = - store.transact(QUserTask.findByName(scope.toAccountId, name)) - - def getByIdRaw(scope: UserTaskScope, id: Ident): OptionT[F, UserTask[String]] = - OptionT(store.transact(QUserTask.findById(scope.toAccountId, id))) - - def getByName[A](scope: UserTaskScope, name: Ident)(implicit - D: Decoder[A] - ): Stream[F, UserTask[A]] = - getByNameRaw(scope, name).flatMap(_.decode match { - case Right(ua) => Stream.emit(ua) - case Left(err) => Stream.raiseError[F](new Exception(err)) - }) - - def updateTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])( - implicit E: Encoder[A] - ): F[Int] = { - val exists = QUserTask.exists(ut.id) - val insert = QUserTask.insert(scope, subject, ut.encode) - store.add(insert, exists).flatMap { - case AddResult.Success => - 1.pure[F] - case AddResult.EntityExists(_) => - store.transact(QUserTask.update(scope, subject, ut.encode)) - case AddResult.Failure(ex) => - Async[F].raiseError(ex) - } - } - - def deleteTask(scope: UserTaskScope, id: Ident): F[Int] = - store.transact(QUserTask.delete(scope.toAccountId, id)) - - def getOneByNameRaw( - scope: UserTaskScope, - name: Ident - ): OptionT[F, UserTask[String]] = - OptionT( - getByNameRaw(scope, name) - .take(2) - .compile - .toList - .flatMap { - case Nil => (None: Option[UserTask[String]]).pure[F] - case ut :: Nil => ut.some.pure[F] - case _ => Async[F].raiseError(new Exception("More than one result found")) - } - ) - - def getOneByName[A](scope: UserTaskScope, name: Ident)(implicit - D: Decoder[A] - ): OptionT[F, UserTask[A]] = - 
getOneByNameRaw(scope, name) - .semiflatMap(_.decode match { - case Right(ua) => ua.pure[F] - case Left(err) => Async[F].raiseError(new Exception(err)) - }) - - def updateOneTask[A]( - scope: UserTaskScope, - subject: Option[String], - ut: UserTask[A] - )(implicit - E: Encoder[A] - ): F[UserTask[String]] = - getByNameRaw(scope, ut.name).compile.toList.flatMap { - case a :: rest => - val task = ut.copy(id = a.id).encode - for { - _ <- store.transact(QUserTask.update(scope, subject, task)) - _ <- store.transact( - rest.traverse(t => QUserTask.delete(scope.toAccountId, t.id)) - ) - } yield task - case Nil => - val task = ut.encode - store.transact(QUserTask.insert(scope, subject, task)).map(_ => task) - } - - def deleteAll(scope: UserTaskScope, name: Ident): F[Int] = - store.transact(QUserTask.deleteAll(scope.toAccountId, name)) - }) - -} diff --git a/modules/store/src/test/scala/docspell/store/StoreFixture.scala b/modules/store/src/test/scala/docspell/store/StoreFixture.scala index 91441701..2933b51f 100644 --- a/modules/store/src/test/scala/docspell/store/StoreFixture.scala +++ b/modules/store/src/test/scala/docspell/store/StoreFixture.scala @@ -11,7 +11,7 @@ import javax.sql.DataSource import cats.effect._ import docspell.common.LenientUri -import docspell.store.file.FileRepository +import docspell.store.file.{FileRepository, FileRepositoryConfig} import docspell.store.impl.StoreImpl import docspell.store.migrate.FlywayMigrate @@ -67,8 +67,9 @@ object StoreFixture { for { ds <- dataSource(jdbc) xa <- makeXA(ds) - fr = FileRepository.genericJDBC[IO](xa, ds, 64 * 1024) - store = new StoreImpl[IO](fr, jdbc, xa) + cfg = FileRepositoryConfig.Database(64 * 1024) + fr = FileRepository[IO](xa, ds, cfg, true) + store = new StoreImpl[IO](fr, jdbc, ds, xa) _ <- Resource.eval(store.migrate) } yield store } diff --git a/modules/store/src/test/scala/docspell/store/queries/QJobTest.scala b/modules/store/src/test/scala/docspell/store/queries/QJobTest.scala index 8c60f240..adf6facd 
100644 --- a/modules/store/src/test/scala/docspell/store/queries/QJobTest.scala +++ b/modules/store/src/test/scala/docspell/store/queries/QJobTest.scala @@ -30,7 +30,7 @@ class QJobTest extends CatsEffectSuite with StoreFixture with TestLoggingConfig private val group2 = Ident.unsafe("group2") def createJob(group: Ident): RJob = - RJob.newJob[Unit]( + RJob.fromJson[Unit]( Ident.unsafe(s"job-${c.incrementAndGet()}"), Ident.unsafe("task"), group, diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 34d7f735..a9093d36 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -307,7 +307,8 @@ object Dependencies { val binny = Seq( "com.github.eikek" %% "binny-core" % BinnyVersion, "com.github.eikek" %% "binny-jdbc" % BinnyVersion, - "com.github.eikek" %% "binny-minio" % BinnyVersion + "com.github.eikek" %% "binny-minio" % BinnyVersion, + "com.github.eikek" %% "binny-fs" % BinnyVersion ) // https://github.com/flyway/flyway diff --git a/website/site/content/docs/configure/_index.md b/website/site/content/docs/configure/_index.md index b57cf1ea..4c09fce3 100644 --- a/website/site/content/docs/configure/_index.md +++ b/website/site/content/docs/configure/_index.md @@ -160,6 +160,22 @@ enabled by providing a secret: This secret must be provided to all requests to a `/api/v1/admin/` endpoint. +The most convenient way to execute admin tasks is to use the +[cli](@/docs/tools/cli.md). You get a list of possible admin commands +via `dsc admin help`. + +To see the output of the commands, there are these ways: + +1. looking at the joex logs, which gives most details. +2. Use the job-queue page when logged in as `docspell-system` +3. setup a [webhook](@/docs/webapp/notification.md) to be notified + when a job finishes. This way you get a small message. + +All admin tasks (and also some other system tasks) are run under the +account `docspell-system` (collective and user). 
You need to create +this account and set up the notification hooks in there - not in your +normal account. + ## Full-Text Search: SOLR @@ -204,6 +220,12 @@ a call: $ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll ``` +or use the [cli](@/docs/tools/cli.md): + +```bash +dsc admin -a test123 recreate-index +``` + Here the `test123` is the key defined with `admin-endpoint.secret`. If it is empty (the default), this call is disabled (all admin routes). Otherwise, the POST request will submit a system task that is executed @@ -445,6 +467,147 @@ If you find that these methods do not suffice for your case, please open an issue. +## File Backends + +Docspell allows you to choose between different storage backends for binary +files. You can choose between: + +1. *Database (the recommended default)* + + The database can be used to store the files as well. It is the + default. It doesn't require any other configuration and works well + with multiple instances of restservers and joex nodes. +2. *S3* + + The S3 backend allows storing files in an S3 compatible storage. + It was tested with MinIO, which can be self-hosted. + +3. *Filesystem* + + The filesystem can also be used directly, by specifying a + directory. Be aware that _all_ nodes must have read and write + access into this directory! When running multiple nodes over a + network, consider using one of the above instead. Docspell uses a + fixed structure for storing the files below the given directory; it + cannot be configured. + +When using S3 or filesystem, remember to back up the database *and* the +files! + +Note that Docspell not only stores the files that are uploaded, but +also some other files for internal use. + +### Configuring + +{% warningbubble(title="Note") %} + +Each node must have the same config for its file backend! When using +the filesystem, make sure all processes can access the directory with +read and write permissions. 
+ +{% end %} + +The file storage backend can be configured inside the `files` section +(see the default configs below): + +```conf +files { + … + default-store = "database" + + stores = { + database = + { enabled = true + type = "default-database" + } + + filesystem = + { enabled = false + type = "file-system" + directory = "/some/directory" + } + + minio = + { enabled = false + type = "s3" + endpoint = "http://localhost:9000" + access-key = "username" + secret-key = "password" + bucket = "docspell" + } + } +} +``` + +The `stores` object defines a set of stores and the `default-store` +selects the one that should be used. All disabled store configurations +are removed from the list. Thus the `default-store` must be enabled. +Other enabled stores can be used as the target when copying files (see +below). + +A store configuration requires an `enabled` and a `type` property. +Depending on the `type` property, other properties are required; they +are shown above. The available storage types are +`default-database`, `file-system` and `s3`. + +If you use the docker setup, you can find the corresponding +environment variables to the above config snippet +[below](#environment-variables). + +### Change Backends + +It is possible to change backends with a bit of manual effort. When +doing this, please make sure that the application is not used. It is +important that no file is uploaded during the following steps. + +The [cli](@/docs/tools/cli.md) will be used; please set it up first +and enable the [admin endpoint](#admin-endpoint). Config +changes mentioned here must be applied to all nodes - joex and +restserver! + +1. In the config, enable a second file backend (besides the default) + you want to change to and start docspell as normal. Don't change + `default-store` yet. +2. Run the file integrity check in order to see whether all files are + ok as they are in the current store. 
This can be done using the + [cli](@/docs/tools/cli.md) by running: + + ```bash + dsc admin file-integrity-check + ``` +3. Run the copy files admin command, which will copy all files from the + current `default-store` to all other enabled stores. + + ```bash + dsc admin clone-file-repository + ``` + + And wait until it's done :-). You can see the progress in the jobs + page when logged in as `docspell-system` or just look at the logs. +4. In the config, change the `default-store` to the one you just + copied all the files to and restart docspell. +5. Log in and do some smoke tests. Then run the file integrity check + again: + + ```bash + dsc admin file-integrity-check + ``` + +If all is fine, then you are done and are now using the new file +backend. If the second integrity check fails, please open an issue. +You then need to revert the config change of step 4 to use the +previous `default-store` again. + +If you want to delete the files from the database, you can do so by +running the following SQL against the database: + +```sql +DELETE FROM filechunk +``` + +You can copy them back into the database using the steps above. + + ## File Processing Files are being processed by the joex component. So all the respective @@ -517,9 +680,14 @@ setting has significant impact, especially when your documents are in German. 
Here are some rough numbers on jvm heap usage (the same file was used for all tries): -<table class="table is-hoverable is-striped"> +<table class="striped-basic"> <thead> - <tr><th>nlp.mode</th><th>English</th><th>German</th><th>French</th></tr> + <tr> + <th>nlp.mode</th> + <th>English</th> + <th>German</th> + <th>French</th> + </tr> </thead> <tfoot> </tfoot> diff --git a/website/styles/content.css b/website/styles/content.css index e936f10e..0e2ffac6 100644 --- a/website/styles/content.css +++ b/website/styles/content.css @@ -3,6 +3,13 @@ @apply leading-relaxed text-left; } + .content table.striped-basic tbody tr { + @apply border-t dark:border-stone-600; + } + .content table { + @apply w-full my-2 px-4; + } + .content h1:not(.no-default) { @apply text-4xl font-serif font-bold mt-6 mb-3 py-1 border-b dark:border-stone-800 text-stone-700 dark:text-stone-200; }