From cd3db6ea08bea5c9562cf99c3cff03570b6716e1 Mon Sep 17 00:00:00 2001 From: eikek Date: Sat, 12 Mar 2022 12:06:36 +0100 Subject: [PATCH] Run file integrity check in house keeping tasks --- .../joex/src/main/resources/reference.conf | 5 ++ .../scala/docspell/joex/JoexAppImpl.scala | 2 +- .../scala/docspell/joex/fts/ReIndexTask.scala | 3 +- .../docspell/joex/hk/CheckNodesTask.scala | 9 ++-- .../docspell/joex/hk/CleanupInvitesTask.scala | 9 ++-- .../docspell/joex/hk/CleanupJobsTask.scala | 9 ++-- .../joex/hk/CleanupRememberMeTask.scala | 10 ++-- .../docspell/joex/hk/CleanupResult.scala | 21 ++++++++ .../docspell/joex/hk/HouseKeepingConfig.scala | 4 +- .../docspell/joex/hk/HouseKeepingTask.scala | 52 ++++++++++++++++--- .../docspell/joex/hk/IntegrityCheckTask.scala | 32 ++++++++++++ .../joex/learn/LearnItemEntities.scala | 2 +- .../docspell/joex/process/ItemHandler.scala | 6 +-- .../docspell/joex/process/ProcessItem.scala | 1 + .../scala/docspell/joex/scheduler/Task.scala | 19 ++++--- 15 files changed, 150 insertions(+), 34 deletions(-) create mode 100644 modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala create mode 100644 modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index 444ef5ef..318bdeff 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -194,6 +194,11 @@ docspell.joex { # How often the node must be unreachable, before it is removed. min-not-found = 2 } + + # Checks all files against their checksum + integrity-check { + enabled = true + } } # A periodic task to check for new releases of docspell. It can diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index fc668f0f..ce34e43c 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -194,7 +194,7 @@ object JoexAppImpl extends MailAddressCodec { .withTask( JobTask.json( HouseKeepingTask.taskName, - HouseKeepingTask[F](cfg), + HouseKeepingTask[F](cfg, fileRepo), HouseKeepingTask.onCancel[F] ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala index 37c1326b..7c7f81e0 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala @@ -7,6 +7,7 @@ package docspell.joex.fts import cats.effect._ +import cats.implicits._ import docspell.backend.fulltext.CreateIndex import docspell.common._ @@ -42,7 +43,7 @@ object ReIndexTask { (collective match { case Some(_) => FtsWork - .clearIndex(collective) + .clearIndex[F](collective) .recoverWith( FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) ) ++ diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala index 8e0bdb1c..09651893 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CheckNodesTask.scala @@ -18,10 +18,9 @@ import org.http4s.blaze.client.BlazeClientBuilder import org.http4s.client.Client object CheckNodesTask { - def apply[F[_]: Async]( cfg: HouseKeepingConfig.CheckNodes - ): Task[F, Unit, Unit] = + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { @@ -35,9 +34,11 @@ object CheckNodesTask { ) n <- removeNodes(ctx, cfg) _ <- ctx.logger.info(s"Removed $n nodes") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CheckNodes task is disabled in the configuration") + ctx.logger.info("CheckNodes task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] + } def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] = diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala index ae7b59e8..de23eb71 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupInvitesTask.scala @@ -15,7 +15,9 @@ import docspell.store.records._ object CleanupInvitesTask { - def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupInvites): Task[F, Unit, Unit] = + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.CleanupInvites + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { @@ -24,8 +26,9 @@ object CleanupInvitesTask { _ <- ctx.logger.info(s"Cleanup invitations older than $ts") n <- ctx.store.transact(RInvitation.deleteOlderThan(ts)) _ <- ctx.logger.info(s"Removed $n invitations") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CleanupInvites task is disabled in the configuration") + ctx.logger.info("CleanupInvites task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] } } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala index 4ae1c9e1..67a1e1a2 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupJobsTask.scala @@ -17,7 +17,9 @@ import docspell.store.records._ object CleanupJobsTask { - def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupJobs): Task[F, Unit, Unit] = + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.CleanupJobs + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { @@ -26,9 +28,10 @@ object CleanupJobsTask { _ <- ctx.logger.info(s"Cleanup jobs older than $ts") n <- deleteDoneJobs(ctx.store, ts, cfg.deleteBatch) _ <- ctx.logger.info(s"Removed $n jobs") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CleanupJobs task is disabled in the configuration") + ctx.logger.info("CleanupJobs task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] } def deleteDoneJobs[F[_]: Sync](store: Store[F], ts: Timestamp, batch: Int): F[Int] = diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala index b9cfdc98..120b8cc8 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupRememberMeTask.scala @@ -14,8 +14,9 @@ import docspell.joex.scheduler.Task import docspell.store.records._ object CleanupRememberMeTask { - - def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupRememberMe): Task[F, Unit, Unit] = + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.CleanupRememberMe + ): Task[F, Unit, CleanupResult] = Task { ctx => if (cfg.enabled) for { @@ -24,8 +25,9 @@ object CleanupRememberMeTask { _ <- ctx.logger.info(s"Cleanup remember-me tokens older than $ts") n <- ctx.store.transact(RRememberMe.deleteOlderThan(ts)) _ <- ctx.logger.info(s"Removed $n tokens") - } yield () + } yield CleanupResult.of(n) else - ctx.logger.info("CleanupRememberMe task is disabled in the configuration") + ctx.logger.info("CleanupRememberMe task is disabled in the configuration") *> + CleanupResult.disabled.pure[F] } } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala b/modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala new file mode 100644 index 00000000..931f6714 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/hk/CleanupResult.scala @@ -0,0 +1,21 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.hk + +import io.circe.Encoder +import io.circe.generic.semiauto.deriveEncoder + +case class CleanupResult(removed: Int, disabled: Boolean) { + def asString = if (disabled) "disabled" else s"$removed" +} +object CleanupResult { + def of(n: Int): CleanupResult = CleanupResult(n, false) + def disabled: CleanupResult = CleanupResult(0, true) + + implicit val jsonEncoder: Encoder[CleanupResult] = + deriveEncoder +} diff --git a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala index 0d6b4778..2b5bc355 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingConfig.scala @@ -16,7 +16,8 @@ case class HouseKeepingConfig( cleanupInvites: CleanupInvites, cleanupJobs: CleanupJobs, cleanupRememberMe: CleanupRememberMe, - checkNodes: CheckNodes + checkNodes: CheckNodes, + integrityCheck: IntegrityCheck ) object HouseKeepingConfig { @@ -29,4 +30,5 @@ object HouseKeepingConfig { case class CheckNodes(enabled: Boolean, minNotFound: Int) + case class IntegrityCheck(enabled: Boolean) } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala index 20b1b2e6..c0c3d084 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala @@ -8,26 +8,41 @@ package docspell.joex.hk import cats.effect._ import cats.implicits._ + +import docspell.backend.ops.OFileRepository import docspell.common._ import docspell.joex.Config -import docspell.joex.scheduler.Task +import docspell.joex.filecopy.FileIntegrityCheckTask +import docspell.joex.scheduler.{JobTaskResultEncoder, Task} import docspell.store.records._ import docspell.store.usertask.UserTaskScope + import com.github.eikek.calev._ -import docspell.backend.ops.OFileRepository +import io.circe.Encoder +import io.circe.generic.semiauto.deriveEncoder object HouseKeepingTask { private val periodicId = Ident.unsafe("docspell-houskeeping") val taskName: Ident = Ident.unsafe("housekeeping") - def apply[F[_]: Async](cfg: Config, fileRepo: OFileRepository[F]): Task[F, Unit, Unit] = + def apply[F[_]: Async]( + cfg: Config, + fileRepo: OFileRepository[F] + ): Task[F, Unit, Result] = { + val combined = + ( + CheckNodesTask(cfg.houseKeeping.checkNodes), + CleanupInvitesTask(cfg.houseKeeping.cleanupInvites), + CleanupJobsTask(cfg.houseKeeping.cleanupJobs), + CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe), + IntegrityCheckTask(cfg.houseKeeping.integrityCheck, fileRepo) + ).mapN(Result.apply) + Task .log[F, Unit](_.info(s"Running house-keeping task now")) - .flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites)) - .flatMap(_ => CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe)) - .flatMap(_ => CleanupJobsTask(cfg.houseKeeping.cleanupJobs)) - .flatMap(_ => CheckNodesTask(cfg.houseKeeping.checkNodes)) + .flatMap(_ => combined) + } def onCancel[F[_]]: Task[F, Unit, Unit] = Task.log[F, Unit](_.warn("Cancelling house-keeping task")) @@ -45,4 +60,27 @@ object HouseKeepingTask { None ) .map(_.copy(id = periodicId)) + + case class Result( + checkNodes: CleanupResult, + cleanupInvites: CleanupResult, + cleanupJobs: CleanupResult, + cleanupRememberMe: CleanupResult, + integrityCheck: FileIntegrityCheckTask.Result + ) + + object Result { + implicit val jsonEncoder: Encoder[Result] = + deriveEncoder + + implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] = + JobTaskResultEncoder.fromJson[Result].withMessage { r => + s"- Nodes removed: ${r.checkNodes.asString}\n" + + s"- Invites removed: ${r.cleanupInvites.asString}\n" + + s"- Jobs removed: ${r.cleanupJobs.asString}\n" + + s"- RememberMe removed: ${r.cleanupRememberMe.asString}\n" + + s"- Integrity check: ok=${r.integrityCheck.ok}, failed=${r.integrityCheck.failedKeys.size}, notFound=${r.integrityCheck.notFoundKeys.size}" + } + + } } diff --git a/modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala new file mode 100644 index 00000000..42bb3333 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/hk/IntegrityCheckTask.scala @@ -0,0 +1,32 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.hk + +import cats.effect._ +import cats.implicits._ + +import docspell.backend.ops.OFileRepository +import docspell.common._ +import docspell.joex.filecopy.FileIntegrityCheckTask +import docspell.joex.scheduler.Task + +object IntegrityCheckTask { + + def apply[F[_]: Sync]( + cfg: HouseKeepingConfig.IntegrityCheck, + fileRepo: OFileRepository[F] + ): Task[F, Unit, FileIntegrityCheckTask.Result] = + Task { ctx => + if (cfg.enabled) + FileIntegrityCheckTask(fileRepo).run( + ctx.map(_ => FileIntegrityCheckArgs(FileKeyPart.Empty)) + ) + else + ctx.logger.info("Integrity check task is disabled in the configuration") *> + FileIntegrityCheckTask.Result.empty.pure[F] + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala index b0d4f181..9fc1b502 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnItemEntities.scala @@ -23,7 +23,7 @@ object LearnItemEntities { maxItems: Int, maxTextLen: Int ): Task[F, A, Unit] = - learnCorrOrg(analyser, collective, maxItems, maxTextLen) + learnCorrOrg[F, A](analyser, collective, maxItems, maxTextLen) .flatMap(_ => learnCorrPerson[F, A](analyser, collective, maxItems, maxTextLen)) .flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen)) .flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index fa1774e0..5da1e6d6 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -25,7 +25,7 @@ object ItemHandler { type Args = ProcessItemArgs def onCancel[F[_]: Sync]: Task[F, Args, Unit] = - logWarn("Now cancelling.").flatMap(_ => + logWarn[F]("Now cancelling.").flatMap(_ => markItemCreated.flatMap { case true => Task.pure(()) @@ -41,10 +41,10 @@ object ItemHandler { analyser: TextAnalyser[F], regexNer: RegexNerFile[F] ): Task[F, Args, Option[ItemData]] = - logBeginning.flatMap(_ => + logBeginning[F].flatMap(_ => DuplicateCheck[F] .flatMap(args => - if (args.files.isEmpty) logNoFiles.map(_ => None) + if (args.files.isEmpty) logNoFiles[F].map(_ => None) else { val create: Task[F, Args, ItemData] = CreateItem[F].contramap(_ => args.pure[F]) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index 18cb7a31..735d13be 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -7,6 +7,7 @@ package docspell.joex.process import cats.effect._ +import cats.implicits._ import docspell.analysis.TextAnalyser import docspell.backend.ops.OItem diff --git a/modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala b/modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala index d211d5a0..4ba4d84d 100644 --- a/modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala +++ b/modules/joex/src/main/scala/docspell/joex/scheduler/Task.scala @@ -18,12 +18,6 @@ trait Task[F[_], A, B] { def run(ctx: Context[F, A]): F[B] - def map[C](f: B => C)(implicit F: Functor[F]): Task[F, A, C] = - Task(Task.toKleisli(this).map(f)) - - def flatMap[C](f: B => Task[F, A, C])(implicit F: FlatMap[F]): Task[F, A, C] = - Task(Task.toKleisli(this).flatMap(a => Task.toKleisli(f(a)))) - def andThen[C](f: B => F[C])(implicit F: FlatMap[F]): Task[F, A, C] = Task(Task.toKleisli(this).andThen(f)) @@ -62,4 +56,17 @@ object Task { def log[F[_], A](f: Logger[F] => F[Unit]): Task[F, A, Unit] = Task(ctx => f(ctx.logger)) + + implicit def taskMonad[F[_]: Monad, T]: Monad[Task[F, T, *]] = + new Monad[Task[F, T, *]] { + def pure[A](x: A) = Task(_ => Monad[F].pure(x)) + def flatMap[A, B](fa: Task[F, T, A])(f: A => Task[F, T, B]) = + Task(Task.toKleisli(fa).flatMap(a => Task.toKleisli(f(a)))) + + def tailRecM[A, B](a: A)(f: A => Task[F, T, Either[A, B]]) = { + val monadK = Monad[Kleisli[F, Context[F, T], *]] + val r = monadK.tailRecM(a)(x => Task.toKleisli(f(x))) + Task(r) + } + } }