Remove store dependency from scheduler api

eikek
2022-03-13 14:25:09 +01:00
parent 3a05dc56cb
commit 90e52efbff
51 changed files with 614 additions and 482 deletions
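
Every task touched by this commit follows the same mechanical pattern: instead of reaching the database through the scheduler's Context (ctx.store), a Store[F] is passed in explicitly where the task is wired up, so the scheduler-api module no longer depends on the store module (see the build changes in the first file below). A minimal sketch of the before/after shape, assuming hypothetical placeholder names (CleanupTask, MyConfig and RRecord are illustrations, not code from this commit):

import cats.effect.Sync
import cats.implicits._
import docspell.scheduler.Task
import docspell.store.Store

object CleanupTask {
  // Before: the store was obtained from the scheduler context, which
  // forced scheduler-api to depend on the store module:
  //
  //   def apply[F[_]: Sync](cfg: MyConfig): Task[F, Unit, Int] =
  //     Task(ctx => ctx.store.transact(RRecord.deleteOlderThan(cfg.olderThan)))
  //
  // After: the store is an explicit constructor argument, injected at
  // wiring time (see JoexAppImpl below); the context only supplies
  // args and logger.
  def apply[F[_]: Sync](cfg: MyConfig, store: Store[F]): Task[F, Unit, Int] =
    Task { ctx =>
      for {
        _ <- ctx.logger.info("Running cleanup")
        n <- store.transact(RRecord.deleteOlderThan(cfg.olderThan))
      } yield n
    }
}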

View File

@@ -530,7 +530,7 @@ val schedulerApi = project
     Dependencies.fs2Core ++
       Dependencies.circeCore
   )
-  .dependsOn(loggingApi, common, store, pubsubApi)
+  .dependsOn(loggingApi, common, pubsubApi)

 val schedulerImpl = project
   .in(file("modules/scheduler/impl"))
@@ -540,7 +540,7 @@ val schedulerImpl = project
   .settings(
     name := "docspell-scheduler-impl"
   )
-  .dependsOn(schedulerApi, notificationApi, pubsubApi)
+  .dependsOn(store, schedulerApi, notificationApi, pubsubApi)

 val extract = project
   .in(file("modules/extract"))

View File

@@ -149,91 +149,91 @@ object JoexAppImpl extends MailAddressCodec {
       .withTask(
         JobTask.json(
           ProcessItemArgs.taskName,
-          ItemHandler.newItem[F](cfg, itemOps, fts, analyser, regexNer),
-          ItemHandler.onCancel[F]
+          ItemHandler.newItem[F](cfg, store, itemOps, fts, analyser, regexNer),
+          ItemHandler.onCancel[F](store)
         )
       )
       .withTask(
         JobTask.json(
           ReProcessItemArgs.taskName,
-          ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer),
+          ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer, store),
           ReProcessItem.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           ScanMailboxArgs.taskName,
-          ScanMailboxTask[F](cfg.userTasks.scanMailbox, javaEmil, upload, joex),
+          ScanMailboxTask[F](cfg.userTasks.scanMailbox, store, javaEmil, upload, joex),
           ScanMailboxTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           MigrationTask.taskName,
-          MigrationTask[F](cfg.fullTextSearch, fts, createIndex),
+          MigrationTask[F](cfg.fullTextSearch, store, fts, createIndex),
           MigrationTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           ReIndexTask.taskName,
-          ReIndexTask[F](cfg.fullTextSearch, fts, createIndex),
+          ReIndexTask[F](cfg.fullTextSearch, store, fts, createIndex),
           ReIndexTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           HouseKeepingTask.taskName,
-          HouseKeepingTask[F](cfg, fileRepo),
+          HouseKeepingTask[F](cfg, store, fileRepo),
           HouseKeepingTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           PdfConvTask.taskName,
-          PdfConvTask[F](cfg),
+          PdfConvTask[F](cfg, store),
           PdfConvTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           ConvertAllPdfArgs.taskName,
-          ConvertAllPdfTask[F](jobStoreModule.jobs, joex),
+          ConvertAllPdfTask[F](jobStoreModule.jobs, joex, store),
           ConvertAllPdfTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           LearnClassifierArgs.taskName,
-          LearnClassifierTask[F](cfg.textAnalysis, analyser),
+          LearnClassifierTask[F](cfg.textAnalysis, store, analyser),
           LearnClassifierTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           MakePreviewArgs.taskName,
-          MakePreviewTask[F](cfg.extraction.preview),
+          MakePreviewTask[F](cfg.extraction.preview, store),
           MakePreviewTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           AllPreviewsArgs.taskName,
-          AllPreviewsTask[F](jobStoreModule.jobs, joex),
+          AllPreviewsTask[F](jobStoreModule.jobs, joex, store),
           AllPreviewsTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           MakePageCountArgs.taskName,
-          MakePageCountTask[F](),
+          MakePageCountTask[F](store),
           MakePageCountTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           AllPageCountTask.taskName,
-          AllPageCountTask[F](jobStoreModule.jobs, joex),
+          AllPageCountTask[F](store, jobStoreModule.jobs, joex),
           AllPageCountTask.onCancel[F]
         )
       )
@@ -250,6 +250,7 @@ object JoexAppImpl extends MailAddressCodec {
           UpdateCheckTask[F](
             cfg.updateCheck,
             cfg.sendMail,
+            store,
             javaEmil,
             updateCheck,
             ThisVersion.default
@@ -260,28 +261,28 @@ object JoexAppImpl extends MailAddressCodec {
       .withTask(
         JobTask.json(
           PeriodicQueryTask.taskName,
-          PeriodicQueryTask[F](notification),
+          PeriodicQueryTask[F](store, notification),
           PeriodicQueryTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           PeriodicDueItemsTask.taskName,
-          PeriodicDueItemsTask[F](notification),
+          PeriodicDueItemsTask[F](store, notification),
           PeriodicDueItemsTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           FileCopyTaskArgs.taskName,
-          FileCopyTask[F](cfg),
+          FileCopyTask[F](cfg, store),
           FileCopyTask.onCancel[F]
         )
       )
       .withTask(
         JobTask.json(
           FileIntegrityCheckArgs.taskName,
-          FileIntegrityCheckTask[F](fileRepo),
+          FileIntegrityCheckTask[F](fileRepo, store),
           FileIntegrityCheckTask.onCancel[F]
         )
       )

View File

@@ -17,6 +17,7 @@ import docspell.logging.Logger
 import docspell.store.file.{BinnyUtils, FileRepository, FileRepositoryConfig}

 import binny.CopyTool.Counter
 import binny.{BinaryId, BinaryStore, CopyTool}
+import docspell.store.Store
 import io.circe.generic.semiauto.deriveCodec
 import io.circe.{Codec, Decoder, Encoder}
@@ -69,7 +70,7 @@ object FileCopyTask {
   def onCancel[F[_]]: Task[F, Args, Unit] =
     Task.log(_.warn(s"Cancelling ${FileCopyTaskArgs.taskName.id} task"))

-  def apply[F[_]: Async](cfg: Config): Task[F, Args, CopyResult] =
+  def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, CopyResult] =
     Task { ctx =>
       val src = ctx.args.from
         .map(id =>
@@ -93,8 +94,8 @@ object FileCopyTask {
           .fromList(targets.filter(_ != srcConfig))
           .toRight(CopyResult.noTargetStore)

-        srcRepo = ctx.store.createFileRepository(srcConfig, true)
-        targetRepos = trgConfig.map(ctx.store.createFileRepository(_, false))
+        srcRepo = store.createFileRepository(srcConfig, true)
+        targetRepos = trgConfig.map(store.createFileRepository(_, false))
       } yield (srcRepo, targetRepos)

       data match {

View File

@@ -13,6 +13,7 @@ import docspell.backend.ops.OFileRepository
 import docspell.backend.ops.OFileRepository.IntegrityResult
 import docspell.common.{FileIntegrityCheckArgs, FileKey}
 import docspell.scheduler.{JobTaskResultEncoder, Task}
+import docspell.store.Store
 import docspell.store.records.RFileMeta
 import io.circe.Encoder
 import io.circe.generic.semiauto.deriveEncoder
@@ -54,9 +55,9 @@ object FileIntegrityCheckTask {
       }
     }

-  def apply[F[_]: Sync](ops: OFileRepository[F]): Task[F, Args, Result] =
+  def apply[F[_]: Sync](ops: OFileRepository[F], store: Store[F]): Task[F, Args, Result] =
     Task { ctx =>
-      ctx.store
+      store
         .transact(
           RFileMeta
             .findAll(ctx.args.pattern, 50)

View File

@@ -9,25 +9,13 @@ package docspell.joex.fts
 import docspell.backend.fulltext.CreateIndex
 import docspell.ftsclient.FtsClient
 import docspell.joex.Config
-import docspell.scheduler.Context
 import docspell.logging.Logger
 import docspell.store.Store

-case class FtsContext[F[_]](
+final case class FtsContext[F[_]](
     cfg: Config.FullTextSearch,
     store: Store[F],
     fulltext: CreateIndex[F],
     fts: FtsClient[F],
     logger: Logger[F]
 )
-
-object FtsContext {
-
-  def apply[F[_]](
-      cfg: Config.FullTextSearch,
-      fts: FtsClient[F],
-      fulltext: CreateIndex[F],
-      ctx: Context[F, _]
-  ): FtsContext[F] =
-    FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger)
-}

View File

@@ -9,13 +9,13 @@ package docspell.joex.fts
 import cats._
 import cats.data.{Kleisli, NonEmptyList}
 import cats.implicits._

 import docspell.backend.fulltext.CreateIndex
 import docspell.common._
 import docspell.ftsclient._
 import docspell.joex.Config
 import docspell.scheduler.Context
 import docspell.logging.Logger
+import docspell.store.Store

 object FtsWork {
   import syntax._
@@ -106,10 +106,11 @@ object FtsWork {
       def forContext(
           cfg: Config.FullTextSearch,
+          store: Store[F],
           fts: FtsClient[F],
           fulltext: CreateIndex[F]
       ): Kleisli[F, Context[F, _], Unit] =
-        mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx))
+        mt.local(ctx => FtsContext(cfg, store, fulltext, fts, ctx.logger))
     }
   }
 }

View File

@@ -13,12 +13,14 @@ import docspell.common._
 import docspell.ftsclient._
 import docspell.joex.Config
 import docspell.scheduler.{Job, Task}
+import docspell.store.Store

 object MigrationTask {
   val taskName = Ident.unsafe("full-text-index")

   def apply[F[_]: Async](
       cfg: Config.FullTextSearch,
+      store: Store[F],
       fts: FtsClient[F],
       createIndex: CreateIndex[F]
   ): Task[F, Unit, Unit] =
@@ -28,7 +30,7 @@ object MigrationTask {
       Task(ctx =>
         for {
           migs <- migrationTasks[F](fts)
-          res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs)
+          res <- Migration[F](cfg, fts, store, createIndex, ctx.logger).run(migs)
         } yield res
       )
     )

View File

@@ -8,13 +8,13 @@ package docspell.joex.fts
 import cats.effect._
 import cats.implicits._

 import docspell.backend.fulltext.CreateIndex
 import docspell.common._
 import docspell.ftsclient._
 import docspell.joex.Config
 import docspell.joex.fts.FtsWork.syntax._
 import docspell.scheduler.Task
+import docspell.store.Store

 object ReIndexTask {
   type Args = ReIndexTaskArgs
@@ -24,6 +24,7 @@ object ReIndexTask {
   def apply[F[_]: Async](
       cfg: Config.FullTextSearch,
+      store: Store[F],
       fts: FtsClient[F],
       fulltext: CreateIndex[F]
   ): Task[F, Args, Unit] =
@@ -31,7 +32,7 @@ object ReIndexTask {
       .log[F, Args](_.info(s"Running full-text re-index now"))
       .flatMap(_ =>
         Task(ctx =>
-          clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx)
+          clearData[F](ctx.args.collective).forContext(cfg, store, fts, fulltext).run(ctx)
         )
       )

View File

@@ -8,18 +8,18 @@ package docspell.joex.hk
 import cats.effect._
 import cats.implicits._

 import docspell.common._
-import docspell.scheduler.{Context, Task}
+import docspell.scheduler.Task
 import docspell.logging.Logger
+import docspell.store.Store
 import docspell.store.records._
 import org.http4s.blaze.client.BlazeClientBuilder
 import org.http4s.client.Client

 object CheckNodesTask {
   def apply[F[_]: Async](
-      cfg: HouseKeepingConfig.CheckNodes
+      cfg: HouseKeepingConfig.CheckNodes,
+      store: Store[F]
   ): Task[F, Unit, CleanupResult] =
     Task { ctx =>
       if (cfg.enabled)
@@ -27,12 +27,12 @@ object CheckNodesTask {
         _ <- ctx.logger.info("Check nodes reachability")
         ec = scala.concurrent.ExecutionContext.global
         _ <- BlazeClientBuilder[F].withExecutionContext(ec).resource.use { client =>
-          checkNodes(ctx, client)
+          checkNodes(ctx.logger, store, client)
         }
         _ <- ctx.logger.info(
           s"Remove nodes not found more than ${cfg.minNotFound} times"
         )
-        n <- removeNodes(ctx, cfg)
+        n <- removeNodes(store, cfg)
         _ <- ctx.logger.info(s"Removed $n nodes")
       } yield CleanupResult.of(n)
     else
@@ -41,14 +41,18 @@ object CheckNodesTask {
     }

-  def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] =
-    ctx.store
+  def checkNodes[F[_]: Async](
+      logger: Logger[F],
+      store: Store[F],
+      client: Client[F]
+  ): F[Unit] =
+    store
       .transact(RNode.streamAll)
       .evalMap(node =>
-        checkNode(ctx.logger, client)(node.url)
+        checkNode(logger, client)(node.url)
          .flatMap(seen =>
-            if (seen) ctx.store.transact(RNode.resetNotFound(node.id))
-            else ctx.store.transact(RNode.incrementNotFound(node.id))
+            if (seen) store.transact(RNode.resetNotFound(node.id))
+            else store.transact(RNode.incrementNotFound(node.id))
          )
      )
      .compile
@@ -68,9 +72,9 @@ object CheckNodesTask {
     }

   def removeNodes[F[_]](
-      ctx: Context[F, _],
+      store: Store[F],
       cfg: HouseKeepingConfig.CheckNodes
   ): F[Int] =
-    ctx.store.transact(RNode.deleteNotFound(cfg.minNotFound))
+    store.transact(RNode.deleteNotFound(cfg.minNotFound))
 }

View File

@@ -8,15 +8,16 @@ package docspell.joex.hk
 import cats.effect._
 import cats.implicits._

 import docspell.common._
 import docspell.scheduler.Task
+import docspell.store.Store
 import docspell.store.records._

 object CleanupInvitesTask {
   def apply[F[_]: Sync](
-      cfg: HouseKeepingConfig.CleanupInvites
+      cfg: HouseKeepingConfig.CleanupInvites,
+      store: Store[F]
   ): Task[F, Unit, CleanupResult] =
     Task { ctx =>
       if (cfg.enabled)
@@ -24,7 +25,7 @@ object CleanupInvitesTask {
           now <- Timestamp.current[F]
           ts = now - cfg.olderThan
           _ <- ctx.logger.info(s"Cleanup invitations older than $ts")
-          n <- ctx.store.transact(RInvitation.deleteOlderThan(ts))
+          n <- store.transact(RInvitation.deleteOlderThan(ts))
           _ <- ctx.logger.info(s"Removed $n invitations")
         } yield CleanupResult.of(n)
       else

View File

@@ -18,7 +18,8 @@ import docspell.store.records._
 object CleanupJobsTask {

   def apply[F[_]: Sync](
-      cfg: HouseKeepingConfig.CleanupJobs
+      cfg: HouseKeepingConfig.CleanupJobs,
+      store: Store[F]
   ): Task[F, Unit, CleanupResult] =
     Task { ctx =>
       if (cfg.enabled)
@@ -26,7 +27,7 @@ object CleanupJobsTask {
           now <- Timestamp.current[F]
           ts = now - cfg.olderThan
           _ <- ctx.logger.info(s"Cleanup jobs older than $ts")
-          n <- deleteDoneJobs(ctx.store, ts, cfg.deleteBatch)
+          n <- deleteDoneJobs(store, ts, cfg.deleteBatch)
           _ <- ctx.logger.info(s"Removed $n jobs")
         } yield CleanupResult.of(n)
       else

View File

@@ -8,14 +8,15 @@ package docspell.joex.hk
 import cats.effect._
 import cats.implicits._

 import docspell.common._
 import docspell.scheduler.Task
+import docspell.store.Store
 import docspell.store.records._

 object CleanupRememberMeTask {
   def apply[F[_]: Sync](
-      cfg: HouseKeepingConfig.CleanupRememberMe
+      cfg: HouseKeepingConfig.CleanupRememberMe,
+      store: Store[F]
   ): Task[F, Unit, CleanupResult] =
     Task { ctx =>
       if (cfg.enabled)
@@ -23,7 +24,7 @@ object CleanupRememberMeTask {
           now <- Timestamp.current[F]
           ts = now - cfg.olderThan
           _ <- ctx.logger.info(s"Cleanup remember-me tokens older than $ts")
-          n <- ctx.store.transact(RRememberMe.deleteOlderThan(ts))
+          n <- store.transact(RRememberMe.deleteOlderThan(ts))
           _ <- ctx.logger.info(s"Removed $n tokens")
         } yield CleanupResult.of(n)
       else

View File

@@ -15,6 +15,7 @@ import docspell.joex.filecopy.FileIntegrityCheckTask
 import docspell.scheduler.{JobTaskResultEncoder, Task}
 import com.github.eikek.calev._
 import docspell.scheduler.usertask.UserTask
+import docspell.store.Store
 import io.circe.Encoder
 import io.circe.generic.semiauto.deriveEncoder
@@ -25,15 +26,16 @@ object HouseKeepingTask {

   def apply[F[_]: Async](
       cfg: Config,
+      store: Store[F],
       fileRepo: OFileRepository[F]
   ): Task[F, Unit, Result] = {
     val combined =
       (
-        CheckNodesTask(cfg.houseKeeping.checkNodes),
-        CleanupInvitesTask(cfg.houseKeeping.cleanupInvites),
-        CleanupJobsTask(cfg.houseKeeping.cleanupJobs),
-        CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe),
-        IntegrityCheckTask(cfg.houseKeeping.integrityCheck, fileRepo)
+        CheckNodesTask(cfg.houseKeeping.checkNodes, store),
+        CleanupInvitesTask(cfg.houseKeeping.cleanupInvites, store),
+        CleanupJobsTask(cfg.houseKeeping.cleanupJobs, store),
+        CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe, store),
+        IntegrityCheckTask(cfg.houseKeeping.integrityCheck, store, fileRepo)
       ).mapN(Result.apply)

     Task

View File

@@ -8,21 +8,22 @@ package docspell.joex.hk
 import cats.effect._
 import cats.implicits._

 import docspell.backend.ops.OFileRepository
 import docspell.common._
 import docspell.joex.filecopy.FileIntegrityCheckTask
 import docspell.scheduler.Task
+import docspell.store.Store

 object IntegrityCheckTask {

   def apply[F[_]: Sync](
       cfg: HouseKeepingConfig.IntegrityCheck,
+      store: Store[F],
       fileRepo: OFileRepository[F]
   ): Task[F, Unit, FileIntegrityCheckTask.Result] =
     Task { ctx =>
       if (cfg.enabled)
-        FileIntegrityCheckTask(fileRepo).run(
+        FileIntegrityCheckTask(fileRepo, store).run(
           ctx.map(_ => FileIntegrityCheckArgs(FileKeyPart.Empty))
         )
       else

View File

@@ -9,13 +9,13 @@ package docspell.joex.learn
 import cats.data.OptionT
 import cats.effect._
 import cats.implicits._

 import docspell.analysis.TextAnalyser
 import docspell.backend.ops.OCollective
 import docspell.common._
 import docspell.joex.Config
 import docspell.scheduler._
 import docspell.logging.Logger
+import docspell.store.Store
 import docspell.store.records.{RClassifierModel, RClassifierSetting}

@@ -29,14 +29,16 @@ object LearnClassifierTask {
   def apply[F[_]: Async](
       cfg: Config.TextAnalysis,
+      store: Store[F],
       analyser: TextAnalyser[F]
   ): Task[F, Args, Unit] =
-    learnTags(cfg, analyser)
-      .flatMap(_ => learnItemEntities(cfg, analyser))
+    learnTags(cfg, store, analyser)
+      .flatMap(_ => learnItemEntities(cfg, store, analyser))
       .flatMap(_ => Task(_ => Sync[F].delay(System.gc())))

   private def learnItemEntities[F[_]: Async](
       cfg: Config.TextAnalysis,
+      store: Store[F],
       analyser: TextAnalyser[F]
   ): Task[F, Args, Unit] =
     Task { ctx =>
@@ -44,6 +46,7 @@ object LearnClassifierTask {
         LearnItemEntities
           .learnAll(
             analyser,
+            store,
             ctx.args.collective,
             cfg.classification.itemCount,
             cfg.maxLength
@@ -54,16 +57,17 @@ object LearnClassifierTask {

   private def learnTags[F[_]: Async](
       cfg: Config.TextAnalysis,
+      store: Store[F],
       analyser: TextAnalyser[F]
   ): Task[F, Args, Unit] =
     Task { ctx =>
       val learnTags =
         for {
-          sett <- findActiveSettings[F](ctx, cfg)
+          sett <- findActiveSettings[F](ctx, store, cfg)
           maxItems = cfg.classification.itemCountOrWhenLower(sett.itemCount)
           _ <- OptionT.liftF(
             LearnTags
-              .learnAllTagCategories(analyser)(
+              .learnAllTagCategories(analyser, store)(
                 ctx.args.collective,
                 maxItems,
                 cfg.maxLength
@@ -74,34 +78,38 @@ object LearnClassifierTask {
       // learn classifier models from active tag categories
       learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *>
         // delete classifier model files for categories that have been removed
-        clearObsoleteTagModels(ctx) *>
+        clearObsoleteTagModels(ctx, store) *>
         // when tags are deleted, categories may get removed. fix the json array
-        ctx.store
+        store
           .transact(RClassifierSetting.fixCategoryList(ctx.args.collective))
           .map(_ => ())
     }

-  private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
+  private def clearObsoleteTagModels[F[_]: Sync](
+      ctx: Context[F, Args],
+      store: Store[F]
+  ): F[Unit] =
     for {
-      list <- ctx.store.transact(
+      list <- store.transact(
        ClassifierName.findOrphanTagModels(ctx.args.collective)
      )
      _ <- ctx.logger.info(
        s"Found ${list.size} obsolete model files that are deleted now."
      )
-      n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id)))
+      n <- store.transact(RClassifierModel.deleteAll(list.map(_.id)))
       _ <- list
        .map(_.fileId)
-        .traverse(id => ctx.store.fileRepo.delete(id))
+        .traverse(id => store.fileRepo.delete(id))
       _ <- ctx.logger.debug(s"Deleted $n model files.")
    } yield ()

   private def findActiveSettings[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       cfg: Config.TextAnalysis
   ): OptionT[F, OCollective.Classifier] =
     if (cfg.classification.enabled)
-      OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective)))
+      OptionT(store.transact(RClassifierSetting.findById(ctx.args.collective)))
         .filter(_.autoTagEnabled)
         .map(OCollective.Classifier.fromRecord)
     else

View File

@@ -10,76 +10,84 @@ import cats.data.Kleisli
 import cats.effect._
 import cats.implicits._
 import fs2.Stream

 import docspell.analysis.TextAnalyser
 import docspell.analysis.classifier.TextClassifier.Data
 import docspell.common._
 import docspell.scheduler._
+import docspell.store.Store

 object LearnItemEntities {
   def learnAll[F[_]: Async, A](
       analyser: TextAnalyser[F],
+      store: Store[F],
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
   ): Task[F, A, Unit] =
-    learnCorrOrg[F, A](analyser, collective, maxItems, maxTextLen)
-      .flatMap(_ => learnCorrPerson[F, A](analyser, collective, maxItems, maxTextLen))
-      .flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen))
-      .flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen))
+    learnCorrOrg[F, A](analyser, store, collective, maxItems, maxTextLen)
+      .flatMap(_ =>
+        learnCorrPerson[F, A](analyser, store, collective, maxItems, maxTextLen)
+      )
+      .flatMap(_ => learnConcPerson(analyser, store, collective, maxItems, maxTextLen))
+      .flatMap(_ => learnConcEquip(analyser, store, collective, maxItems, maxTextLen))

   def learnCorrOrg[F[_]: Async, A](
       analyser: TextAnalyser[F],
+      store: Store[F],
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
   ): Task[F, A, Unit] =
-    learn(analyser, collective)(
+    learn(store, analyser, collective)(
       ClassifierName.correspondentOrg,
-      ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen)
+      _ => SelectItems.forCorrOrg(store, collective, maxItems, maxTextLen)
     )

   def learnCorrPerson[F[_]: Async, A](
       analyser: TextAnalyser[F],
+      store: Store[F],
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
   ): Task[F, A, Unit] =
-    learn(analyser, collective)(
+    learn(store, analyser, collective)(
       ClassifierName.correspondentPerson,
-      ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen)
+      _ => SelectItems.forCorrPerson(store, collective, maxItems, maxTextLen)
     )

   def learnConcPerson[F[_]: Async, A](
       analyser: TextAnalyser[F],
+      store: Store[F],
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
   ): Task[F, A, Unit] =
-    learn(analyser, collective)(
+    learn(store, analyser, collective)(
       ClassifierName.concernedPerson,
-      ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen)
+      _ => SelectItems.forConcPerson(store, collective, maxItems, maxTextLen)
     )

   def learnConcEquip[F[_]: Async, A](
       analyser: TextAnalyser[F],
+      store: Store[F],
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
   ): Task[F, A, Unit] =
-    learn(analyser, collective)(
+    learn(store, analyser, collective)(
       ClassifierName.concernedEquip,
-      ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen)
+      _ => SelectItems.forConcEquip(store, collective, maxItems, maxTextLen)
     )

   private def learn[F[_]: Async, A](
+      store: Store[F],
       analyser: TextAnalyser[F],
       collective: Ident
   )(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =
     Task { ctx =>
       ctx.logger.info(s"Learn classifier ${cname.name}") *>
         analyser.classifier.trainClassifier(ctx.logger, data(ctx))(
-          Kleisli(StoreClassifierModel.handleModel(ctx, collective, cname))
+          Kleisli(StoreClassifierModel.handleModel(store, ctx.logger, collective, cname))
         )
     }
 }

View File

@@ -9,16 +9,17 @@ package docspell.joex.learn
 import cats.data.Kleisli
 import cats.effect._
 import cats.implicits._

 import docspell.analysis.TextAnalyser
 import docspell.common._
 import docspell.scheduler._
+import docspell.store.Store
 import docspell.store.records.RClassifierSetting

 object LearnTags {

   def learnTagCategory[F[_]: Async, A](
       analyser: TextAnalyser[F],
+      store: Store[F],
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
@@ -26,12 +27,14 @@ object LearnTags {
       category: String
   ): Task[F, A, Unit] =
     Task { ctx =>
-      val data = SelectItems.forCategory(ctx, collective)(maxItems, category, maxTextLen)
+      val data =
+        SelectItems.forCategory(store, collective)(maxItems, category, maxTextLen)
       ctx.logger.info(s"Learn classifier for tag category: $category") *>
         analyser.classifier.trainClassifier(ctx.logger, data)(
           Kleisli(
             StoreClassifierModel.handleModel(
-              ctx,
+              store,
+              ctx.logger,
               collective,
               ClassifierName.tagCategory(category)
             )
@@ -39,15 +42,15 @@ object LearnTags {
         )
     }

-  def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])(
+  def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F], store: Store[F])(
       collective: Ident,
       maxItems: Int,
       maxTextLen: Int
   ): Task[F, A, Unit] =
     Task { ctx =>
       for {
-        cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective))
-        task = learnTagCategory[F, A](analyser, collective, maxItems, maxTextLen) _
+        cats <- store.transact(RClassifierSetting.getActiveCategories(collective))
+        task = learnTagCategory[F, A](analyser, store, collective, maxItems, maxTextLen) _
         _ <- cats.map(task).traverse(_.run(ctx))
       } yield ()
     }

View File

@@ -10,7 +10,6 @@ import fs2.{Pipe, Stream}
 import docspell.analysis.classifier.TextClassifier.Data
 import docspell.common._
-import docspell.scheduler.Context
 import docspell.store.Store
 import docspell.store.qb.Batch
 import docspell.store.queries.{QItem, TextAndTag}
@@ -21,16 +20,7 @@ object SelectItems {
   val pageSep = LearnClassifierTask.pageSep
   val noClass = LearnClassifierTask.noClass

-  def forCategory[F[_]](ctx: Context[F, _], collective: Ident)(
-      maxItems: Int,
-      category: String,
-      maxTextLen: Int
-  ): Stream[F, Data] =
-    forCategory(ctx.store, collective, maxItems, category, maxTextLen)
-
-  def forCategory[F[_]](
-      store: Store[F],
-      collective: Ident,
+  def forCategory[F[_]](store: Store[F], collective: Ident)(
       maxItems: Int,
       category: String,
       maxTextLen: Int

View File

@@ -12,7 +12,6 @@ import fs2.io.file.Files
 import docspell.analysis.classifier.ClassifierModel
 import docspell.common._
-import docspell.scheduler._
 import docspell.logging.Logger
 import docspell.store.Store
 import docspell.store.records.RClassifierModel
@@ -20,21 +19,12 @@ import docspell.store.records.RClassifierModel
 object StoreClassifierModel {

   def handleModel[F[_]: Async](
-      ctx: Context[F, _],
+      store: Store[F],
+      logger: Logger[F],
       collective: Ident,
       modelName: ClassifierName
   )(
       trainedModel: ClassifierModel
-  ): F[Unit] =
-    handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel)
-
-  def handleModel[F[_]: Async](
-      store: Store[F],
-      logger: Logger[F]
-  )(
-      collective: Ident,
-      modelName: ClassifierName,
-      trainedModel: ClassifierModel
   ): F[Unit] =
     for {
       oldFile <- store.transact(

View File

@@ -70,7 +70,7 @@ object ReadMail {
         HtmlBodyViewConfig.default.copy(
           textToHtml = MarkdownBody.makeHtml(markdownCfg)
         )
-      ).map(makeHtmlBinary[F] _).map(b => Some(b))
+      ).map(makeHtmlBinary[F]).map(b => Some(b))
     }

   for {

View File

@@ -9,7 +9,6 @@ package docspell.joex.notify
 import cats.data.NonEmptyList
 import cats.effect._
 import cats.implicits._

 import docspell.backend.ops.ONotification
 import docspell.common._
 import docspell.scheduler.Context
@@ -20,6 +19,7 @@ import docspell.notification.api.PeriodicDueItemsArgs
 import docspell.query.Date
 import docspell.query.ItemQuery._
 import docspell.query.ItemQueryDsl._
+import docspell.store.Store
 import docspell.store.qb.Batch
 import docspell.store.queries.ListItem
 import docspell.store.queries.{QItem, Query}
@@ -32,11 +32,14 @@ object PeriodicDueItemsTask {
   def onCancel[F[_]]: Task[F, Args, Unit] =
     Task.log(_.warn(s"Cancelling ${taskName.id} task"))

-  def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](
+      store: Store[F],
+      notificationOps: ONotification[F]
+  ): Task[F, Args, Unit] =
     Task { ctx =>
       val limit = 7
       Timestamp.current[F].flatMap { now =>
-        withItems(ctx, limit, now) { items =>
+        withItems(ctx, store, limit, now) { items =>
           withEventContext(ctx, items, limit, now) { eventCtx =>
             withChannel(ctx, notificationOps) { channels =>
               notificationOps.sendMessage(ctx.logger, eventCtx, channels)
@@ -51,7 +54,12 @@ object PeriodicDueItemsTask {
   ): F[Unit] =
     TaskOperations.withChannel(ctx.logger, ctx.args.channels, ctx.args.account, ops)(cont)

-  def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)(
+  def withItems[F[_]: Sync](
+      ctx: Context[F, Args],
+      store: Store[F],
+      limit: Int,
+      now: Timestamp
+  )(
       cont: Vector[ListItem] => F[Unit]
   ): F[Unit] = {
     val rightDate = Date((now + Duration.days(ctx.args.remindDays.toLong)).toMillis)
@@ -77,7 +85,7 @@ object PeriodicDueItemsTask {
     for {
       res <-
-        ctx.store
+        store
           .transact(
             QItem
               .findItems(q, now.toUtcDate, 0, Batch.limit(limit))

View File

@@ -10,7 +10,6 @@ import cats.data.OptionT
 import cats.data.{NonEmptyList => Nel}
 import cats.effect._
 import cats.implicits._

 import docspell.backend.ops.ONotification
 import docspell.common._
 import docspell.scheduler.Context
@@ -22,6 +21,7 @@ import docspell.query.ItemQuery
 import docspell.query.ItemQuery.Expr
 import docspell.query.ItemQuery.Expr.AndExpr
 import docspell.query.ItemQueryParser
+import docspell.store.Store
 import docspell.store.qb.Batch
 import docspell.store.queries.ListItem
 import docspell.store.queries.{QItem, Query}
@@ -36,11 +36,14 @@ object PeriodicQueryTask {
   def onCancel[F[_]]: Task[F, Args, Unit] =
     Task.log(_.warn(s"Cancelling ${taskName.id} task"))

-  def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](
+      store: Store[F],
+      notificationOps: ONotification[F]
+  ): Task[F, Args, Unit] =
     Task { ctx =>
       val limit = 7
       Timestamp.current[F].flatMap { now =>
-        withItems(ctx, limit, now) { items =>
+        withItems(ctx, store, limit, now) { items =>
           withEventContext(ctx, items, limit, now) { eventCtx =>
             withChannel(ctx, notificationOps) { channels =>
               notificationOps.sendMessage(ctx.logger, eventCtx, channels)
@@ -58,9 +61,11 @@ object PeriodicQueryTask {
   private def queryString(q: ItemQuery.Expr) =
     ItemQueryParser.asString(q)

-  def withQuery[F[_]: Sync](ctx: Context[F, Args])(cont: Query => F[Unit]): F[Unit] = {
+  def withQuery[F[_]: Sync](ctx: Context[F, Args], store: Store[F])(
+      cont: Query => F[Unit]
+  ): F[Unit] = {
     def fromBookmark(id: String) =
-      ctx.store
+      store
         .transact(RQueryBookmark.findByNameOrId(ctx.args.account, id))
         .map(_.map(_.query))
         .flatTap(q =>
@@ -68,7 +73,7 @@ object PeriodicQueryTask {
         )

     def fromShare(id: String) =
-      ctx.store
+      store
         .transact(RShare.findOneByCollective(ctx.args.account.collective, Some(true), id))
         .map(_.map(_.query))
         .flatTap(q =>
@@ -120,11 +125,16 @@ object PeriodicQueryTask {
     }
   }

-  def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)(
+  def withItems[F[_]: Sync](
+      ctx: Context[F, Args],
+      store: Store[F],
+      limit: Int,
+      now: Timestamp
+  )(
       cont: Vector[ListItem] => F[Unit]
   ): F[Unit] =
-    withQuery(ctx) { query =>
-      val items = ctx.store
+    withQuery(ctx, store) { query =>
+      val items = store
         .transact(QItem.findItems(query, now.toUtcDate, 0, Batch.limit(limit)))
         .compile
         .to(Vector)

View File

@@ -13,6 +13,7 @@ import docspell.backend.JobFactory
 import docspell.backend.ops.OJoex
 import docspell.common._
 import docspell.scheduler.{Context, Job, JobStore, Task}
+import docspell.store.Store
 import docspell.store.records.RAttachment

 object AllPageCountTask {
@@ -20,11 +21,15 @@ object AllPageCountTask {
   val taskName = Ident.unsafe("all-page-count")
   type Args = Unit

-  def apply[F[_]: Sync](jobStore: JobStore[F], joex: OJoex[F]): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](
+      store: Store[F],
+      jobStore: JobStore[F],
+      joex: OJoex[F]
+  ): Task[F, Args, Unit] =
     Task { ctx =>
       for {
         _ <- ctx.logger.info("Generating previews for attachments")
-        n <- submitConversionJobs(ctx, jobStore)
+        n <- submitConversionJobs(ctx, store, jobStore)
         _ <- ctx.logger.info(s"Submitted $n jobs")
         _ <- joex.notifyAllNodes
       } yield ()
@@ -35,9 +40,10 @@ object AllPageCountTask {
   def submitConversionJobs[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       jobStore: JobStore[F]
   ): F[Int] =
-    ctx.store
+    store
       .transact(findAttachments)
       .chunks
       .flatMap(createJobs[F])

View File

@@ -8,11 +8,11 @@ package docspell.joex.pagecount
 import cats.effect._
 import cats.implicits._

 import docspell.common._
 import docspell.joex.process.AttachmentPageCount
 import docspell.scheduler.Context
 import docspell.scheduler.Task
+import docspell.store.Store
 import docspell.store.records.RAttachment
 import docspell.store.records.RAttachmentMeta

@@ -20,10 +20,10 @@ object MakePageCountTask {
   type Args = MakePageCountArgs

-  def apply[F[_]: Sync](): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
     Task { ctx =>
       for {
-        exists <- pageCountExists(ctx)
+        exists <- pageCountExists(ctx, store)
         _ <-
           if (exists)
             ctx.logger.info(
@@ -32,7 +32,7 @@ object MakePageCountTask {
           else
             ctx.logger.info(
               s"Reading page-count for attachment ${ctx.args.attachment}"
-            ) *> generatePageCount(ctx)
+            ) *> generatePageCount(ctx, store)
       } yield ()
     }

@@ -40,19 +40,20 @@ object MakePageCountTask {
     Task.log(_.warn("Cancelling make-page-count task"))

   private def generatePageCount[F[_]: Sync](
-      ctx: Context[F, Args]
+      ctx: Context[F, Args],
+      store: Store[F]
   ): F[Unit] =
     for {
-      ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
+      ra <- store.transact(RAttachment.findById(ctx.args.attachment))
       _ <- ra
-        .map(AttachmentPageCount.createPageCount(ctx))
+        .map(AttachmentPageCount.createPageCount(ctx, store))
        .getOrElse(
          ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}")
        )
    } yield ()

-  private def pageCountExists[F[_]](ctx: Context[F, Args]): F[Boolean] =
-    ctx.store.transact(
+  private def pageCountExists[F[_]](ctx: Context[F, Args], store: Store[F]): F[Boolean] =
+    store.transact(
       RAttachmentMeta
         .findPageCountById(ctx.args.attachment)
         .map(_.exists(_ > 0))

View File

@@ -12,6 +12,7 @@ import fs2.{Chunk, Stream}
 import docspell.backend.ops.OJoex
 import docspell.common._
 import docspell.scheduler.{Context, Job, JobStore, Task}
+import docspell.store.Store
 import docspell.store.records.RAttachment

 /* A task to find all non-converted pdf files (of a collective, or
@@ -21,11 +22,15 @@ import docspell.store.records.RAttachment
 object ConvertAllPdfTask {
   type Args = ConvertAllPdfArgs

-  def apply[F[_]: Sync](jobStore: JobStore[F], joex: OJoex[F]): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](
+      jobStore: JobStore[F],
+      joex: OJoex[F],
+      store: Store[F]
+  ): Task[F, Args, Unit] =
     Task { ctx =>
       for {
         _ <- ctx.logger.info("Converting pdfs using ocrmypdf")
-        n <- submitConversionJobs(ctx, jobStore)
+        n <- submitConversionJobs(ctx, store, jobStore)
         _ <- ctx.logger.info(s"Submitted $n file conversion jobs")
         _ <- joex.notifyAllNodes
       } yield ()
@@ -36,9 +41,10 @@ object ConvertAllPdfTask {
   def submitConversionJobs[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       jobStore: JobStore[F]
   ): F[Int] =
-    ctx.store
+    store
       .transact(RAttachment.findNonConvertedPdf(ctx.args.collective, 50))
       .chunks
       .flatMap(createJobs[F](ctx))

View File

@@ -11,14 +11,13 @@ import cats.data.OptionT
 import cats.effect._
 import cats.implicits._
 import fs2.Stream

 import docspell.common._
 import docspell.convert.ConversionResult
 import docspell.convert.extern.OcrMyPdf
 import docspell.joex.Config
 import docspell.scheduler.{Context, Task}
+import docspell.store.Store
 import docspell.store.records._
 import io.circe.generic.semiauto._
 import io.circe.{Decoder, Encoder}
@@ -36,12 +35,12 @@ object PdfConvTask {
   val taskName = Ident.unsafe("pdf-files-migration")

-  def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] =
+  def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, Unit] =
     Task { ctx =>
       for {
         _ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
-        meta <- checkInputs(cfg, ctx)
-        _ <- meta.traverse(fm => convert(cfg, ctx, fm))
+        meta <- checkInputs(cfg, ctx, store)
+        _ <- meta.traverse(fm => convert(cfg, ctx, store, fm))
       } yield ()
     }

@@ -53,19 +52,20 @@ object PdfConvTask {
   // check if file exists and if it is pdf and if source id is the same and if ocrmypdf is enabled
   def checkInputs[F[_]: Sync](
       cfg: Config,
-      ctx: Context[F, Args]
+      ctx: Context[F, Args],
+      store: Store[F]
   ): F[Option[RFileMeta]] = {
     val none: Option[RFileMeta] = None

     val checkSameFiles =
       (for {
-        ra <- OptionT(ctx.store.transact(RAttachment.findById(ctx.args.attachId)))
+        ra <- OptionT(store.transact(RAttachment.findById(ctx.args.attachId)))
         isSame <- OptionT.liftF(
-          ctx.store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId))
+          store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId))
        )
      } yield isSame).getOrElse(false)

     val existsPdf =
       for {
-        meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
+        meta <- store.transact(RAttachment.findMeta(ctx.args.attachId))
         res = meta.filter(_.mimetype.matches(MimeType.pdf))
         _ <-
           if (res.isEmpty)
@@ -90,18 +90,19 @@ object PdfConvTask {
   def convert[F[_]: Async](
       cfg: Config,
       ctx: Context[F, Args],
+      store: Store[F],
       in: RFileMeta
   ): F[Unit] = {
-    val fs = ctx.store.fileRepo
+    val fs = store.fileRepo
     val data = fs.getBytes(in.id)

     val storeResult: ConversionResult.Handler[F, Unit] =
       Kleisli {
         case ConversionResult.SuccessPdf(file) =>
-          storeToAttachment(ctx, in, file)
+          storeToAttachment(ctx, store, in, file)

         case ConversionResult.SuccessPdfTxt(file, _) =>
-          storeToAttachment(ctx, in, file)
+          storeToAttachment(ctx, store, in, file)

         case ConversionResult.UnsupportedFormat(mime) =>
           ctx.logger.warn(
@@ -124,19 +125,20 @@ object PdfConvTask {
     )(data, storeResult)

     for {
-      lang <- getLanguage(ctx)
+      lang <- getLanguage(ctx, store)
       _ <- ocrMyPdf(lang)
     } yield ()
   }

-  def getLanguage[F[_]: Sync](ctx: Context[F, Args]): F[Language] =
+  def getLanguage[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[Language] =
     (for {
-      coll <- OptionT(ctx.store.transact(RCollective.findByAttachment(ctx.args.attachId)))
+      coll <- OptionT(store.transact(RCollective.findByAttachment(ctx.args.attachId)))
       lang = coll.language
     } yield lang).getOrElse(Language.German)

   def storeToAttachment[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       meta: RFileMeta,
       newFile: Stream[F, Byte]
   ): F[Unit] = {
@@ -146,10 +148,10 @@ object PdfConvTask {
     for {
       fid <-
         newFile
-          .through(ctx.store.fileRepo.save(collective, cat, mimeHint))
+          .through(store.fileRepo.save(collective, cat, mimeHint))
           .compile
           .lastOrError
-      _ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid))
+      _ <- store.transact(RAttachment.updateFileId(ctx.args.attachId, fid))
     } yield ()
   }
 }
} }

View File

@@ -14,17 +14,22 @@ import docspell.backend.ops.OJoex
 import docspell.common.MakePreviewArgs.StoreMode
 import docspell.common._
 import docspell.scheduler.{Context, Job, JobStore, Task}
+import docspell.store.Store
 import docspell.store.records.RAttachment

 object AllPreviewsTask {

   type Args = AllPreviewsArgs

-  def apply[F[_]: Sync](jobStore: JobStore[F], joex: OJoex[F]): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](
+      jobStore: JobStore[F],
+      joex: OJoex[F],
+      store: Store[F]
+  ): Task[F, Args, Unit] =
     Task { ctx =>
       for {
         _ <- ctx.logger.info("Generating previews for attachments")
-        n <- submitConversionJobs(ctx, jobStore)
+        n <- submitConversionJobs(ctx, store, jobStore)
         _ <- ctx.logger.info(s"Submitted $n jobs")
         _ <- joex.notifyAllNodes
       } yield ()
@@ -35,9 +40,10 @@ object AllPreviewsTask {
   def submitConversionJobs[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       jobStore: JobStore[F]
   ): F[Int] =
-    ctx.store
+    store
       .transact(findAttachments(ctx))
       .chunks
       .flatMap(createJobs[F](ctx))

View File

@@ -8,13 +8,13 @@ package docspell.joex.preview
 import cats.effect._
 import cats.implicits._

 import docspell.common._
 import docspell.extract.pdfbox.PdfboxPreview
 import docspell.extract.pdfbox.PreviewConfig
 import docspell.joex.process.AttachmentPreview
 import docspell.scheduler.Context
 import docspell.scheduler.Task
+import docspell.store.Store
 import docspell.store.records.RAttachment
 import docspell.store.records.RAttachmentPreview

@@ -22,10 +22,10 @@ object MakePreviewTask {
   type Args = MakePreviewArgs

-  def apply[F[_]: Sync](pcfg: PreviewConfig): Task[F, Args, Unit] =
+  def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F]): Task[F, Args, Unit] =
     Task { ctx =>
       for {
-        exists <- previewExists(ctx)
+        exists <- previewExists(ctx, store)
         preview <- PdfboxPreview(pcfg)
         _ <-
           if (exists)
@@ -35,7 +35,7 @@ object MakePreviewTask {
           else
             ctx.logger.info(
               s"Generating preview image for attachment ${ctx.args.attachment}"
-            ) *> generatePreview(ctx, preview)
+            ) *> generatePreview(ctx, store, preview)
       } yield ()
     }

@@ -44,20 +44,24 @@ object MakePreviewTask {
   private def generatePreview[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       preview: PdfboxPreview[F]
   ): F[Unit] =
     for {
-      ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
+      ra <- store.transact(RAttachment.findById(ctx.args.attachment))
       _ <- ra
-        .map(AttachmentPreview.createPreview(ctx, preview))
+        .map(AttachmentPreview.createPreview(ctx, store, preview))
        .getOrElse(
          ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}")
        )
    } yield ()

-  private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] =
+  private def previewExists[F[_]: Sync](
+      ctx: Context[F, Args],
+      store: Store[F]
+  ): F[Boolean] =
     if (ctx.args.store == MakePreviewArgs.StoreMode.WhenMissing)
-      ctx.store.transact(
+      store.transact(
         RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined)
       )
     else

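MakePreviewTask now also takes the store directly; the logic it guards is unchanged: with StoreMode.WhenMissing, an existing preview short-circuits generation. A hedged sketch of that gate — only WhenMissing appears in the diff, the Replace variant is assumed for illustration:

object PreviewGate {
  import cats.Monad
  import cats.syntax.all._

  sealed trait StoreMode
  object StoreMode {
    case object Replace     extends StoreMode
    case object WhenMissing extends StoreMode
  }

  // Skip the expensive rendering when a preview exists and the caller
  // only asked to fill in missing ones.
  def run[F[_]: Monad](
      mode: StoreMode,
      previewExists: F[Boolean],
      generate: F[Unit]
  ): F[Unit] =
    for {
      exists <- mode match {
        case StoreMode.WhenMissing => previewExists
        case StoreMode.Replace     => false.pure[F]
      }
      _ <- if (exists) ().pure[F] else generate
    } yield ()
}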

@ -11,11 +11,11 @@ import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.extract.pdfbox.PdfMetaData import docspell.extract.pdfbox.PdfMetaData
import docspell.extract.pdfbox.PdfboxExtract import docspell.extract.pdfbox.PdfboxExtract
import docspell.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records._ import docspell.store.records._
@ -24,7 +24,7 @@ import docspell.store.records._
*/ */
object AttachmentPageCount { object AttachmentPageCount {
def apply[F[_]: Sync]()( def apply[F[_]: Sync](store: Store[F])(
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
@ -33,7 +33,7 @@ object AttachmentPageCount {
s"Retrieving page count for ${item.attachments.size} files…" s"Retrieving page count for ${item.attachments.size} files…"
) )
_ <- item.attachments _ <- item.attachments
.traverse(createPageCount(ctx)) .traverse(createPageCount(ctx, store))
.attempt .attempt
.flatMap { .flatMap {
case Right(_) => ().pure[F] case Right(_) => ().pure[F]
@ -46,14 +46,15 @@ object AttachmentPageCount {
} }
def createPageCount[F[_]: Sync]( def createPageCount[F[_]: Sync](
ctx: Context[F, _] ctx: Context[F, _],
store: Store[F]
)(ra: RAttachment): F[Option[PdfMetaData]] = )(ra: RAttachment): F[Option[PdfMetaData]] =
findMime[F](ctx)(ra).flatMap { findMime[F](store)(ra).flatMap {
case MimeType.PdfMatch(_) => case MimeType.PdfMatch(_) =>
PdfboxExtract.getMetaData(loadFile(ctx)(ra)).flatMap { PdfboxExtract.getMetaData(loadFile(store)(ra)).flatMap {
case Right(md) => case Right(md) =>
ctx.logger.debug(s"Found number of pages: ${md.pageCount}") *> ctx.logger.debug(s"Found number of pages: ${md.pageCount}") *>
updatePageCount(ctx, md, ra).map(_.some) updatePageCount(ctx, store, md, ra).map(_.some)
case Left(ex) => case Left(ex) =>
ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *> ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *>
(None: Option[PdfMetaData]).pure[F] (None: Option[PdfMetaData]).pure[F]
@ -66,6 +67,7 @@ object AttachmentPageCount {
private def updatePageCount[F[_]: Sync]( private def updatePageCount[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
md: PdfMetaData, md: PdfMetaData,
ra: RAttachment ra: RAttachment
): F[PdfMetaData] = ): F[PdfMetaData] =
@ -73,12 +75,12 @@ object AttachmentPageCount {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Update attachment ${ra.id.id} with page count ${md.pageCount.some}" s"Update attachment ${ra.id.id} with page count ${md.pageCount.some}"
) )
n <- ctx.store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some)) n <- store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some))
m <- m <-
if (n == 0) if (n == 0)
ctx.logger.warn( ctx.logger.warn(
s"No attachmentmeta record exists for ${ra.id.id}. Creating new." s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
) *> ctx.store.transact( ) *> store.transact(
RAttachmentMeta.insert( RAttachmentMeta.insert(
RAttachmentMeta( RAttachmentMeta(
ra.id, ra.id,
@ -94,11 +96,11 @@ object AttachmentPageCount {
_ <- ctx.logger.debug(s"Stored page count (${n + m}).") _ <- ctx.logger.debug(s"Stored page count (${n + m}).")
} yield md } yield md
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] =
ctx.store.fileRepo.getBytes(ra.fileId) store.fileRepo.getBytes(ra.fileId)
} }

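updatePageCount above is a hand-rolled upsert: run the UPDATE first, and only when it touched zero rows INSERT a fresh record. The same idiom in isolation, with the queries abstracted to plain effects returning affected-row counts (as doobie updates do):

object UpdateThenInsert {
  import cats.effect.Sync
  import cats.syntax.all._

  def upsert[F[_]: Sync](
      update: F[Int],
      insert: F[Int],
      warn: String => F[Unit]
  ): F[Int] =
    for {
      n <- update // number of rows the UPDATE touched
      m <-
        if (n == 0) warn("No record exists, creating new.") *> insert
        else 0.pure[F]
    } yield n + m
}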

@ -11,11 +11,11 @@ import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig import docspell.extract.pdfbox.PreviewConfig
import docspell.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.queries.QAttachment import docspell.store.queries.QAttachment
import docspell.store.records.RAttachment import docspell.store.records.RAttachment
import docspell.store.records._ import docspell.store.records._
@ -26,7 +26,7 @@ import docspell.store.records._
*/ */
object AttachmentPreview { object AttachmentPreview {
def apply[F[_]: Sync](pcfg: PreviewConfig)( def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F])(
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
@ -36,7 +36,7 @@ object AttachmentPreview {
) )
preview <- PdfboxPreview(pcfg) preview <- PdfboxPreview(pcfg)
_ <- item.attachments _ <- item.attachments
.traverse(createPreview(ctx, preview)) .traverse(createPreview(ctx, store, preview))
.attempt .attempt
.flatMap { .flatMap {
case Right(_) => ().pure[F] case Right(_) => ().pure[F]
@ -50,16 +50,17 @@ object AttachmentPreview {
def createPreview[F[_]: Sync]( def createPreview[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
preview: PdfboxPreview[F] preview: PdfboxPreview[F]
)( )(
ra: RAttachment ra: RAttachment
): F[Option[RAttachmentPreview]] = ): F[Option[RAttachmentPreview]] =
findMime[F](ctx)(ra).flatMap { findMime[F](store)(ra).flatMap {
case MimeType.PdfMatch(_) => case MimeType.PdfMatch(_) =>
preview.previewPNG(loadFile(ctx)(ra)).flatMap { preview.previewPNG(loadFile(store)(ra)).flatMap {
case Some(out) => case Some(out) =>
ctx.logger.debug("Preview generated, saving to database…") *> ctx.logger.debug("Preview generated, saving to database…") *>
createRecord(ctx, ra.fileId.collective, out, ra).map(_.some) createRecord(store, ra.fileId.collective, out, ra).map(_.some)
case None => case None =>
ctx.logger ctx.logger
.info(s"Preview could not be generated. Maybe the pdf has no pages?") *> .info(s"Preview could not be generated. Maybe the pdf has no pages?") *>
@ -72,7 +73,7 @@ object AttachmentPreview {
} }
private def createRecord[F[_]: Sync]( private def createRecord[F[_]: Sync](
ctx: Context[F, _], store: Store[F],
collective: Ident, collective: Ident,
png: Stream[F, Byte], png: Stream[F, Byte],
ra: RAttachment ra: RAttachment
@ -83,7 +84,7 @@ object AttachmentPreview {
for { for {
fileId <- png fileId <- png
.through( .through(
ctx.store.fileRepo.save( store.fileRepo.save(
collective, collective,
FileCategory.PreviewImage, FileCategory.PreviewImage,
MimeTypeHint(name.map(_.fullName), Some("image/png")) MimeTypeHint(name.map(_.fullName), Some("image/png"))
@ -93,16 +94,16 @@ object AttachmentPreview {
.lastOrError .lastOrError
now <- Timestamp.current[F] now <- Timestamp.current[F]
rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now) rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now)
_ <- QAttachment.deletePreview(ctx.store)(ra.id) _ <- QAttachment.deletePreview(store)(ra.id)
_ <- ctx.store.transact(RAttachmentPreview.insert(rp)) _ <- store.transact(RAttachmentPreview.insert(rp))
} yield rp } yield rp
} }
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] =
ctx.store.fileRepo.getBytes(ra.fileId) store.fileRepo.getBytes(ra.fileId)
} }

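createRecord pipes the rendered PNG through fileRepo.save, a Pipe that emits the key of the stored file, and compile.lastOrError pulls that single key out of the stream. A reduced fs2 sketch — the save pipe here merely derives a fake key from the byte count:

object SavePipe {
  import cats.effect.IO
  import fs2.{Pipe, Stream}

  type FileKey = String

  // Stand-in for store.fileRepo.save: consume the bytes, emit one key.
  def save: Pipe[IO, Byte, FileKey] =
    bytes => Stream.eval(bytes.compile.count.map(n => s"file-$n"))

  val storedKey: IO[FileKey] =
    Stream
      .emits("png bytes".getBytes.toList)
      .covary[IO]
      .through(save)
      .compile
      .lastOrError // fails if the pipe emitted no key
}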

@ -11,13 +11,13 @@ import cats.data.{Kleisli, OptionT}
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.convert.ConversionResult.Handler import docspell.convert.ConversionResult.Handler
import docspell.convert.SanitizeHtml import docspell.convert.SanitizeHtml
import docspell.convert._ import docspell.convert._
import docspell.joex.extract.JsoupSanitizer import docspell.joex.extract.JsoupSanitizer
import docspell.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
/** Goes through all attachments and creates a PDF version of each where supported. /** Goes through all attachments and creates a PDF version of each where supported.
@ -36,21 +36,22 @@ object ConvertPdf {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: ConvertConfig, cfg: ConvertConfig,
store: Store[F],
item: ItemData item: ItemData
): Task[F, Args, ItemData] = ): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
isConverted(ctx)(ra).flatMap { isConverted(store)(ra).flatMap {
case true if ctx.args.isNormalProcessing => case true if ctx.args.isNormalProcessing =>
ctx.logger.info( ctx.logger.info(
s"Conversion to pdf already done for attachment ${ra.name}." s"Conversion to pdf already done for attachment ${ra.name}."
) *> ) *>
ctx.store store
.transact(RAttachmentMeta.findById(ra.id)) .transact(RAttachmentMeta.findById(ra.id))
.map(rmOpt => (ra, rmOpt)) .map(rmOpt => (ra, rmOpt))
case _ => case _ =>
findMime(ctx)(ra).flatMap(m => findMime(store)(ra).flatMap(m =>
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m) convertSafe(cfg, JsoupSanitizer.clean, ctx, store, item)(ra, m)
) )
} }
@ -62,13 +63,15 @@ object ConvertPdf {
} }
def isConverted[F[_]](ctx: Context[F, Args])( def isConverted[F[_]](store: Store[F])(
ra: RAttachment ra: RAttachment
): F[Boolean] = ): F[Boolean] =
ctx.store.transact(RAttachmentSource.isConverted(ra.id)) store.transact(RAttachmentSource.isConverted(ra.id))
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) ra: RAttachment
): F[MimeType] =
OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
@ -76,14 +79,15 @@ object ConvertPdf {
cfg: ConvertConfig, cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml, sanitizeHtml: SanitizeHtml,
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
item: ItemData item: ItemData
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
loadCollectivePasswords(ctx).flatMap(collPass => loadCollectivePasswords(ctx, store).flatMap(collPass =>
Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv => Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv =>
mime match { mime match {
case mt => case mt =>
val data = ctx.store.fileRepo.getBytes(ra.fileId) val data = store.fileRepo.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, cfg, ra, item) val handler = conversionHandler[F](ctx, store, cfg, ra, item)
ctx.logger ctx.logger
.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> .info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)( conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
@ -94,14 +98,16 @@ object ConvertPdf {
) )
private def loadCollectivePasswords[F[_]: Async]( private def loadCollectivePasswords[F[_]: Async](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[List[Password]] = ): F[List[Password]] =
ctx.store store
.transact(RCollectivePassword.findAll(ctx.args.meta.collective)) .transact(RCollectivePassword.findAll(ctx.args.meta.collective))
.map(_.map(_.password).distinct) .map(_.map(_.password).distinct)
private def conversionHandler[F[_]: Sync]( private def conversionHandler[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: ConvertConfig, cfg: ConvertConfig,
ra: RAttachment, ra: RAttachment,
item: ItemData item: ItemData
@ -109,12 +115,12 @@ object ConvertPdf {
Kleisli { Kleisli {
case ConversionResult.SuccessPdf(pdf) => case ConversionResult.SuccessPdf(pdf) =>
ctx.logger.info(s"Conversion to pdf successful. Saving file.") *> ctx.logger.info(s"Conversion to pdf successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf) storePDF(ctx, store, cfg, ra, pdf)
.map(r => (r, None)) .map(r => (r, None))
case ConversionResult.SuccessPdfTxt(pdf, txt) => case ConversionResult.SuccessPdfTxt(pdf, txt) =>
ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *> ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf) storePDF(ctx, store, cfg, ra, pdf)
.flatMap(r => .flatMap(r =>
txt.map(t => txt.map(t =>
( (
@ -148,6 +154,7 @@ object ConvertPdf {
private def storePDF[F[_]: Sync]( private def storePDF[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
cfg: ConvertConfig, cfg: ConvertConfig,
ra: RAttachment, ra: RAttachment,
pdf: Stream[F, Byte] pdf: Stream[F, Byte]
@ -162,7 +169,7 @@ object ConvertPdf {
pdf pdf
.through( .through(
ctx.store.fileRepo.save( store.fileRepo.save(
ctx.args.meta.collective, ctx.args.meta.collective,
FileCategory.AttachmentConvert, FileCategory.AttachmentConvert,
MimeTypeHint(hint.filename, hint.advertised) MimeTypeHint(hint.filename, hint.advertised)
@ -170,32 +177,33 @@ object ConvertPdf {
) )
.compile .compile
.lastOrError .lastOrError
.flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId)) .flatMap(fmId => updateAttachment[F](ctx, store, ra, fmId, newName).map(_ => fmId))
.map(fmId => ra.copy(fileId = fmId, name = newName)) .map(fmId => ra.copy(fileId = fmId, name = newName))
} }
private def updateAttachment[F[_]: Sync]( private def updateAttachment[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
ra: RAttachment, ra: RAttachment,
fmId: FileKey, fmId: FileKey,
newName: Option[String] newName: Option[String]
): F[Unit] = ): F[Unit] =
for { for {
oldFile <- ctx.store.transact(RAttachment.findById(ra.id)) oldFile <- store.transact(RAttachment.findById(ra.id))
_ <- _ <-
ctx.store store
.transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName)) .transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName))
_ <- oldFile match { _ <- oldFile match {
case Some(raPrev) => case Some(raPrev) =>
for { for {
sameFile <- sameFile <-
ctx.store store
.transact(RAttachmentSource.isSameFile(ra.id, raPrev.fileId)) .transact(RAttachmentSource.isSameFile(ra.id, raPrev.fileId))
_ <- _ <-
if (sameFile) ().pure[F] if (sameFile) ().pure[F]
else else
ctx.logger.info("Deleting previous attachment file") *> ctx.logger.info("Deleting previous attachment file") *>
ctx.store.fileRepo store.fileRepo
.delete(raPrev.fileId) .delete(raPrev.fileId)
.attempt .attempt
.flatMap { .flatMap {

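conversionHandler is a Kleisli — an effectful function from ConversionResult to the stored outcome — which keeps the converter itself unaware of storage. A trimmed sketch with the result type reduced to two cases (the names and storePdf callback are illustrative):

object HandlerSketch {
  import cats.data.Kleisli
  import cats.effect.Sync
  import fs2.Stream

  sealed trait ConversionResult[F[_]]
  final case class SuccessPdf[F[_]](pdf: Stream[F, Byte]) extends ConversionResult[F]
  final case class Failure[F[_]](ex: Throwable)           extends ConversionResult[F]

  // storePdf saves the bytes and yields the new file id.
  def handler[F[_]: Sync](
      storePdf: Stream[F, Byte] => F[String]
  ): Kleisli[F, ConversionResult[F], String] =
    Kleisli {
      case SuccessPdf(pdf) => storePdf(pdf)
      case Failure(ex)     => Sync[F].raiseError[String](ex) // surface the conversion error
    }
}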

@ -11,9 +11,9 @@ import cats.data.OptionT
import cats.effect.Sync import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.file.FileMetadata import docspell.store.file.FileMetadata
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records._ import docspell.store.records._
@ -21,13 +21,13 @@ import docspell.store.records._
/** Task that creates the item. */ /** Task that creates the item. */
object CreateItem { object CreateItem {
def apply[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] =
findExisting[F].flatMap { findExisting[F](store).flatMap {
case Some(ri) => Task.pure(ri) case Some(ri) => Task.pure(ri)
case None => createNew[F] case None => createNew[F](store)
} }
def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = def createNew[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
def isValidFile(fm: FileMetadata) = def isValidFile(fm: FileMetadata) =
ctx.args.meta.validFileTypes.isEmpty || ctx.args.meta.validFileTypes.isEmpty ||
@ -36,11 +36,11 @@ object CreateItem {
def fileMetas(itemId: Ident, now: Timestamp) = def fileMetas(itemId: Ident, now: Timestamp) =
Stream Stream
.eval(ctx.store.transact(RAttachment.nextPosition(itemId))) .eval(store.transact(RAttachment.nextPosition(itemId)))
.flatMap { offset => .flatMap { offset =>
Stream Stream
.emits(ctx.args.files) .emits(ctx.args.files)
.evalMap(f => ctx.store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm))) .evalMap(f => store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm)))
.collect { case (f, Some(fm)) if isValidFile(fm) => f } .collect { case (f, Some(fm)) if isValidFile(fm) => f }
.zipWithIndex .zipWithIndex
.evalMap { case (f, index) => .evalMap { case (f, index) =>
@ -67,11 +67,11 @@ object CreateItem {
(for { (for {
_ <- OptionT.liftF( _ <- OptionT.liftF(
ctx.logger.info( ctx.logger.info(
s"Loading item with id ${id.id} to ammend" s"Loading item with id ${id.id} to amend"
) )
) )
item <- OptionT( item <- OptionT(
ctx.store store
.transact(RItem.findByIdAndCollective(id, ctx.args.meta.collective)) .transact(RItem.findByIdAndCollective(id, ctx.args.meta.collective))
) )
} yield (1, item)) } yield (1, item))
@ -88,7 +88,7 @@ object CreateItem {
ctx.args.meta.direction.getOrElse(Direction.Incoming), ctx.args.meta.direction.getOrElse(Direction.Incoming),
ItemState.Premature ItemState.Premature
) )
n <- ctx.store.transact(RItem.insert(item)) n <- store.transact(RItem.insert(item))
} yield (n, item) } yield (n, item)
} }
@ -98,7 +98,7 @@ object CreateItem {
_ <- if (it._1 != 1) storeItemError[F](ctx) else ().pure[F] _ <- if (it._1 != 1) storeItemError[F](ctx) else ().pure[F]
now <- Timestamp.current[F] now <- Timestamp.current[F]
fm <- fileMetas(it._2.id, now) fm <- fileMetas(it._2.id, now)
k <- fm.traverse(insertAttachment(ctx)) k <- fm.traverse(insertAttachment(store))
_ <- logDifferences(ctx, fm, k.sum) _ <- logDifferences(ctx, fm, k.sum)
dur <- time dur <- time
_ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}") _ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}")
@ -115,25 +115,27 @@ object CreateItem {
) )
} }
def insertAttachment[F[_]](ctx: Context[F, _])(ra: RAttachment): F[Int] = { def insertAttachment[F[_]](store: Store[F])(ra: RAttachment): F[Int] = {
val rs = RAttachmentSource.of(ra) val rs = RAttachmentSource.of(ra)
ctx.store.transact(for { store.transact(for {
n <- RAttachment.insert(ra) n <- RAttachment.insert(ra)
_ <- RAttachmentSource.insert(rs) _ <- RAttachmentSource.insert(rs)
} yield n) } yield n)
} }
private def findExisting[F[_]: Sync]: Task[F, ProcessItemArgs, Option[ItemData]] = private def findExisting[F[_]: Sync](
store: Store[F]
): Task[F, ProcessItemArgs, Option[ItemData]] =
Task { ctx => Task { ctx =>
val states = ItemState.invalidStates val states = ItemState.invalidStates
val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet
for { for {
cand <- ctx.store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states)) cand <- store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states))
_ <- _ <-
if (cand.nonEmpty) if (cand.nonEmpty)
ctx.logger.warn(s"Found ${cand.size} existing item with these files.") ctx.logger.warn(s"Found ${cand.size} existing item with these files.")
else ().pure[F] else ().pure[F]
ht <- cand.drop(1).traverse(ri => QItem.delete(ctx.store)(ri.id, ri.cid)) ht <- cand.drop(1).traverse(ri => QItem.delete(store)(ri.id, ri.cid))
_ <- _ <-
if (ht.sum > 0) if (ht.sum > 0)
ctx.logger.warn(s"Removed ${ht.sum} items with same attachments") ctx.logger.warn(s"Removed ${ht.sum} items with same attachments")
@ -144,7 +146,7 @@ object CreateItem {
OptionT( OptionT(
// load attachments but only those mentioned in the task's arguments // load attachments but only those mentioned in the task's arguments
cand.headOption.traverse(ri => cand.headOption.traverse(ri =>
ctx.store store
.transact(RAttachment.findByItemCollectiveSource(ri.id, ri.cid, fids)) .transact(RAttachment.findByItemCollectiveSource(ri.id, ri.cid, fids))
.flatTap(ats => .flatTap(ats =>
ctx.logger.debug( ctx.logger.debug(
@ -156,7 +158,7 @@ object CreateItem {
) )
.getOrElse(Vector.empty) .getOrElse(Vector.empty)
orig <- rms.traverse(a => orig <- rms.traverse(a =>
ctx.store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s)) store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s))
) )
origMap = origMap =
orig orig

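fileMetas numbers new attachments after the ones already present: fetch the next free position once, then add the stream index to that offset. The arithmetic in isolation (FileRef is a placeholder type):

object Positioning {
  import cats.effect.IO
  import fs2.Stream

  final case class FileRef(name: String)

  // Continue numbering at `offset`, the next free position in the item.
  def positioned(offset: Int, files: List[FileRef]): IO[List[(FileRef, Int)]] =
    Stream
      .emits(files)
      .covary[IO]
      .zipWithIndex
      .map { case (f, idx) => (f, offset + idx.toInt) }
      .compile
      .toList
}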

@ -10,10 +10,10 @@ import cats.data.NonEmptyList
import cats.data.OptionT import cats.data.OptionT
import cats.effect.Sync import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.scheduler.Task import docspell.scheduler.Task
import docspell.logging.Logger import docspell.logging.Logger
import docspell.store.Store
/** After candidates have been determined, the set is reduced by doing some cross checks. /** After candidates have been determined, the set is reduced by doing some cross checks.
* For example: if an organization is suggested as correspondent, the correspondent person * For example: if an organization is suggested as correspondent, the correspondent person
@ -22,13 +22,15 @@ import docspell.logging.Logger
*/ */
object CrossCheckProposals { object CrossCheckProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
val proposals = data.finalProposals val proposals = data.finalProposals
val corrOrg = proposals.find(MetaProposalType.CorrOrg) val corrOrg = proposals.find(MetaProposalType.CorrOrg)
(for { (for {
orgRef <- OptionT.fromOption[F](corrOrg) orgRef <- OptionT.fromOption[F](corrOrg)
persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, ctx)) persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, store))
clProps <- OptionT.liftF( clProps <- OptionT.liftF(
personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef) personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef)
) )
@ -53,7 +55,7 @@ object CrossCheckProposals {
mpl.find(MetaProposalType.CorrPerson) match { mpl.find(MetaProposalType.CorrPerson) match {
case Some(ppl) => case Some(ppl) =>
val list = ppl.values.filter(c => val list = ppl.values.filter(c =>
persRefs.get(c.ref.id).exists(_.organization == Some(orgId)) persRefs.get(c.ref.id).exists(_.organization.contains(orgId))
) )
if (ppl.values.toList == list) mpl.pure[F] if (ppl.values.toList == list) mpl.pure[F]

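Besides the store threading, this hunk swaps `_.organization == Some(orgId)` for `_.organization.contains(orgId)` — the same predicate without allocating a Some, and clearer about intent. The cross-check it sits in keeps only person candidates that belong to the proposed organization; in miniature:

object CrossCheckSketch {
  final case class PersonRef(id: String, organization: Option[String])

  // Keep person candidates only when they belong to the proposed organization.
  def crossCheck(orgId: String, candidates: List[PersonRef]): List[PersonRef] =
    candidates.filter(_.organization.contains(orgId))
}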

@ -8,58 +8,63 @@ package docspell.joex.process
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records.RFileMeta import docspell.store.records.RFileMeta
import docspell.store.records.RJob import docspell.store.records.RJob
import doobie._ import doobie._
object DuplicateCheck { object DuplicateCheck {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync]: Task[F, Args, Args] = def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Args] =
Task { ctx => Task { ctx =>
if (ctx.args.meta.skipDuplicate) if (ctx.args.meta.skipDuplicate)
for { for {
retries <- getRetryCount(ctx) retries <- getRetryCount(ctx, store)
res <- res <-
if (retries == 0) if (retries == 0)
ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx) ctx.logger
.debug("Checking for duplicate files") *> removeDuplicates(ctx, store)
else ctx.args.pure[F] else ctx.args.pure[F]
} yield res } yield res
else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F] else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F]
} }
def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] = def removeDuplicates[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[ProcessItemArgs] =
for { for {
fileMetas <- findDuplicates(ctx) fileMetas <- findDuplicates(ctx, store)
_ <- fileMetas.traverse(deleteDuplicate(ctx)) _ <- fileMetas.traverse(deleteDuplicate(ctx, store))
ids = fileMetas.filter(_.exists).map(_.fm.id).toSet ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
} yield ctx.args.copy(files = } yield ctx.args.copy(files =
ctx.args.files.filterNot(f => ids.contains(f.fileMetaId)) ctx.args.files.filterNot(f => ids.contains(f.fileMetaId))
) )
private def getRetryCount[F[_]: Sync](ctx: Context[F, Args]): F[Int] = private def getRetryCount[F[_]: Sync](ctx: Context[F, _], store: Store[F]): F[Int] =
ctx.store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0)) store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0))
private def deleteDuplicate[F[_]: Sync]( private def deleteDuplicate[F[_]: Sync](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
)(fd: FileMetaDupes): F[Unit] = { )(fd: FileMetaDupes): F[Unit] = {
val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name) val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name)
if (fd.exists) if (fd.exists)
ctx.logger ctx.logger
.info(s"Deleting duplicate file $fname!") *> ctx.store.fileRepo .info(s"Deleting duplicate file $fname!") *> store.fileRepo
.delete(fd.fm.id) .delete(fd.fm.id)
else ().pure[F] else ().pure[F]
} }
private def findDuplicates[F[_]]( private def findDuplicates[F[_]](
ctx: Context[F, Args] ctx: Context[F, Args],
store: Store[F]
): F[Vector[FileMetaDupes]] = ): F[Vector[FileMetaDupes]] =
ctx.store.transact(for { store.transact(for {
fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId)) fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId))
dupes <- fileMetas.traverse(checkDuplicate(ctx)) dupes <- fileMetas.traverse(checkDuplicate(ctx))
} yield dupes) } yield dupes)

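DuplicateCheck only prunes files on the job's first attempt (retries == 0); on a retry the duplicates are already gone, so the args pass through unchanged. A condensed sketch of guard plus filter, with Args simplified to the fields involved:

object DedupeSketch {
  import cats.effect.Sync
  import cats.syntax.all._

  final case class Args(fileIds: List[String], skipDuplicate: Boolean)

  def dedupe[F[_]: Sync](
      args: Args,
      retries: F[Int],
      duplicateIds: F[Set[String]]
  ): F[Args] =
    if (!args.skipDuplicate) args.pure[F]
    else
      retries.flatMap {
        case 0 =>
          duplicateIds.map(dup => args.copy(fileIds = args.fileIds.filterNot(dup)))
        case _ => args.pure[F] // retry: the first run already removed duplicates
      }
}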

@ -7,30 +7,31 @@
package docspell.joex.process package docspell.joex.process
import java.time.{LocalDate, Period} import java.time.{LocalDate, Period}
import cats.effect.Sync import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.scheduler.{Context, Task} import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.{RAttachmentMeta, RPerson} import docspell.store.records.{RAttachmentMeta, RPerson}
/** Calculate weights for candidates, assigning the most likely candidate a lower number. /** Calculate weights for candidates, assigning the most likely candidate a lower number.
*/ */
object EvalProposals { object EvalProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](
Task { ctx => store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { _ =>
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
personRefs <- findOrganizationRelation[F](data, ctx) personRefs <- findOrganizationRelation[F](data, store)
metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs)) metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs))
} yield data.copy(metas = metas) } yield data.copy(metas = metas)
} }
def findOrganizationRelation[F[_]: Sync]( def findOrganizationRelation[F[_]: Sync](
data: ItemData, data: ItemData,
ctx: Context[F, _] store: Store[F]
): F[Map[Ident, PersonRef]] = { ): F[Map[Ident, PersonRef]] = {
val corrPersIds = data.metas val corrPersIds = data.metas
.map(_.proposals) .map(_.proposals)
@ -38,7 +39,7 @@ object EvalProposals {
.flatMap(_.find(MetaProposalType.CorrPerson)) .flatMap(_.find(MetaProposalType.CorrPerson))
.flatMap(_.values.toList.map(_.ref.id)) .flatMap(_.values.toList.map(_.ref.id))
.toSet .toSet
ctx.store store
.transact(RPerson.findOrganization(corrPersIds)) .transact(RPerson.findOrganization(corrPersIds))
.map(_.map(p => (p.id, p)).toMap) .map(_.map(p => (p.id, p)).toMap)
} }

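findOrganizationRelation gathers the candidate person ids, resolves their organizations in a single query, and exposes the result as an id-keyed map so the weighting pass can look up relations in O(1). The map construction in isolation:

object LookupTable {
  final case class PersonRef(id: String, organization: Option[String])

  // One query result, turned into a lookup table keyed by person id.
  def toLookup(persons: Vector[PersonRef]): Map[String, PersonRef] =
    persons.map(p => p.id -> p).toMap
}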

@ -14,13 +14,12 @@ import cats.implicits._
import cats.kernel.Monoid import cats.kernel.Monoid
import cats.kernel.Order import cats.kernel.Order
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
import docspell.files.Zip import docspell.files.Zip
import docspell.joex.mail._ import docspell.joex.mail._
import docspell.scheduler._ import docspell.scheduler._
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
import emil.Mail import emil.Mail
/** Goes through all attachments and extracts archive files, like zip files. The process /** Goes through all attachments and extracts archive files, like zip files. The process
@ -34,39 +33,41 @@ import emil.Mail
object ExtractArchive { object ExtractArchive {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Async]( def apply[F[_]: Async](store: Store[F])(
item: ItemData item: ItemData
): Task[F, Args, ItemData] = ): Task[F, Args, ItemData] =
multiPass(item, None).map(_._2) multiPass(store, item, None).map(_._2)
def multiPass[F[_]: Async]( def multiPass[F[_]: Async](
store: Store[F],
item: ItemData, item: ItemData,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] =
singlePass(item, archive).flatMap { t => singlePass(store, item, archive).flatMap { t =>
if (t._1.isEmpty) Task.pure(t) if (t._1.isEmpty) Task.pure(t)
else multiPass(t._2, t._1) else multiPass(store, t._2, t._1)
} }
def singlePass[F[_]: Async]( def singlePass[F[_]: Async](
store: Store[F],
item: ItemData, item: ItemData,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] =
Task { ctx => Task { ctx =>
def extract(ra: RAttachment, pos: Int): F[Extracted] = def extract(ra: RAttachment, pos: Int): F[Extracted] =
findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, pos, m)) findMime(store)(ra).flatMap(m => extractSafe(ctx, store, archive)(ra, pos, m))
for { for {
lastPos <- ctx.store.transact(RAttachment.nextPosition(item.item.id)) lastPos <- store.transact(RAttachment.nextPosition(item.item.id))
extracts <- extracts <-
item.attachments.zipWithIndex item.attachments.zipWithIndex
.traverse(t => extract(t._1, lastPos + t._2)) .traverse(t => extract(t._1, lastPos + t._2))
.map(Monoid[Extracted].combineAll) .map(Monoid[Extracted].combineAll)
.map(fixPositions) .map(fixPositions)
nra = extracts.files nra = extracts.files
_ <- extracts.files.traverse(storeAttachment(ctx)) _ <- extracts.files.traverse(storeAttachment(store))
naa = extracts.archives naa = extracts.archives
_ <- naa.traverse(storeArchive(ctx)) _ <- naa.traverse(storeArchive(store))
} yield naa.headOption -> item.copy( } yield naa.headOption -> item.copy(
attachments = nra, attachments = nra,
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap, originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap,
@ -83,25 +84,26 @@ object ExtractArchive {
if (extract.archives.isEmpty) extract if (extract.archives.isEmpty) extract
else extract.updatePositions else extract.updatePositions
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(MimeType.octetStream) .getOrElse(MimeType.octetStream)
def extractSafe[F[_]: Async]( def extractSafe[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] = )(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] =
mime match { mime match {
case MimeType.ZipMatch(_) if ra.name.exists(_.toLowerCase.endsWith(".zip")) => case MimeType.ZipMatch(_) if ra.name.exists(_.toLowerCase.endsWith(".zip")) =>
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *> ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
extractZip(ctx, archive)(ra, pos) extractZip(ctx, store, archive)(ra, pos)
.flatMap(cleanupParents(ctx, ra, archive)) .flatMap(cleanupParents(ctx, store, ra, archive))
case MimeType.EmailMatch(_) => case MimeType.EmailMatch(_) =>
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *> ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
extractMail(ctx, archive)(ra, pos) extractMail(ctx, store, archive)(ra, pos)
.flatMap(cleanupParents(ctx, ra, archive)) .flatMap(cleanupParents(ctx, store, ra, archive))
case _ => case _ =>
ctx.logger.debug(s"Not an archive: ${mime.asString}") *> ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
@ -110,6 +112,7 @@ object ExtractArchive {
def cleanupParents[F[_]: Sync]( def cleanupParents[F[_]: Sync](
ctx: Context[F, _], ctx: Context[F, _],
store: Store[F],
ra: RAttachment, ra: RAttachment,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(extracted: Extracted): F[Extracted] = )(extracted: Extracted): F[Extracted] =
@ -119,30 +122,31 @@ object ExtractArchive {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Extracted inner attachment ${ra.name}. Remove it completely." s"Extracted inner attachment ${ra.name}. Remove it completely."
) )
_ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) _ <- store.transact(RAttachmentArchive.delete(ra.id))
_ <- ctx.store.transact(RAttachment.delete(ra.id)) _ <- store.transact(RAttachment.delete(ra.id))
_ <- ctx.store.fileRepo.delete(ra.fileId) _ <- store.fileRepo.delete(ra.fileId)
} yield extracted } yield extracted
case None => case None =>
for { for {
_ <- ctx.logger.debug( _ <- ctx.logger.debug(
s"Extracted attachment ${ra.name}. Remove it from the item." s"Extracted attachment ${ra.name}. Remove it from the item."
) )
_ <- ctx.store.transact(RAttachment.delete(ra.id)) _ <- store.transact(RAttachment.delete(ra.id))
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id)) } yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
} }
def extractZip[F[_]: Async]( def extractZip[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = { )(ra: RAttachment, pos: Int): F[Extracted] = {
val zipData = ctx.store.fileRepo.getBytes(ra.fileId) val zipData = store.fileRepo.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
zipData zipData
.through(Zip.unzipP[F](8192, glob)) .through(Zip.unzipP[F](8192, glob))
.zipWithIndex .zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, None)) .flatMap(handleEntry(ctx, store, ra, pos, archive, None))
.foldMonoid .foldMonoid
.compile .compile
.lastOrError .lastOrError
@ -150,9 +154,10 @@ object ExtractArchive {
def extractMail[F[_]: Async]( def extractMail[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = { )(ra: RAttachment, pos: Int): F[Extracted] = {
val email: Stream[F, Byte] = ctx.store.fileRepo.getBytes(ra.fileId) val email: Stream[F, Byte] = store.fileRepo.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false)
@ -170,7 +175,9 @@ object ExtractArchive {
ReadMail ReadMail
.mailToEntries(ctx.logger, glob, attachOnly)(mail) .mailToEntries(ctx.logger, glob, attachOnly)(mail)
.zipWithIndex .zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta) .flatMap(handleEntry(ctx, store, ra, pos, archive, mId)) ++ Stream.eval(
givenMeta
)
} }
.foldMonoid .foldMonoid
.compile .compile
@ -185,6 +192,7 @@ object ExtractArchive {
def handleEntry[F[_]: Sync]( def handleEntry[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
ra: RAttachment, ra: RAttachment,
pos: Int, pos: Int,
archive: Option[RAttachmentArchive], archive: Option[RAttachmentArchive],
@ -195,7 +203,7 @@ object ExtractArchive {
val (entry, subPos) = tentry val (entry, subPos) = tentry
val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString) val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString)
val fileId = entry.data.through( val fileId = entry.data.through(
ctx.store.fileRepo store.fileRepo
.save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint) .save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint)
) )
@ -217,16 +225,16 @@ object ExtractArchive {
} }
def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = { def storeAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] = {
val insert = CreateItem.insertAttachment(ctx)(ra) val insert = CreateItem.insertAttachment(store)(ra)
for { for {
n1 <- ctx.store.transact(RAttachment.updatePosition(ra.id, ra.position)) n1 <- store.transact(RAttachment.updatePosition(ra.id, ra.position))
n2 <- if (n1 > 0) 0.pure[F] else insert n2 <- if (n1 > 0) 0.pure[F] else insert
} yield n1 + n2 } yield n1 + n2
} }
def storeArchive[F[_]](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] = def storeArchive[F[_]](store: Store[F])(aa: RAttachmentArchive): F[Int] =
ctx.store.transact(RAttachmentArchive.insert(aa)) store.transact(RAttachmentArchive.insert(aa))
case class Extracted( case class Extracted(
files: Vector[RAttachment], files: Vector[RAttachment],

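As the comment above says, extraction runs to a fixpoint: one pass may unpack an archive whose entries are themselves archives, so multiPass repeats singlePass until no archive turns up. The recursion, reduced to its shape (Data and the archive marker are placeholders):

object MultiPassSketch {
  import cats.Monad
  import cats.syntax.all._

  final case class Data(attachments: List[String])

  // Re-run `singlePass` until it reports no further archive to unpack.
  def multiPass[F[_]: Monad](
      singlePass: (Data, Option[String]) => F[(Option[String], Data)]
  )(data: Data, archive: Option[String]): F[Data] =
    singlePass(data, archive).flatMap {
      case (None, d)  => d.pure[F] // fixpoint: nothing left to extract
      case (found, d) => multiPass(singlePass)(d, found)
    }
}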

@ -7,16 +7,15 @@
package docspell.joex.process package docspell.joex.process
import java.time.ZoneId import java.time.ZoneId
import cats.effect.Sync import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import cats.{Applicative, FlatMap} import cats.{Applicative, FlatMap}
import docspell.analysis.contact._ import docspell.analysis.contact._
import docspell.common.MetaProposal.Candidate import docspell.common.MetaProposal.Candidate
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.scheduler.{Context, Task} import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
/** Super simple approach to find corresponding metadata for an item by looking up values /** Super simple approach to find corresponding metadata for an item by looking up values
@ -26,7 +25,8 @@ object FindProposal {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync]( def apply[F[_]: Sync](
cfg: Config.TextAnalysis cfg: Config.TextAnalysis,
store: Store[F]
)(data: ItemData): Task[F, Args, ItemData] = )(data: ItemData): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels))) val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
@ -34,15 +34,16 @@ object FindProposal {
_ <- ctx.logger.info("Starting find-proposal") _ <- ctx.logger.info("Starting find-proposal")
rmv <- rmas rmv <- rmas
.traverse(rm => .traverse(rm =>
processAttachment(cfg, rm, data.findDates(rm), ctx) processAttachment(cfg, rm, data.findDates(rm), ctx, store)
.map(ml => rm.copy(proposals = ml)) .map(ml => rm.copy(proposals = ml))
) )
clp <- lookupClassifierProposals(ctx, data.classifyProposals) clp <- lookupClassifierProposals(ctx, store, data.classifyProposals)
} yield data.copy(metas = rmv, classifyProposals = clp) } yield data.copy(metas = rmv, classifyProposals = clp)
} }
def lookupClassifierProposals[F[_]: Sync]( def lookupClassifierProposals[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
store: Store[F],
mpList: MetaProposalList mpList: MetaProposalList
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val coll = ctx.args.meta.collective val coll = ctx.args.meta.collective
@ -50,7 +51,7 @@ object FindProposal {
def lookup(mp: MetaProposal): F[Option[IdRef]] = def lookup(mp: MetaProposal): F[Option[IdRef]] =
mp.proposalType match { mp.proposalType match {
case MetaProposalType.CorrOrg => case MetaProposalType.CorrOrg =>
ctx.store store
.transact( .transact(
ROrganization ROrganization
.findLike(coll, mp.values.head.ref.name.toLowerCase, OrgUse.notDisabled) .findLike(coll, mp.values.head.ref.name.toLowerCase, OrgUse.notDisabled)
@ -60,7 +61,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier organization for $mp: $oref") ctx.logger.debug(s"Found classifier organization for $mp: $oref")
) )
case MetaProposalType.CorrPerson => case MetaProposalType.CorrPerson =>
ctx.store store
.transact( .transact(
RPerson RPerson
.findLike( .findLike(
@ -74,7 +75,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier corr-person for $mp: $oref") ctx.logger.debug(s"Found classifier corr-person for $mp: $oref")
) )
case MetaProposalType.ConcPerson => case MetaProposalType.ConcPerson =>
ctx.store store
.transact( .transact(
RPerson RPerson
.findLike( .findLike(
@ -88,7 +89,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier conc-person for $mp: $oref") ctx.logger.debug(s"Found classifier conc-person for $mp: $oref")
) )
case MetaProposalType.ConcEquip => case MetaProposalType.ConcEquip =>
ctx.store store
.transact( .transact(
REquipment REquipment
.findLike( .findLike(
@ -123,9 +124,10 @@ object FindProposal {
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
rm: RAttachmentMeta, rm: RAttachmentMeta,
rd: Vector[NerDateLabel], rd: Vector[NerDateLabel],
ctx: Context[F, ProcessItemArgs] ctx: Context[F, Args],
store: Store[F]
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx)) val finder = Finder.searchExact(ctx, store).next(Finder.searchFuzzy(ctx, store))
List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd)) List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd))
.traverse(identity) .traverse(identity)
.map(MetaProposalList.flatten) .map(MetaProposalList.flatten)
@ -215,19 +217,24 @@ object FindProposal {
def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] = def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] =
_ => value.pure[F] _ => value.pure[F]
def searchExact[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] = def searchExact[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] =
labels => labels =>
labels.toList.traverse(nl => search(nl, true, ctx)).map(MetaProposalList.flatten) labels.toList
.traverse(nl => search(nl, true, ctx, store))
.map(MetaProposalList.flatten)
def searchFuzzy[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] = def searchFuzzy[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] =
labels => labels =>
labels.toList.traverse(nl => search(nl, false, ctx)).map(MetaProposalList.flatten) labels.toList
.traverse(nl => search(nl, false, ctx, store))
.map(MetaProposalList.flatten)
} }
private def search[F[_]: Sync]( private def search[F[_]: Sync](
nt: NerLabel, nt: NerLabel,
exact: Boolean, exact: Boolean,
ctx: Context[F, ProcessItemArgs] ctx: Context[F, ProcessItemArgs],
store: Store[F]
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val value = val value =
if (exact) normalizeSearchValue(nt.label) if (exact) normalizeSearchValue(nt.label)
@ -243,7 +250,7 @@ object FindProposal {
nt.tag match { nt.tag match {
case NerTag.Organization => case NerTag.Organization =>
ctx.logger.debug(s"Looking for organizations: $value") *> ctx.logger.debug(s"Looking for organizations: $value") *>
ctx.store store
.transact( .transact(
ROrganization ROrganization
.findLike(ctx.args.meta.collective, value, OrgUse.notDisabled) .findLike(ctx.args.meta.collective, value, OrgUse.notDisabled)
@ -251,20 +258,20 @@ object FindProposal {
.map(MetaProposalList.from(MetaProposalType.CorrOrg, nt)) .map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
case NerTag.Person => case NerTag.Person =>
val s1 = ctx.store val s1 = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, value, PersonUse.concerningAndBoth) .findLike(ctx.args.meta.collective, value, PersonUse.concerningAndBoth)
) )
.map(MetaProposalList.from(MetaProposalType.ConcPerson, nt)) .map(MetaProposalList.from(MetaProposalType.ConcPerson, nt))
val s2 = ctx.store val s2 = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, value, PersonUse.correspondentAndBoth) .findLike(ctx.args.meta.collective, value, PersonUse.correspondentAndBoth)
) )
.map(MetaProposalList.from(MetaProposalType.CorrPerson, nt)) .map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
val s3 = val s3 =
ctx.store store
.transact( .transact(
ROrganization ROrganization
.findLike(ctx.args.meta.collective, value, OrgUse.notDisabled) .findLike(ctx.args.meta.collective, value, OrgUse.notDisabled)
@ -283,7 +290,7 @@ object FindProposal {
case NerTag.Misc => case NerTag.Misc =>
ctx.logger.debug(s"Looking for equipments: $value") *> ctx.logger.debug(s"Looking for equipments: $value") *>
ctx.store store
.transact( .transact(
REquipment REquipment
.findLike(ctx.args.meta.collective, value, EquipmentUse.notDisabled) .findLike(ctx.args.meta.collective, value, EquipmentUse.notDisabled)
@ -291,7 +298,7 @@ object FindProposal {
.map(MetaProposalList.from(MetaProposalType.ConcEquip, nt)) .map(MetaProposalList.from(MetaProposalType.ConcEquip, nt))
case NerTag.Email => case NerTag.Email =>
searchContact(nt, ContactKind.Email, value, ctx) searchContact(nt, ContactKind.Email, value, ctx, store)
case NerTag.Website => case NerTag.Website =>
if (!exact) { if (!exact) {
@ -301,9 +308,9 @@ object FindProposal {
.map(_.toPrimaryDomain.asString) .map(_.toPrimaryDomain.asString)
.map(s => s"%$s%") .map(s => s"%$s%")
.getOrElse(value) .getOrElse(value)
searchContact(nt, ContactKind.Website, searchString, ctx) searchContact(nt, ContactKind.Website, searchString, ctx, store)
} else } else
searchContact(nt, ContactKind.Website, value, ctx) searchContact(nt, ContactKind.Website, value, ctx, store)
case NerTag.Date => case NerTag.Date =>
// There is no database search required for this tag // There is no database search required for this tag
@ -315,18 +322,19 @@ object FindProposal {
nt: NerLabel, nt: NerLabel,
kind: ContactKind, kind: ContactKind,
value: String, value: String,
ctx: Context[F, ProcessItemArgs] ctx: Context[F, ProcessItemArgs],
store: Store[F]
): F[MetaProposalList] = { ): F[MetaProposalList] = {
val orgs = ctx.store val orgs = store
.transact(ROrganization.findLike(ctx.args.meta.collective, kind, value)) .transact(ROrganization.findLike(ctx.args.meta.collective, kind, value))
.map(MetaProposalList.from(MetaProposalType.CorrOrg, nt)) .map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
val corrP = ctx.store val corrP = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, kind, value, PersonUse.correspondentAndBoth) .findLike(ctx.args.meta.collective, kind, value, PersonUse.correspondentAndBoth)
) )
.map(MetaProposalList.from(MetaProposalType.CorrPerson, nt)) .map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
val concP = ctx.store val concP = store
.transact( .transact(
RPerson RPerson
.findLike(ctx.args.meta.collective, kind, value, PersonUse.concerningAndBoth) .findLike(ctx.args.meta.collective, kind, value, PersonUse.concerningAndBoth)

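The proposal search composes finders: Finder.searchExact(ctx, store).next(Finder.searchFuzzy(ctx, store)) tries the exact lookup and falls back to the fuzzy one. A sketch of such a `next` combinator — its fall-back-when-empty semantics is inferred from the usage here, not taken from the real implementation:

object FinderSketch {
  import cats.Monad
  import cats.syntax.all._

  final case class Proposals(values: List[String]) {
    def isEmpty: Boolean = values.isEmpty
  }
  type Finder[F[_]] = List[String] => F[Proposals]

  // Run `first`; if it produced nothing, try `second` on the same labels.
  def next[F[_]: Monad](first: Finder[F], second: Finder[F]): Finder[F] =
    labels =>
      first(labels).flatMap { res =>
        if (res.isEmpty) second(labels) else res.pure[F]
      }
}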

@ -10,7 +10,6 @@ import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import docspell.analysis.TextAnalyser import docspell.analysis.TextAnalyser
import docspell.backend.ops.OItem import docspell.backend.ops.OItem
import docspell.common.{ItemState, ProcessItemArgs} import docspell.common.{ItemState, ProcessItemArgs}
@ -18,49 +17,51 @@ import docspell.ftsclient.FtsClient
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile import docspell.joex.analysis.RegexNerFile
import docspell.scheduler.Task import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QItem import docspell.store.queries.QItem
import docspell.store.records.RItem import docspell.store.records.RItem
object ItemHandler { object ItemHandler {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def onCancel[F[_]: Sync]: Task[F, Args, Unit] = def onCancel[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
logWarn[F]("Now cancelling.").flatMap(_ => logWarn[F]("Now cancelling.").flatMap(_ =>
markItemCreated.flatMap { markItemCreated(store).flatMap {
case true => case true =>
Task.pure(()) Task.pure(())
case false => case false =>
deleteByFileIds[F].flatMap(_ => deleteFiles) deleteByFileIds[F](store).flatMap(_ => deleteFiles(store))
} }
) )
def newItem[F[_]: Async]( def newItem[F[_]: Async](
cfg: Config, cfg: Config,
store: Store[F],
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F]
): Task[F, Args, Option[ItemData]] = ): Task[F, Args, Option[ItemData]] =
logBeginning[F].flatMap(_ => logBeginning[F].flatMap(_ =>
DuplicateCheck[F] DuplicateCheck[F](store)
.flatMap(args => .flatMap(args =>
if (args.files.isEmpty) logNoFiles[F].map(_ => None) if (args.files.isEmpty) logNoFiles[F].map(_ => None)
else { else {
val create: Task[F, Args, ItemData] = val create: Task[F, Args, ItemData] =
CreateItem[F].contramap(_ => args.pure[F]) CreateItem[F](store).contramap(_ => args.pure[F])
create create
.flatMap(itemStateTask(ItemState.Processing)) .flatMap(itemStateTask(store, ItemState.Processing))
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer)) .flatMap(safeProcess[F](cfg, store, itemOps, fts, analyser, regexNer))
.map(_.some) .map(_.some)
} }
) )
) )
def itemStateTask[F[_]: Sync, A]( def itemStateTask[F[_]: Sync, A](store: Store[F], state: ItemState)(
state: ItemState data: ItemData
)(data: ItemData): Task[F, A, ItemData] = ): Task[F, A, ItemData] =
Task(ctx => Task(_ =>
ctx.store store
.transact(RItem.updateState(data.item.id, state, ItemState.invalidStates)) .transact(RItem.updateState(data.item.id, state, ItemState.invalidStates))
.map(_ => data) .map(_ => data)
) )
@ -70,6 +71,7 @@ object ItemHandler {
def safeProcess[F[_]: Async]( def safeProcess[F[_]: Async](
cfg: Config, cfg: Config,
store: Store[F],
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
@ -77,30 +79,31 @@ object ItemHandler {
)(data: ItemData): Task[F, Args, ItemData] = )(data: ItemData): Task[F, Args, ItemData] =
isLastRetry[F].flatMap { isLastRetry[F].flatMap {
case true => case true =>
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data).attempt.flatMap { ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data).attempt
case Right(d) => .flatMap {
Task.pure(d) case Right(d) =>
case Left(ex) => Task.pure(d)
logWarn[F]( case Left(ex) =>
"Processing failed on last retry. Creating item but without proposals." logWarn[F](
).flatMap(_ => itemStateTask(ItemState.Created)(data)) "Processing failed on last retry. Creating item but without proposals."
.andThen(_ => Sync[F].raiseError(ex)) ).flatMap(_ => itemStateTask(store, ItemState.Created)(data))
} .andThen(_ => Sync[F].raiseError(ex))
}
case false => case false =>
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data) ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data)
.flatMap(itemStateTask(ItemState.Created)) .flatMap(itemStateTask(store, ItemState.Created))
} }
private def markItemCreated[F[_]: Sync]: Task[F, Args, Boolean] = private def markItemCreated[F[_]: Sync](store: Store[F]): Task[F, Args, Boolean] =
Task { ctx => Task { ctx =>
val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet
(for { (for {
item <- OptionT(ctx.store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq))) item <- OptionT(store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq)))
_ <- OptionT.liftF( _ <- OptionT.liftF(
ctx.logger.info("Processing cancelled. Marking item as created anyways.") ctx.logger.info("Processing cancelled. Marking item as created anyways.")
) )
_ <- OptionT.liftF( _ <- OptionT.liftF(
ctx.store store
.transact( .transact(
RItem.updateState(item.id, ItemState.Created, ItemState.invalidStates) RItem.updateState(item.id, ItemState.Created, ItemState.invalidStates)
) )
@ -111,11 +114,11 @@ object ItemHandler {
) )
} }
private def deleteByFileIds[F[_]: Sync]: Task[F, Args, Unit] = private def deleteByFileIds[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
val states = ItemState.invalidStates val states = ItemState.invalidStates
for { for {
items <- ctx.store.transact( items <- store.transact(
QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states) QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states)
) )
_ <- _ <-
@ -124,16 +127,16 @@ object ItemHandler {
ctx.logger.info( ctx.logger.info(
s"No items found for file ids ${ctx.args.files.map(_.fileMetaId)}" s"No items found for file ids ${ctx.args.files.map(_.fileMetaId)}"
) )
_ <- items.traverse(i => QItem.delete(ctx.store)(i.id, ctx.args.meta.collective)) _ <- items.traverse(i => QItem.delete(store)(i.id, ctx.args.meta.collective))
} yield () } yield ()
} }
private def deleteFiles[F[_]: Sync]: Task[F, Args, Unit] = private def deleteFiles[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task(ctx => Task(ctx =>
ctx.logger.info("Deleting input files …") *> ctx.logger.info("Deleting input files …") *>
Stream Stream
.emits(ctx.args.files.map(_.fileMetaId)) .emits(ctx.args.files.map(_.fileMetaId))
.evalMap(id => ctx.store.fileRepo.delete(id).attempt) .evalMap(id => store.fileRepo.delete(id).attempt)
.compile .compile
.drain .drain
) )

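safeProcess distinguishes the job's last retry: normally a failure simply propagates so the scheduler can retry, but on the final attempt the item is persisted as Created before the error is re-raised, so nothing already processed is lost. The failure handling, condensed to its essence:

object LastRetrySketch {
  import cats.effect.Sync
  import cats.syntax.all._

  // On the last retry: keep whatever was built, then surface the error.
  def onLastRetry[F[_]: Sync, A](process: F[A], persistAnyway: F[A]): F[A] =
    process.attempt.flatMap {
      case Right(a) => a.pure[F]
      case Left(ex) => persistAnyway *> Sync[F].raiseError[A](ex)
    }
}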

@@ -9,22 +9,26 @@ package docspell.joex.process
 import cats.data.NonEmptyList
 import cats.effect.Sync
 import cats.implicits._
-
 import docspell.common._
 import docspell.scheduler.{Context, Task}
+import docspell.store.Store
 import docspell.store.records.RItem
 
 object LinkProposal {
 
-  def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def onlyNew[F[_]: Sync](
+      store: Store[F]
+  )(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
     if (data.item.state.isValid)
       Task
         .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item"))
         .map(_ => data)
     else
-      LinkProposal[F](data)
+      LinkProposal[F](store)(data)
 
-  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def apply[F[_]: Sync](
+      store: Store[F]
+  )(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
     if (data.item.state == ItemState.Confirmed)
       Task
         .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item"))
@@ -35,7 +39,7 @@ object LinkProposal {
       ctx.logger.info(s"Starting linking proposals") *>
         MetaProposalType.all
-          .traverse(applyValue(data, proposals, ctx))
+          .traverse(applyValue(data, proposals, ctx, store))
           .map(result => ctx.logger.info(s"Results from proposal processing: $result"))
           .map(_ => data)
     }
@@ -43,7 +47,8 @@ object LinkProposal {
   def applyValue[F[_]: Sync](
       data: ItemData,
       proposalList: MetaProposalList,
-      ctx: Context[F, ProcessItemArgs]
+      ctx: Context[F, ProcessItemArgs],
+      store: Store[F]
   )(mpt: MetaProposalType): F[Result] =
     data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match {
       case None =>
@@ -51,29 +56,30 @@ object LinkProposal {
         Result.noneFound(mpt).pure[F]
       case Some(a) if a.isSingleValue =>
         ctx.logger.info(s"Found one candidate for ${a.proposalType}") *>
-          setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ =>
-            Result.single(mpt)
+          setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map(
+            _ => Result.single(mpt)
           )
       case Some(a) =>
         val ids = a.values.map(_.ref.id.id)
         ctx.logger.info(
           s"Found many (${a.size}, $ids) candidates for ${a.proposalType}. Setting first."
         ) *>
-          setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ =>
-            Result.multiple(mpt)
+          setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map(
+            _ => Result.multiple(mpt)
           )
     }
 
   def setItemMeta[F[_]: Sync](
       itemId: Ident,
       ctx: Context[F, ProcessItemArgs],
+      store: Store[F],
       mpt: MetaProposalType,
       value: Ident
   ): F[Int] =
     mpt match {
       case MetaProposalType.CorrOrg =>
         ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
-          ctx.store.transact(
+          store.transact(
             RItem.updateCorrOrg(
               NonEmptyList.of(itemId),
               ctx.args.meta.collective,
@@ -82,7 +88,7 @@ object LinkProposal {
           )
       case MetaProposalType.ConcPerson =>
         ctx.logger.debug(s"Updating item concerning person with: $value") *>
-          ctx.store.transact(
+          store.transact(
             RItem.updateConcPerson(
               NonEmptyList.of(itemId),
               ctx.args.meta.collective,
@@ -91,7 +97,7 @@ object LinkProposal {
           )
       case MetaProposalType.CorrPerson =>
         ctx.logger.debug(s"Updating item correspondent person with: $value") *>
-          ctx.store.transact(
+          store.transact(
             RItem.updateCorrPerson(
               NonEmptyList.of(itemId),
               ctx.args.meta.collective,
@@ -100,7 +106,7 @@ object LinkProposal {
           )
       case MetaProposalType.ConcEquip =>
         ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
-          ctx.store.transact(
+          store.transact(
             RItem.updateConcEquip(
               NonEmptyList.of(itemId),
               ctx.args.meta.collective,
@@ -112,7 +118,7 @@ object LinkProposal {
       case Some(ld) =>
         val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
         ctx.logger.debug(s"Updating item date ${value.id}") *>
-          ctx.store.transact(
+          store.transact(
             RItem.updateDate(
               NonEmptyList.of(itemId),
               ctx.args.meta.collective,
@@ -128,7 +134,7 @@ object LinkProposal {
       case Some(ld) =>
         val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
         ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
-          ctx.store.transact(
+          store.transact(
             RItem.updateDueDate(
               NonEmptyList.of(itemId),
               ctx.args.meta.collective,

View File

@@ -8,7 +8,6 @@ package docspell.joex.process
 import cats.effect._
 import cats.implicits._
-
 import docspell.analysis.TextAnalyser
 import docspell.backend.ops.OItem
 import docspell.common.ProcessItemArgs
@@ -16,6 +15,7 @@ import docspell.ftsclient.FtsClient
 import docspell.joex.Config
 import docspell.joex.analysis.RegexNerFile
 import docspell.scheduler.Task
+import docspell.store.Store
 
 object ProcessItem {
@@ -24,12 +24,13 @@ object ProcessItem {
       itemOps: OItem[F],
       fts: FtsClient[F],
       analyser: TextAnalyser[F],
-      regexNer: RegexNerFile[F]
+      regexNer: RegexNerFile[F],
+      store: Store[F]
   )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
-    ExtractArchive(item)
+    ExtractArchive(store)(item)
       .flatMap(Task.setProgress(20))
-      .flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80)))
-      .flatMap(LinkProposal.onlyNew[F])
+      .flatMap(processAttachments0(cfg, fts, analyser, regexNer, store, (40, 60, 80)))
+      .flatMap(LinkProposal.onlyNew[F](store))
       .flatMap(SetGivenData.onlyNew[F](itemOps))
       .flatMap(Task.setProgress(99))
       .flatMap(RemoveEmptyItem(itemOps))
@@ -38,34 +39,37 @@ object ProcessItem {
       cfg: Config,
       fts: FtsClient[F],
       analyser: TextAnalyser[F],
-      regexNer: RegexNerFile[F]
+      regexNer: RegexNerFile[F],
+      store: Store[F]
   )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
-    processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item)
+    processAttachments0[F](cfg, fts, analyser, regexNer, store, (30, 60, 90))(item)
 
   def analysisOnly[F[_]: Async](
       cfg: Config,
       analyser: TextAnalyser[F],
-      regexNer: RegexNerFile[F]
+      regexNer: RegexNerFile[F],
+      store: Store[F]
   )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
-    TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item)
-      .flatMap(FindProposal[F](cfg.textAnalysis))
-      .flatMap(EvalProposals[F])
-      .flatMap(CrossCheckProposals[F])
-      .flatMap(SaveProposals[F])
+    TextAnalysis[F](cfg.textAnalysis, analyser, regexNer, store)(item)
+      .flatMap(FindProposal[F](cfg.textAnalysis, store))
+      .flatMap(EvalProposals[F](store))
+      .flatMap(CrossCheckProposals[F](store))
+      .flatMap(SaveProposals[F](store))
 
   private def processAttachments0[F[_]: Async](
       cfg: Config,
       fts: FtsClient[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F],
+      store: Store[F],
       progress: (Int, Int, Int)
   )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
-    ConvertPdf(cfg.convert, item)
+    ConvertPdf(cfg.convert, store, item)
       .flatMap(Task.setProgress(progress._1))
-      .flatMap(TextExtraction(cfg.extraction, fts))
-      .flatMap(AttachmentPreview(cfg.extraction.preview))
-      .flatMap(AttachmentPageCount())
+      .flatMap(TextExtraction(cfg.extraction, fts, store))
+      .flatMap(AttachmentPreview(cfg.extraction.preview, store))
+      .flatMap(AttachmentPageCount(store))
       .flatMap(Task.setProgress(progress._2))
-      .flatMap(analysisOnly[F](cfg, analyser, regexNer))
+      .flatMap(analysisOnly[F](cfg, analyser, regexNer, store))
      .flatMap(Task.setProgress(progress._3))
 }
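
Note: the curried signatures are what keep these pipelines readable. After applying the first parameter list, each step is a plain function `ItemData => Task[F, ProcessItemArgs, ItemData]` that slots directly into `flatMap`. A sketch under that assumption (the `linkStep` helper is made up):

    import cats.effect.Async

    import docspell.common.ProcessItemArgs
    import docspell.joex.process.{ItemData, LinkProposal}
    import docspell.scheduler.Task
    import docspell.store.Store

    // Partially applying the store turns a task constructor into a pipeline step.
    def linkStep[F[_]: Async](store: Store[F]): ItemData => Task[F, ProcessItemArgs, ItemData] =
      LinkProposal.onlyNew[F](store)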

View File

@@ -9,7 +9,6 @@ package docspell.joex.process
 import cats.data.OptionT
 import cats.effect._
 import cats.implicits._
-
 import docspell.analysis.TextAnalyser
 import docspell.backend.ops.OItem
 import docspell.common._
@@ -18,6 +17,7 @@ import docspell.joex.Config
 import docspell.joex.analysis.RegexNerFile
 import docspell.scheduler.Context
 import docspell.scheduler.Task
+import docspell.store.Store
 import docspell.store.queries.QItem
 import docspell.store.records.RAttachment
 import docspell.store.records.RAttachmentSource
@@ -32,13 +32,14 @@ object ReProcessItem {
       fts: FtsClient[F],
       itemOps: OItem[F],
       analyser: TextAnalyser[F],
-      regexNer: RegexNerFile[F]
+      regexNer: RegexNerFile[F],
+      store: Store[F]
   ): Task[F, Args, Unit] =
     Task
       .log[F, Args](_.info("===== Start reprocessing ======"))
       .flatMap(_ =>
-        loadItem[F]
-          .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer))
+        loadItem[F](store)
+          .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer, store))
           .map(_ => ())
       )
@@ -53,13 +54,13 @@ object ReProcessItem {
       else ra => selection.contains(ra.id)
     }
 
-  def loadItem[F[_]: Sync]: Task[F, Args, ItemData] =
+  def loadItem[F[_]: Sync](store: Store[F]): Task[F, Args, ItemData] =
     Task { ctx =>
       (for {
-        item <- OptionT(ctx.store.transact(RItem.findById(ctx.args.itemId)))
-        attach <- OptionT.liftF(ctx.store.transact(RAttachment.findByItem(item.id)))
+        item <- OptionT(store.transact(RItem.findById(ctx.args.itemId)))
+        attach <- OptionT.liftF(store.transact(RAttachment.findByItem(item.id)))
         asrc <-
-          OptionT.liftF(ctx.store.transact(RAttachmentSource.findByItem(ctx.args.itemId)))
+          OptionT.liftF(store.transact(RAttachmentSource.findByItem(ctx.args.itemId)))
         asrcMap = asrc.map(s => s.id -> s).toMap
         // copy the original files over to attachments to run the default processing task
         // the processing doesn't touch the original files, only RAttachments
@@ -97,6 +98,7 @@ object ReProcessItem {
       itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F],
+      store: Store[F],
       data: ItemData
   ): Task[F, Args, ItemData] = {
@@ -121,21 +123,21 @@ object ReProcessItem {
       Nil
     ).pure[F]
 
-    getLanguage[F].flatMap { lang =>
+    getLanguage[F](store).flatMap { lang =>
       ProcessItem
-        .processAttachments[F](cfg, fts, analyser, regexNer)(data)
-        .flatMap(LinkProposal[F])
+        .processAttachments[F](cfg, fts, analyser, regexNer, store)(data)
+        .flatMap(LinkProposal[F](store))
         .flatMap(SetGivenData[F](itemOps))
         .contramap[Args](convertArgs(lang))
     }
   }
 
-  def getLanguage[F[_]: Sync]: Task[F, Args, Language] =
+  def getLanguage[F[_]: Sync](store: Store[F]): Task[F, Args, Language] =
     Task { ctx =>
       val lang1 = OptionT(
-        ctx.store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption)
+        store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption)
       )
-      val lang2 = OptionT(ctx.store.transact(RCollective.findByItem(ctx.args.itemId)))
+      val lang2 = OptionT(store.transact(RCollective.findByItem(ctx.args.itemId)))
         .map(_.language)
 
       lang1.orElse(lang2).getOrElse(Language.German)
@@ -149,11 +151,12 @@ object ReProcessItem {
       fts: FtsClient[F],
       itemOps: OItem[F],
       analyser: TextAnalyser[F],
-      regexNer: RegexNerFile[F]
+      regexNer: RegexNerFile[F],
+      store: Store[F]
   )(data: ItemData): Task[F, Args, ItemData] =
     isLastRetry[F].flatMap {
       case true =>
-        processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt
+        processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data).attempt
          .flatMap {
            case Right(d) =>
              Task.pure(d)
@@ -163,7 +166,7 @@ object ReProcessItem {
            ).andThen(_ => Sync[F].raiseError(ex))
          }
      case false =>
-        processFiles[F](cfg, fts, itemOps, analyser, regexNer, data)
+        processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data)
    }
 
   private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =

View File

@@ -8,17 +8,16 @@ package docspell.joex.process
 import cats.effect.Sync
 import cats.implicits._
-
 import docspell.common._
 import docspell.scheduler.{Context, Task}
-import docspell.store.AddResult
+import docspell.store.{AddResult, Store}
 import docspell.store.records._
 
 /** Saves the proposals in the database */
 object SaveProposals {
   type Args = ProcessItemArgs
 
-  def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] =
+  def apply[F[_]: Sync](store: Store[F])(data: ItemData): Task[F, Args, ItemData] =
     Task { ctx =>
       for {
         _ <- ctx.logger.info("Storing proposals")
@@ -26,20 +25,24 @@ object SaveProposals {
           .traverse(rm =>
             ctx.logger.debug(
               s"Storing attachment proposals: ${rm.proposals}"
-            ) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
+            ) *> store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
           )
         _ <-
           if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F]
-          else saveItemProposal(ctx, data)
+          else saveItemProposal(ctx, store, data)
       } yield data
     }
 
-  def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = {
+  def saveItemProposal[F[_]: Sync](
+      ctx: Context[F, Args],
+      store: Store[F],
+      data: ItemData
+  ): F[Unit] = {
     def upsert(v: RItemProposal): F[Int] =
-      ctx.store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
+      store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
        case AddResult.Success => 1.pure[F]
        case AddResult.EntityExists(_) =>
-          ctx.store.transact(RItemProposal.update(v))
+          store.transact(RItemProposal.update(v))
        case AddResult.Failure(ex) =>
          ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0
            .pure[F]
@@ -47,7 +50,7 @@ object SaveProposals {
     for {
       _ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}")
-      tags <- ctx.store.transact(
+      tags <- store.transact(
         RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective)
       )
       tagRefs = tags.map(t => IdRef(t.tagId, t.name))
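
Note: the `store.add` call above is an insert guarded by an existence check, with the outcome reported as an `AddResult`. The same upsert in isolation, as a sketch (the helper name and the explicit logger parameter are made up; the calls themselves all appear in this diff):

    import cats.effect.Sync
    import cats.implicits._

    import docspell.logging.Logger
    import docspell.store.{AddResult, Store}
    import docspell.store.records.RItemProposal

    // Insert-or-update via AddResult: insert when absent, update when present,
    // log and continue when the insert fails outright.
    def upsertProposal[F[_]: Sync](store: Store[F], logger: Logger[F])(v: RItemProposal): F[Int] =
      store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
        case AddResult.Success         => 1.pure[F]
        case AddResult.EntityExists(_) => store.transact(RItemProposal.update(v))
        case AddResult.Failure(ex) =>
          logger.warn(s"Could not store item proposals: ${ex.getMessage}").as(0)
      }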

View File

@@ -9,7 +9,6 @@ package docspell.joex.process
 import cats.Traverse
 import cats.effect._
 import cats.implicits._
-
 import docspell.analysis.classifier.TextClassifier
 import docspell.analysis.{NlpSettings, TextAnalyser}
 import docspell.common.MetaProposal.Candidate
@@ -20,6 +19,7 @@ import docspell.joex.learn.{ClassifierName, Classify, LearnClassifierTask}
 import docspell.joex.process.ItemData.AttachmentDates
 import docspell.scheduler.Context
 import docspell.scheduler.Task
+import docspell.store.Store
 import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
 
 object TextAnalysis {
@@ -28,7 +28,8 @@ object TextAnalysis {
   def apply[F[_]: Async](
       cfg: Config.TextAnalysis,
       analyser: TextAnalyser[F],
-      nerFile: RegexNerFile[F]
+      nerFile: RegexNerFile[F],
+      store: Store[F]
   )(item: ItemData): Task[F, Args, ItemData] =
     Task { ctx =>
       for {
@@ -41,18 +42,19 @@ object TextAnalysis {
         )
         _ <- ctx.logger.debug(s"Storing tags: ${t.map(_._1.copy(content = None))}")
         _ <- t.traverse(m =>
-          ctx.store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels))
+          store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels))
         )
         v = t.toVector
-        autoTagEnabled <- getActiveAutoTag(ctx, cfg)
+        autoTagEnabled <- getActiveAutoTag(ctx, store, cfg)
         tag <-
-          if (autoTagEnabled) predictTags(ctx, cfg, item.metas, analyser.classifier)
+          if (autoTagEnabled)
+            predictTags(ctx, store, cfg, item.metas, analyser.classifier)
           else List.empty[String].pure[F]
         classProposals <-
           if (cfg.classification.enabled)
-            predictItemEntities(ctx, cfg, item.metas, analyser.classifier)
+            predictItemEntities(ctx, store, cfg, item.metas, analyser.classifier)
           else MetaProposalList.empty.pure[F]
         e <- s
@@ -86,16 +88,17 @@ object TextAnalysis {
   def predictTags[F[_]: Async](
       ctx: Context[F, Args],
+      store: Store[F],
       cfg: Config.TextAnalysis,
       metas: Vector[RAttachmentMeta],
       classifier: TextClassifier[F]
   ): F[List[String]] = {
     val text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep)
     val classifyWith: ClassifierName => F[Option[String]] =
-      makeClassify(ctx, cfg, classifier)(text)
+      makeClassify(ctx, store, cfg, classifier)(text)
 
     for {
-      names <- ctx.store.transact(
+      names <- store.transact(
        ClassifierName.findTagClassifiers(ctx.args.meta.collective)
      )
      _ <- ctx.logger.debug(s"Guessing tags for ${names.size} categories")
@@ -105,6 +108,7 @@ object TextAnalysis {
   def predictItemEntities[F[_]: Async](
       ctx: Context[F, Args],
+      store: Store[F],
       cfg: Config.TextAnalysis,
       metas: Vector[RAttachmentMeta],
       classifier: TextClassifier[F]
@@ -116,7 +120,7 @@ object TextAnalysis {
         mtype: MetaProposalType
     ): F[Option[MetaProposal]] =
       for {
-        label <- makeClassify(ctx, cfg, classifier)(text).apply(cname)
+        label <- makeClassify(ctx, store, cfg, classifier)(text).apply(cname)
       } yield label.map(str =>
         MetaProposal(mtype, Candidate(IdRef(Ident.unsafe(""), str), Set.empty))
       )
@@ -136,13 +140,14 @@ object TextAnalysis {
   private def makeClassify[F[_]: Async](
       ctx: Context[F, Args],
+      store: Store[F],
       cfg: Config.TextAnalysis,
       classifier: TextClassifier[F]
   )(text: String): ClassifierName => F[Option[String]] =
     Classify[F](
       ctx.logger,
       cfg.workingDir,
-      ctx.store,
+      store,
       classifier,
       ctx.args.meta.collective,
       text
@@ -150,10 +155,11 @@ object TextAnalysis {
   private def getActiveAutoTag[F[_]: Sync](
       ctx: Context[F, Args],
+      store: Store[F],
       cfg: Config.TextAnalysis
   ): F[Boolean] =
     if (cfg.classification.enabled)
-      ctx.store
+      store
        .transact(RClassifierSetting.findById(ctx.args.meta.collective))
        .map(_.exists(_.autoTagEnabled))
        .flatTap(enabled =>

View File

@@ -9,16 +9,16 @@ package docspell.joex.process
 import cats.data.OptionT
 import cats.effect._
 import cats.implicits._
-
 import docspell.common._
 import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
 import docspell.ftsclient.{FtsClient, TextData}
 import docspell.scheduler.{Context, Task}
+import docspell.store.Store
 import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
 
 object TextExtraction {
 
-  def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])(
+  def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F], store: Store[F])(
       item: ItemData
   ): Task[F, ProcessItemArgs, ItemData] =
     Task { ctx =>
@@ -30,6 +30,7 @@ object TextExtraction {
         txt <- item.attachments.traverse(
           extractTextIfEmpty(
             ctx,
+            store,
             cfg,
             ctx.args.meta.language,
             ctx.args.meta.collective,
@@ -38,7 +39,7 @@ object TextExtraction {
         )
         _ <- ctx.logger.debug("Storing extracted texts …")
         _ <-
-          txt.toList.traverse(res => ctx.store.transact(RAttachmentMeta.upsert(res.am)))
+          txt.toList.traverse(res => store.transact(RAttachmentMeta.upsert(res.am)))
         _ <- ctx.logger.debug(s"Extracted text stored.")
         idxItem = TextData.item(
           item.item.id,
@@ -65,6 +66,7 @@ object TextExtraction {
   def extractTextIfEmpty[F[_]: Async](
       ctx: Context[F, ProcessItemArgs],
+      store: Store[F],
       cfg: ExtractConfig,
       lang: Language,
       collective: Ident,
@@ -91,13 +93,14 @@ object TextExtraction {
         ctx.logger.info("TextExtraction skipped, since text is already available.") *>
           makeTextData((rm, Nil)).pure[F]
       case _ =>
-        extractTextToMeta[F](ctx, cfg, lang, item)(ra)
+        extractTextToMeta[F](ctx, store, cfg, lang, item)(ra)
          .map(makeTextData)
    }
  }

  def extractTextToMeta[F[_]: Async](
      ctx: Context[F, _],
+      store: Store[F],
      cfg: ExtractConfig,
      lang: Language,
      item: ItemData
@@ -105,8 +108,8 @@ object TextExtraction {
    for {
      _ <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}")
      dst <- Duration.stopTime[F]
-      fids <- filesToExtract(ctx)(item, ra)
-      res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
+      fids <- filesToExtract(store)(item, ra)
+      res <- extractTextFallback(ctx, store, cfg, ra, lang)(fids)
      meta = item.changeMeta(
        ra.id,
        lang,
@@ -123,14 +126,14 @@ object TextExtraction {
    } yield (meta, tags)

  def extractText[F[_]: Sync](
-      ctx: Context[F, _],
+      store: Store[F],
      extr: Extraction[F],
      lang: Language
  )(fileId: FileKey): F[ExtractResult] = {
-    val data = ctx.store.fileRepo.getBytes(fileId)
+    val data = store.fileRepo.getBytes(fileId)

    def findMime: F[MimeType] =
-      OptionT(ctx.store.fileRepo.findMeta(fileId))
+      OptionT(store.fileRepo.findMeta(fileId))
        .map(_.mimetype)
        .getOrElse(MimeType.octetStream)
@@ -140,6 +143,7 @@ object TextExtraction {
  private def extractTextFallback[F[_]: Async](
      ctx: Context[F, _],
+      store: Store[F],
      cfg: ExtractConfig,
      ra: RAttachment,
      lang: Language
@@ -151,7 +155,7 @@ object TextExtraction {
      case id :: rest =>
        val extr = Extraction.create[F](ctx.logger, cfg)

-        extractText[F](ctx, extr, lang)(id)
+        extractText[F](store, extr, lang)(id)
          .flatMap {
            case res @ ExtractResult.Success(_, _) =>
              res.some.pure[F]
@@ -161,12 +165,12 @@ object TextExtraction {
                .warn(
                  s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file."
                )
-                .flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
+                .flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest))
            case ExtractResult.Failure(ex) =>
              ctx.logger
                .warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file")
-                .flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
+                .flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest))
          }
    }
@@ -176,13 +180,13 @@ object TextExtraction {
   * If the source file is a PDF, then use the converted file. This may then already
   * contain the text if ocrmypdf is enabled. If it is disabled, both files are the same.
   */
-  private def filesToExtract[F[_]: Sync](ctx: Context[F, _])(
+  private def filesToExtract[F[_]: Sync](store: Store[F])(
      item: ItemData,
      ra: RAttachment
  ): F[List[FileKey]] =
    item.originFile.get(ra.id) match {
      case Some(sid) =>
-        ctx.store.transact(RFileMeta.findMime(sid)).map {
+        store.transact(RFileMeta.findMime(sid)).map {
          case Some(MimeType.PdfMatch(_)) =>
            List(ra.fileId)
          case _ =>
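
Note: `extractTextFallback` above is a try-in-order strategy: attempt extraction on each candidate file key and recurse on the remainder when the format is unsupported or extraction fails. The same control flow in a generic, self-contained sketch (names are made up; this is not docspell code):

    import cats.Monad
    import cats.implicits._

    // Try each candidate in order and short-circuit on the first success;
    // yields None when all candidates are exhausted.
    def firstSuccess[F[_]: Monad, A, B](candidates: List[A])(attempt: A => F[Option[B]]): F[Option[B]] =
      candidates match {
        case Nil => Option.empty[B].pure[F]
        case a :: rest =>
          attempt(a).flatMap {
            case Some(b) => (Some(b): Option[B]).pure[F]
            case None    => firstSuccess(rest)(attempt)
          }
      }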

View File

@@ -12,7 +12,7 @@ import cats.implicits._
 import docspell.common.{Duration, Ident, Timestamp}
 import docspell.joex.JoexApp
 import docspell.joexapi.model._
-import docspell.store.records.{RJob, RJobLog}
+import docspell.store.records.RJobLog
 
 import org.http4s.HttpRoutes
 import org.http4s.circe.CirceEntityEncoder._
@@ -67,17 +67,19 @@ object JoexRoutes {
     }
   }
 
-  def mkJob(j: RJob): Job =
+  // TODO !!
+  def mkJob(j: docspell.scheduler.Job[String]): Job =
     Job(
       j.id,
       j.subject,
-      j.submitted,
+      Timestamp.Epoch,
       j.priority,
-      j.retries,
-      j.progress,
-      j.started.getOrElse(Timestamp.Epoch)
+      -1,
+      -1,
+      Timestamp.Epoch
     )
 
-  def mkJobLog(j: RJob, jl: Vector[RJobLog]): JobAndLog =
+  def mkJobLog(j: docspell.scheduler.Job[String], jl: Vector[RJobLog]): JobAndLog =
    JobAndLog(mkJob(j), jl.map(r => JobLogEvent(r.created, r.level, r.message)).toList)
 }
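
Note: the placeholder values above are deliberate. `docspell.scheduler.Job[String]` is the scheduler API's own job type and, per the `getRunning` mapping later in this commit, it carries only submission data (id, task, group, args, subject, submitter, priority, tracker). The runtime fields the REST model wants (submitted, retries, progress, started) live in `RJob`, which the scheduler API no longer exposes, so `mkJob` stubs them with `Timestamp.Epoch` and `-1`; that gap is what the `// TODO !!` marks.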

View File

@@ -12,7 +12,6 @@ import cats.data.OptionT
 import cats.effect._
 import cats.implicits._
 import fs2._
-
 import docspell.backend.ops.{OJoex, OUpload}
 import docspell.common._
 import docspell.joex.Config
@@ -20,8 +19,8 @@ import docspell.scheduler.{Context, Task}
 import docspell.logging.Logger
 import docspell.store.queries.QOrganization
 import docspell.store.records._
-
 import _root_.io.circe.syntax._
+import docspell.store.Store
 import emil.SearchQuery.{All, ReceivedDate}
 import emil.javamail.syntax._
 import emil.{MimeType => _, _}
@@ -32,6 +31,7 @@ object ScanMailboxTask {
 
   def apply[F[_]: Sync](
       cfg: Config.ScanMailbox,
+      store: Store[F],
       emil: Emil[F],
       upload: OUpload[F],
       joex: OJoex[F]
@@ -42,22 +42,22 @@ object ScanMailboxTask {
           s"=== Start importing mails for user ${ctx.args.account.user.id}"
         )
         _ <- ctx.logger.debug(s"Settings: ${ctx.args.asJson.noSpaces}")
-        mailCfg <- getMailSettings(ctx)
+        mailCfg <- getMailSettings(ctx, store)
         folders = ctx.args.folders.mkString(", ")
         userId = ctx.args.account.user
         imapConn = ctx.args.imapConnection
         _ <- ctx.logger.info(
           s"Reading mails for user ${userId.id} from ${imapConn.id}/$folders"
         )
-        _ <- importMails(cfg, mailCfg, emil, upload, joex, ctx)
+        _ <- importMails(cfg, mailCfg, emil, upload, joex, ctx, store)
      } yield ()
    }

  def onCancel[F[_]]: Task[F, ScanMailboxArgs, Unit] =
    Task.log(_.warn("Cancelling scan-mailbox task"))

-  def getMailSettings[F[_]: Sync](ctx: Context[F, Args]): F[RUserImap] =
-    ctx.store
+  def getMailSettings[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[RUserImap] =
+    store
      .transact(RUserImap.getByName(ctx.args.account, ctx.args.imapConnection))
      .flatMap {
        case Some(c) => c.pure[F]
@@ -75,10 +75,11 @@ object ScanMailboxTask {
      theEmil: Emil[F],
      upload: OUpload[F],
      joex: OJoex[F],
-      ctx: Context[F, Args]
+      ctx: Context[F, Args],
+      store: Store[F]
  ): F[Unit] = {
    val mailer = theEmil(mailCfg.toMailConfig)
-    val impl = new Impl[F](cfg, ctx)
+    val impl = new Impl[F](cfg, ctx, store)
    val inFolders = ctx.args.folders.take(cfg.maxFolders)

    val getInitialInput =
@@ -142,7 +143,11 @@ object ScanMailboxTask {
      ScanResult(List(folder -> left), processed)
    }

-  final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) {
+  final private class Impl[F[_]: Sync](
+      cfg: Config.ScanMailbox,
+      ctx: Context[F, Args],
+      store: Store[F]
+  ) {

    private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] =
      MailOp(_ => f(ctx.logger))
@@ -213,7 +218,7 @@ object ScanMailboxTask {
      NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match {
        case Some(nl) =>
          for {
-            archives <- ctx.store.transact(
+            archives <- store.transact(
              RAttachmentArchive
                .findByMessageIdAndCollective(nl, ctx.args.account.collective)
            )
@@ -237,7 +242,7 @@ object ScanMailboxTask {
      for {
        from <- OptionT.fromOption[F](mh.from)
        _ <- OptionT(
-          ctx.store.transact(
+          store.transact(
            QOrganization
              .findPersonByContact(
                ctx.args.account.collective,

View File

@@ -10,9 +10,9 @@ import cats.data.OptionT
 import cats.effect._
 import cats.implicits._
 
 import docspell.common._
-import docspell.scheduler.Context
 import docspell.scheduler.Task
 import docspell.scheduler.usertask.UserTask
+import docspell.store.Store
 import docspell.store.records.RUserEmail
 
 import emil._
@@ -37,6 +37,7 @@ object UpdateCheckTask {
   def apply[F[_]: Async](
       cfg: UpdateCheckConfig,
       sendCfg: MailSendConfig,
+      store: Store[F],
       emil: Emil[F],
       updateCheck: UpdateCheck[F],
       thisVersion: ThisVersion
@@ -50,7 +51,7 @@ object UpdateCheckTask {
         _ <- ctx.logger.debug(
           s"Get SMTP connection for ${cfg.senderAccount.asString} and ${cfg.smtpId}"
         )
-        smtpCfg <- findConnection(ctx, cfg)
+        smtpCfg <- findConnection(store, cfg)
         _ <- ctx.logger.debug("Checking for latest release at GitHub")
         latest <- updateCheck.latestRelease
         _ <- ctx.logger.debug(s"Got latest release: $latest.")
@@ -77,10 +78,10 @@ object UpdateCheckTask {
     Task.pure(())
 
   def findConnection[F[_]: Sync](
-      ctx: Context[F, _],
+      store: Store[F],
      cfg: UpdateCheckConfig
  ): F[RUserEmail] =
-    OptionT(ctx.store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId)))
+    OptionT(store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId)))
      .getOrElseF(
        Sync[F].raiseError(
          new Exception(

View File

@@ -8,7 +8,6 @@ package docspell.scheduler
 import docspell.common._
 import docspell.logging.Logger
-import docspell.store.Store
 
 trait Context[F[_], A] { self =>
@@ -22,8 +21,6 @@ trait Context[F[_], A] { self =>
 
   def setProgress(percent: Int): F[Unit]
 
-  def store: Store[F]
-
   def isLastRetry: F[Boolean]
 
   def map[C](f: A => C): Context[F, C]
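
Note: this is the heart of the commit. With `def store` gone, task code can no longer obtain a `Store` through the scheduler API at all, which is what forces the explicit `Store[F]` parameters threaded through every task above. A reconstructed sketch of the trait as it now stands; the `args`, `logger` and `jobId` members are not visible in these hunks and are assumed from the `ctx.args`/`ctx.logger` usages and the `ContextImpl` constructor elsewhere in this diff:

    trait Context[F[_], A] { self =>
      def jobId: Ident          // assumed from ContextImpl below
      def args: A               // assumed from ctx.args usages
      def logger: Logger[F]     // assumed from ctx.logger usages
      def setProgress(percent: Int): F[Unit]
      def isLastRetry: F[Boolean]
      def map[C](f: A => C): Context[F, C]
    }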

View File

@@ -10,13 +10,12 @@ import cats.effect._
 import fs2.Stream
 
 import docspell.common.Ident
-import docspell.store.records.RJob
 
 trait Scheduler[F[_]] {
 
   def config: SchedulerConfig
 
-  def getRunning: F[Vector[RJob]]
+  def getRunning: F[Vector[Job[String]]]
 
   def requestCancel(jobId: Ident): F[Boolean]

View File

@@ -13,7 +13,7 @@ import docspell.store.records.RJob
 class ContextImpl[F[_]: Functor, A](
     val args: A,
     val logger: Logger[F],
-    val store: Store[F],
+    store: Store[F],
     val config: SchedulerConfig,
     val jobId: Ident
 ) extends Context[F, A] {
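
Note: dropping the `val` is all it takes here. `store` becomes a plain constructor parameter, still usable inside `ContextImpl` but no longer a public member, so it satisfies the slimmed-down `Context` trait. The Scala semantics in isolation (a generic example, not docspell code):

    // `val` promotes a constructor parameter to a public member; without it,
    // the parameter is only visible inside the class body.
    class Wrapper(val exposed: String, internal: String) {
      def render: String = exposed + internal // internal usable here
    }
    // new Wrapper("a", "b").exposed   // compiles
    // new Wrapper("a", "b").internal  // does not compile: not a member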

View File

@@ -64,8 +64,23 @@ final class SchedulerImpl[F[_]: Async](
         .drain
     )
 
-  def getRunning: F[Vector[RJob]] =
-    state.get.flatMap(s => QJob.findAll(s.getRunning, store))
+  def getRunning: F[Vector[Job[String]]] =
+    state.get
+      .flatMap(s => QJob.findAll(s.getRunning, store))
+      .map(
+        _.map(rj =>
+          Job(
+            rj.id,
+            rj.task,
+            rj.group,
+            rj.args,
+            rj.subject,
+            rj.submitter,
+            rj.priority,
+            rj.tracker
+          )
+        )
+      )
 
   def requestCancel(jobId: Ident): F[Boolean] =
     logger.info(s"Scheduler requested to cancel job: ${jobId.id}") *>