mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 09:58:26 +00:00
Add a task to re-process files of an item
This commit is contained in:
@ -14,6 +14,7 @@ import docspell.joex.fts.{MigrationTask, ReIndexTask}
|
||||
import docspell.joex.hk._
|
||||
import docspell.joex.notify._
|
||||
import docspell.joex.process.ItemHandler
|
||||
import docspell.joex.process.ReProcessItem
|
||||
import docspell.joex.scanmailbox._
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.joexapi.client.JoexClient
|
||||
@ -96,6 +97,13 @@ object JoexAppImpl {
|
||||
ItemHandler.onCancel[F]
|
||||
)
|
||||
)
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
ReProcessItemArgs.taskName,
|
||||
ReProcessItem[F](cfg, fts),
|
||||
ReProcessItem.onCancel[F]
|
||||
)
|
||||
)
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
NotifyDueItemsArgs.taskName,
|
||||
|
@ -126,11 +126,46 @@ object ConvertPdf {
|
||||
.compile
|
||||
.lastOrError
|
||||
.map(fm => Ident.unsafe(fm.id))
|
||||
.flatMap(fmId =>
|
||||
ctx.store
|
||||
.transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName))
|
||||
.map(_ => fmId)
|
||||
)
|
||||
.flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId))
|
||||
.map(fmId => ra.copy(fileId = fmId, name = newName))
|
||||
}
|
||||
|
||||
/** Points the attachment `ra` at the file `fmId` (storing `newName`) and
  * afterwards tries to remove the file the attachment record referenced
  * before the update.
  *
  * The previous file is kept when `RAttachmentSource.isSameFile` reports
  * `true` for it. A failure while deleting the previous file is only
  * logged; it does not fail the returned effect.
  *
  * @param ctx     provides the store and the logger
  * @param ra      the attachment whose record is updated
  * @param fmId    id of the file the attachment should reference from now on
  * @param newName name written to the attachment record
  */
private def updateAttachment[F[_]: Sync](
    ctx: Context[F, _],
    ra: RAttachment,
    fmId: Ident,
    newName: Option[String]
): F[Unit] = {
  // Deletes the file of the previous record; errors end up in the log only.
  def deletePrevious(prev: RAttachment): F[Unit] =
    ctx.logger.info("Deleting previous attachment file") *>
      ctx.store.bitpeace
        .delete(prev.fileId.id)
        .compile
        .drain
        .attempt
        .flatMap {
          case Left(err) =>
            ctx.logger
              .error(err)(s"Cannot delete previous attachment file: ${prev}")
          case Right(_) =>
            ().pure[F]
        }

  // Deletes the previous file unless `isSameFile` says it must be kept.
  def cleanup(prev: RAttachment): F[Unit] =
    ctx.store
      .transact(RAttachmentSource.isSameFile(ra.id, prev.fileId))
      .flatMap(keep => if (keep) ().pure[F] else deletePrevious(prev))

  // The record is read *before* it is updated, so that the old file id
  // is still known when cleaning up.
  ctx.store
    .transact(RAttachment.findById(ra.id))
    .flatMap { previous =>
      ctx.store
        .transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName))
        .flatMap(_ => previous.fold(().pure[F])(cleanup))
    }
}
|
||||
}
|
||||
|
@ -27,6 +27,17 @@ object ProcessItem {
|
||||
.flatMap(SetGivenData[F](itemOps))
|
||||
.flatMap(Task.setProgress(99))
|
||||
|
||||
/** Runs the attachment related processing steps for `item` in order:
  * `ConvertPdf`, `TextExtraction` and `analysisOnly`. After each step the
  * task progress is set to 30, 60 and 90 respectively.
  *
  * @param cfg  supplies the `convert` and `extraction` settings
  * @param fts  full-text search client handed to `TextExtraction`
  * @param item the item data threaded through all steps
  */
def processAttachments[F[_]: ConcurrentEffect: ContextShift](
    cfg: Config,
    fts: FtsClient[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = {
  // step 1: conversion
  val converted =
    ConvertPdf(cfg.convert, item).flatMap(Task.setProgress(30))

  // step 2: text extraction
  val extracted =
    converted
      .flatMap(TextExtraction(cfg.extraction, fts))
      .flatMap(Task.setProgress(60))

  // step 3: analysis
  extracted
    .flatMap(analysisOnly[F](cfg))
    .flatMap(Task.setProgress(90))
}
|
||||
|
||||
def analysisOnly[F[_]: Sync](
|
||||
cfg: Config
|
||||
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||
|
@ -0,0 +1,131 @@
|
||||
package docspell.joex.process
|
||||
|
||||
import cats.data.OptionT
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common._
|
||||
import docspell.ftsclient.FtsClient
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records.RAttachmentSource
|
||||
import docspell.store.records.RCollective
|
||||
import docspell.store.records.RItem
|
||||
|
||||
/** Task that re-processes the files of an existing item. */
object ReProcessItem {
  type Args = ReProcessItemArgs

  /** Loads the item named in the task arguments, runs the processing on
    * it and discards the resulting item data.
    */
  def apply[F[_]: ConcurrentEffect: ContextShift](
      cfg: Config,
      fts: FtsClient[F]
  ): Task[F, Args, Unit] =
    loadItem[F]
      .flatMap(data => safeProcess[F](cfg, fts)(data))
      .map(_ => ())

  /** Cancel handler: only writes a warning to the job log. */
  def onCancel[F[_]: Sync: ContextShift]: Task[F, Args, Unit] =
    logWarn[F]("Now cancelling re-processing.")

  // --- Helpers

  /** Predicate telling whether an attachment is part of the selection in
    * the task arguments. An empty selection accepts every attachment.
    */
  private def contains[F[_]](ctx: Context[F, Args]): RAttachment => Boolean = {
    val wanted = ctx.args.attachments.toSet
    ra => wanted.isEmpty || wanted.contains(ra.id)
  }

  /** Loads the item together with its attachments and attachment sources
    * and builds the `ItemData` to process.
    *
    * Each selected attachment is replaced by a copy that carries the
    * `fileId` and `name` of the source entry with the same id; selected
    * attachments without such an entry are left out. The effect fails
    * with an `Exception` if the item cannot be found.
    */
  def loadItem[F[_]: Sync]: Task[F, Args, ItemData] =
    Task { ctx =>
      val itemId   = ctx.args.itemId
      val selected = contains(ctx)

      // a `def`, so the exception is only created when it is needed
      def notFound: F[ItemData] =
        Sync[F].raiseError(new Exception(s"Item not found: ${itemId.id}"))

      val loaded =
        for {
          item    <- OptionT(ctx.store.transact(RItem.findById(itemId)))
          attachs <- OptionT.liftF(ctx.store.transact(RAttachment.findByItem(item.id)))
          sources <-
            OptionT.liftF(ctx.store.transact(RAttachmentSource.findByItem(itemId)))
        } yield {
          val sourceById = sources.map(s => s.id -> s).toMap
          // The default processing only works on RAttachments and leaves
          // the original files alone. Therefore the original files are
          // copied into the attachments before running it.
          val fromSource =
            attachs
              .filter(selected)
              .flatMap { a =>
                sourceById
                  .get(a.id)
                  .map(src => a.copy(fileId = src.fileId, name = src.name))
              }

          ItemData(
            item,
            fromSource,
            Vector.empty,
            Vector.empty,
            sourceById.view.mapValues(_.fileId).toMap,
            MetaProposalList.empty,
            Nil
          )
        }

      loaded.getOrElseF(notFound)
    }

  /** Runs `ProcessItem.processAttachments` on `data`. The arguments of
    * this task are translated into `ProcessItemArgs` using the item's
    * collective, the item id and the language from `getLanguage`.
    */
  def processFiles[F[_]: ConcurrentEffect: ContextShift](
      cfg: Config,
      fts: FtsClient[F],
      data: ItemData
  ): Task[F, Args, ItemData] = {

    def toProcessArgs(lang: Language, args: Args): F[ProcessItemArgs] = {
      val meta = ProcessItemArgs.ProcessMeta(
        data.item.cid,
        args.itemId.some,
        lang,
        None, //direction
        "",   //source-id
        None, //folder
        Seq.empty
      )
      ProcessItemArgs(meta, Nil).pure[F]
    }

    getLanguage[F].flatMap { lang =>
      ProcessItem
        .processAttachments[F](cfg, fts)(data)
        .contramap[Args](args => toProcessArgs(lang, args))
    }
  }

  /** Language of the collective found via `RCollective.findByItem`;
    * `Language.German` when no collective is found.
    */
  def getLanguage[F[_]: Sync]: Task[F, Args, Language] =
    Task { ctx =>
      OptionT(ctx.store.transact(RCollective.findByItem(ctx.args.itemId)))
        .map(_.language)
        .getOrElse(Language.German)
    }

  /** Exposes the `isLastRetry` value of the task context. */
  def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
    Task(ctx => ctx.isLastRetry)

  /** Runs `processFiles`. On the last retry a failure is additionally
    * logged as a warning before the error is raised again; on earlier
    * attempts the failure is passed on as is.
    */
  def safeProcess[F[_]: ConcurrentEffect: ContextShift](
      cfg: Config,
      fts: FtsClient[F]
  )(data: ItemData): Task[F, Args, ItemData] = {
    val processing = processFiles[F](cfg, fts, data)

    isLastRetry[F].flatMap { last =>
      if (last)
        processing.attempt.flatMap {
          case Left(ex) =>
            logWarn[F](
              "Processing failed on last retry."
            ).andThen(_ => Sync[F].raiseError(ex))
          case Right(d) =>
            Task.pure(d)
        }
      else
        processing
    }
  }

  /** Task that logs `msg` at warn level to the logger of the context. */
  private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
    Task(ctx => ctx.logger.warn(msg))
}
|
Reference in New Issue
Block a user