Reprocessing now sets metadata on an item if not in state confirmed

When reprocessing an item, the metadata of all *files* is replaced.
This change now also sets some metadata on the item itself, but only if
the item is not in state "confirmed". Confirmed items are not touched,
but the metadata of their files is still updated.
Eike Kettner 2021-03-11 21:43:06 +01:00
parent ebaa31898e
commit 058c31e1f6
6 changed files with 53 additions and 19 deletions
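
As an illustration, here is a minimal, self-contained sketch of the guard this change introduces; the types and the method are hypothetical and simplified, not the actual Docspell API (the real logic lives in LinkProposal and SetGivenData in the diffs below):

// Hypothetical sketch, not the Docspell API: file metadata is always replaced,
// item metadata is only (re)applied when the item has not been confirmed.
sealed trait ItemState
object ItemState {
  case object Created   extends ItemState
  case object Confirmed extends ItemState
}

final case class Item(state: ItemState, tags: List[String], folder: Option[String])

object ReprocessSketch {
  def applyItemMetadata(item: Item, tags: List[String], folder: Option[String]): Item =
    item.state match {
      case ItemState.Confirmed => item                                    // confirmed: leave item metadata untouched
      case _                   => item.copy(tags = tags, folder = folder) // otherwise: (re)apply the given metadata
    }
}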

View File

@@ -113,7 +113,7 @@ object JoexAppImpl {
       .withTask(
         JobTask.json(
           ReProcessItemArgs.taskName,
-          ReProcessItem[F](cfg, fts, analyser, regexNer),
+          ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer),
           ReProcessItem.onCancel[F]
         )
       )

View File

@@ -10,11 +10,19 @@ import docspell.store.records.RItem
 
 object LinkProposal {
 
-  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
     if (data.item.state.isValid)
       Task
         .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item"))
         .map(_ => data)
+    else
+      LinkProposal[F](data)
+
+  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+    if (data.item.state == ItemState.Confirmed)
+      Task
+        .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item"))
+        .map(_ => data)
     else
       Task { ctx =>
         val proposals = data.finalProposals

View File

@@ -22,8 +22,8 @@ object ProcessItem {
     ExtractArchive(item)
       .flatMap(Task.setProgress(20))
       .flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80)))
-      .flatMap(LinkProposal[F])
-      .flatMap(SetGivenData[F](itemOps))
+      .flatMap(LinkProposal.onlyNew[F])
+      .flatMap(SetGivenData.onlyNew[F](itemOps))
       .flatMap(Task.setProgress(99))
      .flatMap(RemoveEmptyItem(itemOps))

View File

@@ -5,6 +5,7 @@ import cats.effect._
 import cats.implicits._
 
 import docspell.analysis.TextAnalyser
+import docspell.backend.ops.OItem
 import docspell.common._
 import docspell.ftsclient.FtsClient
 import docspell.joex.Config
@@ -22,12 +23,17 @@ object ReProcessItem {
   def apply[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
       fts: FtsClient[F],
+      itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F]
   ): Task[F, Args, Unit] =
-    loadItem[F]
-      .flatMap(safeProcess[F](cfg, fts, analyser, regexNer))
-      .map(_ => ())
+    Task
+      .log[F, Args](_.info("===== Start reprocessing ======"))
+      .flatMap(_ =>
+        loadItem[F]
+          .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer))
+          .map(_ => ())
+      )
 
   def onCancel[F[_]]: Task[F, Args, Unit] =
     logWarn("Now cancelling re-processing.")
@@ -58,6 +64,11 @@ object ReProcessItem {
            a.copy(fileId = src.fileId, name = src.name)
          }
        )
+      _ <- OptionT.liftF(
+        ctx.logger.debug(
+          s"Loaded item and ${attachSrc.size} attachments to reprocess"
+        )
+      )
     } yield ItemData(
       item,
       attachSrc,
@@ -76,6 +87,7 @@ object ReProcessItem {
   def processFiles[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
       fts: FtsClient[F],
+      itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F],
       data: ItemData
@@ -89,9 +101,9 @@ object ReProcessItem {
        data.item.cid,
        args.itemId.some,
        lang,
        None, //direction
-       "", //source-id
+       data.item.source, //source-id
        None, //folder
        Seq.empty,
        false,
        None,
@@ -103,6 +115,8 @@ object ReProcessItem {
     getLanguage[F].flatMap { lang =>
       ProcessItem
         .processAttachments[F](cfg, fts, analyser, regexNer)(data)
+        .flatMap(LinkProposal[F])
+        .flatMap(SetGivenData[F](itemOps))
         .contramap[Args](convertArgs(lang))
     }
   }
@@ -121,12 +135,13 @@ object ReProcessItem {
   def safeProcess[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
       fts: FtsClient[F],
+      itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F]
   )(data: ItemData): Task[F, Args, ItemData] =
     isLastRetry[F].flatMap {
       case true =>
-        processFiles[F](cfg, fts, analyser, regexNer, data).attempt
+        processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt
           .flatMap({
             case Right(d) =>
               Task.pure(d)
@@ -136,7 +151,7 @@ object ReProcessItem {
             ).andThen(_ => Sync[F].raiseError(ex))
           })
       case false =>
-        processFiles[F](cfg, fts, analyser, regexNer, data)
+        processFiles[F](cfg, fts, itemOps, analyser, regexNer, data)
     }
 
   private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =

View File

@@ -8,13 +8,20 @@ import docspell.common._
 import docspell.joex.scheduler.Task
 
 object SetGivenData {
+  type Args = ProcessItemArgs
 
-  def apply[F[_]: Sync](
-      ops: OItem[F]
-  )(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def onlyNew[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] =
     if (data.item.state.isValid)
       Task
-        .log[F, ProcessItemArgs](_.debug(s"Not setting data on existing item"))
+        .log[F, Args](_.debug(s"Not setting data on existing item"))
+        .map(_ => data)
+    else
+      SetGivenData[F](ops)(data)
+
+  def apply[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] =
+    if (data.item.state == ItemState.Confirmed)
+      Task
+        .log[F, Args](_.debug(s"Not setting data on confirmed item"))
         .map(_ => data)
     else
       setFolder(data, ops).flatMap(d => setTags[F](d, ops))
@@ -22,7 +29,7 @@ object SetGivenData {
   private def setFolder[F[_]: Sync](
       data: ItemData,
       ops: OItem[F]
-  ): Task[F, ProcessItemArgs, ItemData] =
+  ): Task[F, Args, ItemData] =
     Task { ctx =>
       val itemId = data.item.id
       val folderId = ctx.args.meta.folderId
@@ -41,7 +48,7 @@ object SetGivenData {
   private def setTags[F[_]: Sync](
       data: ItemData,
       ops: OItem[F]
-  ): Task[F, ProcessItemArgs, ItemData] =
+  ): Task[F, Args, ItemData] =
     Task { ctx =>
       val itemId = data.item.id
       val collective = ctx.args.meta.collective

View File

@@ -2113,7 +2113,11 @@ paths:
       summary: Start reprocessing the files of the item.
       description: |
         This submits a job that will re-process the files (either all
-        or the ones specified) of the item and replace the metadata.
+        or the ones specified) of the item and replace their metadata.
+        If the item is not in "confirmed" state, its associated metadata
+        is also updated. Otherwise only the file metadata is updated
+        (like extracted text etc).
       security:
         - authTokenHeader: []
       parameters:
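
For reference, a client could trigger the reprocess job described above roughly as follows. This is only a sketch: the base URL, the /api/v1/sec/item/{id}/reprocess path, the X-Docspell-Auth header and the request-body shape are assumptions, not taken from this diff.

import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}

// Hypothetical call that submits a reprocess job for one item.
// Endpoint path, auth header and body shape are assumptions.
object ReprocessRequestSketch {
  def main(args: Array[String]): Unit = {
    val itemId = "ABC123"                                  // id of the item to reprocess
    val token  = sys.env.getOrElse("DOCSPELL_TOKEN", "")   // auth token obtained via login
    val request = HttpRequest
      .newBuilder(URI.create(s"http://localhost:7880/api/v1/sec/item/$itemId/reprocess"))
      .header("X-Docspell-Auth", token)
      .header("Content-Type", "application/json")
      .POST(HttpRequest.BodyPublishers.ofString("""{"ids": []}""")) // empty list = all files (assumption)
      .build()

    val response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString())
    println(s"${response.statusCode()} ${response.body()}")
  }
}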