From 058c31e1f629b47469aa61ee46f87ffc50c6fb96 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 11 Mar 2021 21:43:06 +0100 Subject: [PATCH] Reprocessing now sets metadata to an item if not in state confirmed When reprocessing an item, the metadata of all *files* are replaced. This change now also sets some metadata to an item, but only if the item is not in state "confirmed". Confirmed items are not touched, but the metadata of the files is updated. --- .../scala/docspell/joex/JoexAppImpl.scala | 2 +- .../docspell/joex/process/LinkProposal.scala | 10 +++++- .../docspell/joex/process/ProcessItem.scala | 4 +-- .../docspell/joex/process/ReProcessItem.scala | 31 ++++++++++++++----- .../docspell/joex/process/SetGivenData.scala | 19 ++++++++---- .../src/main/resources/docspell-openapi.yml | 6 +++- 6 files changed, 53 insertions(+), 19 deletions(-) diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index 69a48906..c98d95d5 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -113,7 +113,7 @@ object JoexAppImpl { .withTask( JobTask.json( ReProcessItemArgs.taskName, - ReProcessItem[F](cfg, fts, analyser, regexNer), + ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer), ReProcessItem.onCancel[F] ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index 6fa15978..6d0c8ac0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -10,11 +10,19 @@ import docspell.store.records.RItem object LinkProposal { - def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = if (data.item.state.isValid) Task 
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item")) .map(_ => data) + else + LinkProposal[F](data) + + def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + if (data.item.state == ItemState.Confirmed) + Task + .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item")) + .map(_ => data) else Task { ctx => val proposals = data.finalProposals diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index 1ba548de..f3fd1862 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -22,8 +22,8 @@ object ProcessItem { ExtractArchive(item) .flatMap(Task.setProgress(20)) .flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80))) - .flatMap(LinkProposal[F]) - .flatMap(SetGivenData[F](itemOps)) + .flatMap(LinkProposal.onlyNew[F]) + .flatMap(SetGivenData.onlyNew[F](itemOps)) .flatMap(Task.setProgress(99)) .flatMap(RemoveEmptyItem(itemOps)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index e4e40f49..ae9911d1 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -5,6 +5,7 @@ import cats.effect._ import cats.implicits._ import docspell.analysis.TextAnalyser +import docspell.backend.ops.OItem import docspell.common._ import docspell.ftsclient.FtsClient import docspell.joex.Config @@ -22,12 +23,17 @@ object ReProcessItem { def apply[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, fts: FtsClient[F], + itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F] ): Task[F, Args, Unit] = - loadItem[F] - .flatMap(safeProcess[F](cfg, fts, analyser, regexNer)) - .map(_ => ()) + Task + 
.log[F, Args](_.info("===== Start reprocessing ======")) + .flatMap(_ => + loadItem[F] + .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer)) + .map(_ => ()) + ) def onCancel[F[_]]: Task[F, Args, Unit] = logWarn("Now cancelling re-processing.") @@ -58,6 +64,11 @@ object ReProcessItem { a.copy(fileId = src.fileId, name = src.name) } ) + _ <- OptionT.liftF( + ctx.logger.debug( + s"Loaded item and ${attachSrc.size} attachments to reprocess" + ) + ) } yield ItemData( item, attachSrc, @@ -76,6 +87,7 @@ object ReProcessItem { def processFiles[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, fts: FtsClient[F], + itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F], data: ItemData @@ -89,9 +101,9 @@ object ReProcessItem { data.item.cid, args.itemId.some, lang, - None, //direction - "", //source-id - None, //folder + None, //direction + data.item.source, //source-id + None, //folder Seq.empty, false, None, @@ -103,6 +115,8 @@ object ReProcessItem { getLanguage[F].flatMap { lang => ProcessItem .processAttachments[F](cfg, fts, analyser, regexNer)(data) + .flatMap(LinkProposal[F]) + .flatMap(SetGivenData[F](itemOps)) .contramap[Args](convertArgs(lang)) } } @@ -121,12 +135,13 @@ object ReProcessItem { def safeProcess[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, fts: FtsClient[F], + itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F] )(data: ItemData): Task[F, Args, ItemData] = isLastRetry[F].flatMap { case true => - processFiles[F](cfg, fts, analyser, regexNer, data).attempt + processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt .flatMap({ case Right(d) => Task.pure(d) @@ -136,7 +151,7 @@ object ReProcessItem { ).andThen(_ => Sync[F].raiseError(ex)) }) case false => - processFiles[F](cfg, fts, analyser, regexNer, data) + processFiles[F](cfg, fts, itemOps, analyser, regexNer, data) } private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] = diff --git 
a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala index b668dbe9..5d1c6038 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala @@ -8,13 +8,20 @@ import docspell.common._ import docspell.joex.scheduler.Task object SetGivenData { + type Args = ProcessItemArgs - def apply[F[_]: Sync]( - ops: OItem[F] - )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def onlyNew[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] = if (data.item.state.isValid) Task - .log[F, ProcessItemArgs](_.debug(s"Not setting data on existing item")) + .log[F, Args](_.debug(s"Not setting data on existing item")) + .map(_ => data) + else + SetGivenData[F](ops)(data) + + def apply[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] = + if (data.item.state == ItemState.Confirmed) + Task + .log[F, Args](_.debug(s"Not setting data on confirmed item")) .map(_ => data) else setFolder(data, ops).flatMap(d => setTags[F](d, ops)) @@ -22,7 +29,7 @@ object SetGivenData { private def setFolder[F[_]: Sync]( data: ItemData, ops: OItem[F] - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = Task { ctx => val itemId = data.item.id val folderId = ctx.args.meta.folderId @@ -41,7 +48,7 @@ object SetGivenData { private def setTags[F[_]: Sync]( data: ItemData, ops: OItem[F] - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = Task { ctx => val itemId = data.item.id val collective = ctx.args.meta.collective diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 41494740..e144ca07 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -2113,7 +2113,11 @@ paths: summary: Start reprocessing the 
files of the item. description: | This submits a job that will re-process the files (either all - or the ones specified) of the item and replace the metadata. + or the ones specified) of the item and replace their metadata. + + If the item is not in "confirmed" state, its associated metadata + is also updated. Otherwise only the file metadata is updated + (like extracted text etc). security: - authTokenHeader: [] parameters: