diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index 69a48906..c98d95d5 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -113,7 +113,7 @@ object JoexAppImpl { .withTask( JobTask.json( ReProcessItemArgs.taskName, - ReProcessItem[F](cfg, fts, analyser, regexNer), + ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer), ReProcessItem.onCancel[F] ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index 6fa15978..6d0c8ac0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -10,11 +10,19 @@ import docspell.store.records.RItem object LinkProposal { - def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = if (data.item.state.isValid) Task .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item")) .map(_ => data) + else + LinkProposal[F](data) + + def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + if (data.item.state == ItemState.Confirmed) + Task + .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item")) + .map(_ => data) else Task { ctx => val proposals = data.finalProposals diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index 1ba548de..f3fd1862 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -22,8 +22,8 @@ object ProcessItem { ExtractArchive(item) .flatMap(Task.setProgress(20)) 
.flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80))) - .flatMap(LinkProposal[F]) - .flatMap(SetGivenData[F](itemOps)) + .flatMap(LinkProposal.onlyNew[F]) + .flatMap(SetGivenData.onlyNew[F](itemOps)) .flatMap(Task.setProgress(99)) .flatMap(RemoveEmptyItem(itemOps)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index e4e40f49..ae9911d1 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -5,6 +5,7 @@ import cats.effect._ import cats.implicits._ import docspell.analysis.TextAnalyser +import docspell.backend.ops.OItem import docspell.common._ import docspell.ftsclient.FtsClient import docspell.joex.Config @@ -22,12 +23,17 @@ object ReProcessItem { def apply[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, fts: FtsClient[F], + itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F] ): Task[F, Args, Unit] = - loadItem[F] - .flatMap(safeProcess[F](cfg, fts, analyser, regexNer)) - .map(_ => ()) + Task + .log[F, Args](_.info("===== Start reprocessing ======")) + .flatMap(_ => + loadItem[F] + .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer)) + .map(_ => ()) + ) def onCancel[F[_]]: Task[F, Args, Unit] = logWarn("Now cancelling re-processing.") @@ -58,6 +64,11 @@ object ReProcessItem { a.copy(fileId = src.fileId, name = src.name) } ) + _ <- OptionT.liftF( + ctx.logger.debug( + s"Loaded item and ${attachSrc.size} attachments to reprocess" + ) + ) } yield ItemData( item, attachSrc, @@ -76,6 +87,7 @@ object ReProcessItem { def processFiles[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, fts: FtsClient[F], + itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F], data: ItemData @@ -89,9 +101,9 @@ object ReProcessItem { data.item.cid, args.itemId.some, lang, - None, //direction - "", 
//source-id - None, //folder + None, //direction + data.item.source, //source-id + None, //folder Seq.empty, false, None, @@ -103,6 +115,8 @@ object ReProcessItem { getLanguage[F].flatMap { lang => ProcessItem .processAttachments[F](cfg, fts, analyser, regexNer)(data) + .flatMap(LinkProposal[F]) + .flatMap(SetGivenData[F](itemOps)) .contramap[Args](convertArgs(lang)) } } @@ -121,12 +135,13 @@ object ReProcessItem { def safeProcess[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, fts: FtsClient[F], + itemOps: OItem[F], analyser: TextAnalyser[F], regexNer: RegexNerFile[F] )(data: ItemData): Task[F, Args, ItemData] = isLastRetry[F].flatMap { case true => - processFiles[F](cfg, fts, analyser, regexNer, data).attempt + processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt .flatMap({ case Right(d) => Task.pure(d) @@ -136,7 +151,7 @@ object ReProcessItem { ).andThen(_ => Sync[F].raiseError(ex)) }) case false => - processFiles[F](cfg, fts, analyser, regexNer, data) + processFiles[F](cfg, fts, itemOps, analyser, regexNer, data) } private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala index b668dbe9..5d1c6038 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala @@ -8,13 +8,20 @@ import docspell.common._ import docspell.joex.scheduler.Task object SetGivenData { + type Args = ProcessItemArgs - def apply[F[_]: Sync]( - ops: OItem[F] - )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def onlyNew[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] = if (data.item.state.isValid) Task - .log[F, ProcessItemArgs](_.debug(s"Not setting data on existing item")) + .log[F, Args](_.debug(s"Not setting data on existing item")) + .map(_ => data) + else + SetGivenData[F](ops)(data) + + 
def apply[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] = + if (data.item.state == ItemState.Confirmed) + Task + .log[F, Args](_.debug(s"Not setting data on confirmed item")) .map(_ => data) else setFolder(data, ops).flatMap(d => setTags[F](d, ops)) @@ -22,7 +29,7 @@ object SetGivenData { private def setFolder[F[_]: Sync]( data: ItemData, ops: OItem[F] - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = Task { ctx => val itemId = data.item.id val folderId = ctx.args.meta.folderId @@ -41,7 +48,7 @@ object SetGivenData { private def setTags[F[_]: Sync]( data: ItemData, ops: OItem[F] - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = Task { ctx => val itemId = data.item.id val collective = ctx.args.meta.collective diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 41494740..e144ca07 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -2113,7 +2113,11 @@ paths: summary: Start reprocessing the files of the item. description: | This submits a job that will re-process the files (either all - or the ones specified) of the item and replace the metadata. + or the ones specified) of the item and replace their metadata. + + If the item is not in "confirmed" state, its associated metadata + is also updated. Otherwise only the file metadata is updated + (like extracted text etc). security: - authTokenHeader: [] parameters: