Reprocessing now sets metadata on an item if it is not in state confirmed

When reprocessing an item, the metadata of all *files* is replaced.
This change now also sets some metadata on the item, but only if the
item is not in state "confirmed". Confirmed items are not touched, but
the metadata of their files is still updated.
This commit is contained in:
Eike Kettner 2021-03-11 21:43:06 +01:00
parent ebaa31898e
commit 058c31e1f6
6 changed files with 53 additions and 19 deletions

View File

@ -113,7 +113,7 @@ object JoexAppImpl {
.withTask(
JobTask.json(
ReProcessItemArgs.taskName,
ReProcessItem[F](cfg, fts, analyser, regexNer),
ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer),
ReProcessItem.onCancel[F]
)
)

View File

@ -10,11 +10,19 @@ import docspell.store.records.RItem
object LinkProposal {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
if (data.item.state.isValid)
Task
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item"))
.map(_ => data)
else
LinkProposal[F](data)
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
if (data.item.state == ItemState.Confirmed)
Task
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item"))
.map(_ => data)
else
Task { ctx =>
val proposals = data.finalProposals

View File

@ -22,8 +22,8 @@ object ProcessItem {
ExtractArchive(item)
.flatMap(Task.setProgress(20))
.flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80)))
.flatMap(LinkProposal[F])
.flatMap(SetGivenData[F](itemOps))
.flatMap(LinkProposal.onlyNew[F])
.flatMap(SetGivenData.onlyNew[F](itemOps))
.flatMap(Task.setProgress(99))
.flatMap(RemoveEmptyItem(itemOps))

View File

@ -5,6 +5,7 @@ import cats.effect._
import cats.implicits._
import docspell.analysis.TextAnalyser
import docspell.backend.ops.OItem
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.joex.Config
@ -22,12 +23,17 @@ object ReProcessItem {
def apply[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
fts: FtsClient[F],
itemOps: OItem[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
): Task[F, Args, Unit] =
loadItem[F]
.flatMap(safeProcess[F](cfg, fts, analyser, regexNer))
.map(_ => ())
Task
.log[F, Args](_.info("===== Start reprocessing ======"))
.flatMap(_ =>
loadItem[F]
.flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer))
.map(_ => ())
)
def onCancel[F[_]]: Task[F, Args, Unit] =
logWarn("Now cancelling re-processing.")
@ -58,6 +64,11 @@ object ReProcessItem {
a.copy(fileId = src.fileId, name = src.name)
}
)
_ <- OptionT.liftF(
ctx.logger.debug(
s"Loaded item and ${attachSrc.size} attachments to reprocess"
)
)
} yield ItemData(
item,
attachSrc,
@ -76,6 +87,7 @@ object ReProcessItem {
def processFiles[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
fts: FtsClient[F],
itemOps: OItem[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F],
data: ItemData
@ -89,9 +101,9 @@ object ReProcessItem {
data.item.cid,
args.itemId.some,
lang,
None, //direction
"", //source-id
None, //folder
None, //direction
data.item.source, //source-id
None, //folder
Seq.empty,
false,
None,
@ -103,6 +115,8 @@ object ReProcessItem {
getLanguage[F].flatMap { lang =>
ProcessItem
.processAttachments[F](cfg, fts, analyser, regexNer)(data)
.flatMap(LinkProposal[F])
.flatMap(SetGivenData[F](itemOps))
.contramap[Args](convertArgs(lang))
}
}
@ -121,12 +135,13 @@ object ReProcessItem {
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
fts: FtsClient[F],
itemOps: OItem[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
)(data: ItemData): Task[F, Args, ItemData] =
isLastRetry[F].flatMap {
case true =>
processFiles[F](cfg, fts, analyser, regexNer, data).attempt
processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt
.flatMap({
case Right(d) =>
Task.pure(d)
@ -136,7 +151,7 @@ object ReProcessItem {
).andThen(_ => Sync[F].raiseError(ex))
})
case false =>
processFiles[F](cfg, fts, analyser, regexNer, data)
processFiles[F](cfg, fts, itemOps, analyser, regexNer, data)
}
private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =

View File

@ -8,13 +8,20 @@ import docspell.common._
import docspell.joex.scheduler.Task
object SetGivenData {
type Args = ProcessItemArgs
def apply[F[_]: Sync](
ops: OItem[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def onlyNew[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] =
if (data.item.state.isValid)
Task
.log[F, ProcessItemArgs](_.debug(s"Not setting data on existing item"))
.log[F, Args](_.debug(s"Not setting data on existing item"))
.map(_ => data)
else
SetGivenData[F](ops)(data)
def apply[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] =
if (data.item.state == ItemState.Confirmed)
Task
.log[F, Args](_.debug(s"Not setting data on confirmed item"))
.map(_ => data)
else
setFolder(data, ops).flatMap(d => setTags[F](d, ops))
@ -22,7 +29,7 @@ object SetGivenData {
private def setFolder[F[_]: Sync](
data: ItemData,
ops: OItem[F]
): Task[F, ProcessItemArgs, ItemData] =
): Task[F, Args, ItemData] =
Task { ctx =>
val itemId = data.item.id
val folderId = ctx.args.meta.folderId
@ -41,7 +48,7 @@ object SetGivenData {
private def setTags[F[_]: Sync](
data: ItemData,
ops: OItem[F]
): Task[F, ProcessItemArgs, ItemData] =
): Task[F, Args, ItemData] =
Task { ctx =>
val itemId = data.item.id
val collective = ctx.args.meta.collective

View File

@ -2113,7 +2113,11 @@ paths:
summary: Start reprocessing the files of the item.
description: |
This submits a job that will re-process the files (either all
or the ones specified) of the item and replace the metadata.
or the ones specified) of the item and replace their metadata.
If the item is not in "confirmed" state, its associated metadata
is also updated. Otherwise only the file metadata is updated
(like extracted text etc).
security:
- authTokenHeader: []
parameters: