Reprocessing now sets metadata on an item if not in state "confirmed"
When reprocessing an item, the metadata of all *files* is replaced. This change also sets some metadata on the item itself, but only if the item is not in state "confirmed". Confirmed items are not touched, but the metadata of their files is still updated.
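Both LinkProposal and SetGivenData now expose the same pair of entry points: onlyNew guards the regular processing pipeline and never touches an item that already exists, while the plain apply is what reprocessing calls, skipping only confirmed items. A minimal, self-contained sketch of that dispatch (the state names and the corrOrg field are simplified stand-ins for illustration, not the real docspell types; the real methods return a Task and log a debug message):

    // Simplified sketch of the two guards this commit introduces.
    sealed trait ItemState
    case object Processing extends ItemState // item is still being created
    case object Created    extends ItemState // item exists, not yet confirmed
    case object Confirmed  extends ItemState // user has confirmed the metadata

    final case class Item(state: ItemState, corrOrg: Option[String])

    object SetMetadataSketch {
      // Initial processing path: leave any already-existing item untouched.
      def onlyNew(item: Item): Item =
        if (item.state != Processing) item
        else apply(item)

      // Reprocessing path: update everything except confirmed items.
      def apply(item: Item): Item =
        if (item.state == Confirmed) item // confirmed: item metadata stays as-is
        else item.copy(corrOrg = Some("Acme Corp")) // e.g. link a found proposal

      // SetMetadataSketch(Item(Created, None))   => corrOrg is set
      // SetMetadataSketch(Item(Confirmed, None)) => unchanged
    }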
parent ebaa31898e
commit 058c31e1f6
@@ -113,7 +113,7 @@ object JoexAppImpl {
       .withTask(
         JobTask.json(
           ReProcessItemArgs.taskName,
-          ReProcessItem[F](cfg, fts, analyser, regexNer),
+          ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer),
           ReProcessItem.onCancel[F]
         )
       )
@@ -10,11 +10,19 @@ import docspell.store.records.RItem
 
 object LinkProposal {
 
-  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
     if (data.item.state.isValid)
       Task
         .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item"))
         .map(_ => data)
+    else
+      LinkProposal[F](data)
+
+  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+    if (data.item.state == ItemState.Confirmed)
+      Task
+        .log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item"))
+        .map(_ => data)
     else
       Task { ctx =>
         val proposals = data.finalProposals
@@ -22,8 +22,8 @@ object ProcessItem {
     ExtractArchive(item)
       .flatMap(Task.setProgress(20))
       .flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80)))
-      .flatMap(LinkProposal[F])
-      .flatMap(SetGivenData[F](itemOps))
+      .flatMap(LinkProposal.onlyNew[F])
+      .flatMap(SetGivenData.onlyNew[F](itemOps))
       .flatMap(Task.setProgress(99))
       .flatMap(RemoveEmptyItem(itemOps))
 
@@ -5,6 +5,7 @@ import cats.effect._
 import cats.implicits._
 
 import docspell.analysis.TextAnalyser
+import docspell.backend.ops.OItem
 import docspell.common._
 import docspell.ftsclient.FtsClient
 import docspell.joex.Config
@@ -22,12 +23,17 @@ object ReProcessItem {
   def apply[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
       fts: FtsClient[F],
+      itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F]
   ): Task[F, Args, Unit] =
-    loadItem[F]
-      .flatMap(safeProcess[F](cfg, fts, analyser, regexNer))
-      .map(_ => ())
+    Task
+      .log[F, Args](_.info("===== Start reprocessing ======"))
+      .flatMap(_ =>
+        loadItem[F]
+          .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer))
+          .map(_ => ())
+      )
 
   def onCancel[F[_]]: Task[F, Args, Unit] =
     logWarn("Now cancelling re-processing.")
@@ -58,6 +64,11 @@ object ReProcessItem {
             a.copy(fileId = src.fileId, name = src.name)
           }
       )
+      _ <- OptionT.liftF(
+        ctx.logger.debug(
+          s"Loaded item and ${attachSrc.size} attachments to reprocess"
+        )
+      )
     } yield ItemData(
       item,
       attachSrc,
@@ -76,6 +87,7 @@ object ReProcessItem {
   def processFiles[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
       fts: FtsClient[F],
+      itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F],
       data: ItemData
@@ -89,9 +101,9 @@ object ReProcessItem {
       data.item.cid,
       args.itemId.some,
       lang,
       None, //direction
-      "", //source-id
+      data.item.source, //source-id
       None, //folder
       Seq.empty,
       false,
       None,
@@ -103,6 +115,8 @@ object ReProcessItem {
     getLanguage[F].flatMap { lang =>
       ProcessItem
         .processAttachments[F](cfg, fts, analyser, regexNer)(data)
+        .flatMap(LinkProposal[F])
+        .flatMap(SetGivenData[F](itemOps))
         .contramap[Args](convertArgs(lang))
     }
   }
@@ -121,12 +135,13 @@ object ReProcessItem {
   def safeProcess[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
       fts: FtsClient[F],
+      itemOps: OItem[F],
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F]
   )(data: ItemData): Task[F, Args, ItemData] =
     isLastRetry[F].flatMap {
       case true =>
-        processFiles[F](cfg, fts, analyser, regexNer, data).attempt
+        processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt
           .flatMap({
             case Right(d) =>
               Task.pure(d)
@@ -136,7 +151,7 @@ object ReProcessItem {
           ).andThen(_ => Sync[F].raiseError(ex))
         })
       case false =>
-        processFiles[F](cfg, fts, analyser, regexNer, data)
+        processFiles[F](cfg, fts, itemOps, analyser, regexNer, data)
     }
 
   private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
@@ -8,13 +8,20 @@ import docspell.common._
 import docspell.joex.scheduler.Task
 
 object SetGivenData {
+  type Args = ProcessItemArgs
 
-  def apply[F[_]: Sync](
-      ops: OItem[F]
-  )(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def onlyNew[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] =
     if (data.item.state.isValid)
       Task
-        .log[F, ProcessItemArgs](_.debug(s"Not setting data on existing item"))
+        .log[F, Args](_.debug(s"Not setting data on existing item"))
+        .map(_ => data)
+    else
+      SetGivenData[F](ops)(data)
+
+  def apply[F[_]: Sync](ops: OItem[F])(data: ItemData): Task[F, Args, ItemData] =
+    if (data.item.state == ItemState.Confirmed)
+      Task
+        .log[F, Args](_.debug(s"Not setting data on confirmed item"))
         .map(_ => data)
     else
       setFolder(data, ops).flatMap(d => setTags[F](d, ops))
@@ -22,7 +29,7 @@ object SetGivenData {
   private def setFolder[F[_]: Sync](
       data: ItemData,
       ops: OItem[F]
-  ): Task[F, ProcessItemArgs, ItemData] =
+  ): Task[F, Args, ItemData] =
     Task { ctx =>
       val itemId = data.item.id
       val folderId = ctx.args.meta.folderId
@@ -41,7 +48,7 @@ object SetGivenData {
   private def setTags[F[_]: Sync](
       data: ItemData,
       ops: OItem[F]
-  ): Task[F, ProcessItemArgs, ItemData] =
+  ): Task[F, Args, ItemData] =
     Task { ctx =>
       val itemId = data.item.id
       val collective = ctx.args.meta.collective
@@ -2113,7 +2113,11 @@ paths:
       summary: Start reprocessing the files of the item.
       description: |
         This submits a job that will re-process the files (either all
-        or the ones specified) of the item and replace the metadata.
+        or the ones specified) of the item and replace their metadata.
+
+        If the item is not in "confirmed" state, its associated metadata
+        is also updated. Otherwise only the file metadata is updated
+        (like extracted text etc).
       security:
         - authTokenHeader: []
       parameters:
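The description above documents the REST trigger for this job. A hedged usage sketch follows; the URL path, the auth header name, and the request body are assumptions inferred from docspell's REST conventions, not taken from this diff, so verify them against the full openapi spec:

    // Hedged sketch: submit a reprocess job for one item. The endpoint path,
    // the X-Docspell-Auth header, and the {"ids": []} body (assumed to mean
    // "all files") are assumptions -- check the openapi spec before relying on them.
    import java.net.URI
    import java.net.http.{HttpClient, HttpRequest, HttpResponse}

    object ReprocessDemo extends App {
      val itemId = "<item-id>" // placeholder, not a real id
      val token  = sys.env.getOrElse("DOCSPELL_TOKEN", "<auth-token>")

      val request = HttpRequest.newBuilder()
        .uri(URI.create(s"http://localhost:7880/api/v1/sec/item/$itemId/reprocess"))
        .header("X-Docspell-Auth", token)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString("""{"ids": []}"""))
        .build()

      val response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString())
      println(s"${response.statusCode()} ${response.body()}") // job was submitted
    }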