mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-07 07:35:59 +00:00
Redo pdf conversion and text extraction on reprocess
When processing a new file, conversion and text extraction are skipped if they are detected to be already done. This prevents running expensive tasks again after restarting/retrying. When explicitly reprocessing a file, these tasks should run again and replace the existing results.
This commit is contained in:
parent
a7ee0aa08b
commit
f8bd42e5bd
@ -40,14 +40,14 @@ object ConvertPdf {
|
|||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
|
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
|
||||||
isConverted(ctx)(ra).flatMap {
|
isConverted(ctx)(ra).flatMap {
|
||||||
case true =>
|
case true if ctx.args.isNormalProcessing =>
|
||||||
ctx.logger.info(
|
ctx.logger.info(
|
||||||
s"Conversion to pdf already done for attachment ${ra.name}."
|
s"Conversion to pdf already done for attachment ${ra.name}."
|
||||||
) *>
|
) *>
|
||||||
ctx.store
|
ctx.store
|
||||||
.transact(RAttachmentMeta.findById(ra.id))
|
.transact(RAttachmentMeta.findById(ra.id))
|
||||||
.map(rmOpt => (ra, rmOpt))
|
.map(rmOpt => (ra, rmOpt))
|
||||||
case false =>
|
case _ =>
|
||||||
findMime(ctx)(ra).flatMap(m =>
|
findMime(ctx)(ra).flatMap(m =>
|
||||||
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m)
|
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m)
|
||||||
)
|
)
|
||||||
|
@ -84,10 +84,10 @@ object TextExtraction {
|
|||||||
|
|
||||||
val rm = item.findOrCreate(ra.id, lang)
|
val rm = item.findOrCreate(ra.id, lang)
|
||||||
rm.content match {
|
rm.content match {
|
||||||
case Some(_) =>
|
case Some(_) if ctx.args.isNormalProcessing =>
|
||||||
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
|
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
|
||||||
makeTextData((rm, Nil)).pure[F]
|
makeTextData((rm, Nil)).pure[F]
|
||||||
case None =>
|
case _ =>
|
||||||
extractTextToMeta[F](ctx, cfg, lang, item)(ra)
|
extractTextToMeta[F](ctx, cfg, lang, item)(ra)
|
||||||
.map(makeTextData)
|
.map(makeTextData)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user