mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 18:39:33 +00:00
Redo pdf conversion and text extraction on reprocess
When processing a new file, conversion and text extraction are skipped if they are detected to be already done. This prevents running expensive tasks again after restarting/retrying. When explicitly reprocessing a file, these tasks should run again and replace the existing results.
This commit is contained in:
parent
a7ee0aa08b
commit
f8bd42e5bd
@ -40,14 +40,14 @@ object ConvertPdf {
|
||||
Task { ctx =>
|
||||
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
|
||||
isConverted(ctx)(ra).flatMap {
|
||||
case true =>
|
||||
case true if ctx.args.isNormalProcessing =>
|
||||
ctx.logger.info(
|
||||
s"Conversion to pdf already done for attachment ${ra.name}."
|
||||
) *>
|
||||
ctx.store
|
||||
.transact(RAttachmentMeta.findById(ra.id))
|
||||
.map(rmOpt => (ra, rmOpt))
|
||||
case false =>
|
||||
case _ =>
|
||||
findMime(ctx)(ra).flatMap(m =>
|
||||
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m)
|
||||
)
|
||||
|
@ -84,10 +84,10 @@ object TextExtraction {
|
||||
|
||||
val rm = item.findOrCreate(ra.id, lang)
|
||||
rm.content match {
|
||||
case Some(_) =>
|
||||
case Some(_) if ctx.args.isNormalProcessing =>
|
||||
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
|
||||
makeTextData((rm, Nil)).pure[F]
|
||||
case None =>
|
||||
case _ =>
|
||||
extractTextToMeta[F](ctx, cfg, lang, item)(ra)
|
||||
.map(makeTextData)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user