mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 10:28:27 +00:00
@ -78,7 +78,14 @@ object AttachmentPageCount {
|
||||
s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
|
||||
) *> ctx.store.transact(
|
||||
RAttachmentMeta.insert(
|
||||
RAttachmentMeta(ra.id, None, Nil, MetaProposalList.empty, md.pageCount.some)
|
||||
RAttachmentMeta(
|
||||
ra.id,
|
||||
None,
|
||||
Nil,
|
||||
MetaProposalList.empty,
|
||||
md.pageCount.some,
|
||||
None
|
||||
)
|
||||
)
|
||||
)
|
||||
else 0.pure[F]
|
||||
|
@ -108,7 +108,18 @@ object ConvertPdf {
|
||||
ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
|
||||
storePDF(ctx, cfg, ra, pdf)
|
||||
.flatMap(r =>
|
||||
txt.map(t => (r, item.changeMeta(ra.id, _.setContentIfEmpty(t.some)).some))
|
||||
txt.map(t =>
|
||||
(
|
||||
r,
|
||||
item
|
||||
.changeMeta(
|
||||
ra.id,
|
||||
ctx.args.meta.language,
|
||||
_.setContentIfEmpty(t.some)
|
||||
)
|
||||
.some
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
case ConversionResult.UnsupportedFormat(mt) =>
|
||||
|
@ -32,8 +32,12 @@ case class ItemData(
|
||||
def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
|
||||
dateLabels.find(m => m.rm.id == rm.id).map(_.dates).getOrElse(Vector.empty)
|
||||
|
||||
def mapMeta(attachId: Ident, f: RAttachmentMeta => RAttachmentMeta): ItemData = {
|
||||
val item = changeMeta(attachId, f)
|
||||
def mapMeta(
|
||||
attachId: Ident,
|
||||
lang: Language,
|
||||
f: RAttachmentMeta => RAttachmentMeta
|
||||
): ItemData = {
|
||||
val item = changeMeta(attachId, lang, f)
|
||||
val next = metas.map(a => if (a.id == attachId) item else a)
|
||||
copy(metas = next)
|
||||
}
|
||||
@ -43,13 +47,14 @@ case class ItemData(
|
||||
|
||||
def changeMeta(
|
||||
attachId: Ident,
|
||||
lang: Language,
|
||||
f: RAttachmentMeta => RAttachmentMeta
|
||||
): RAttachmentMeta =
|
||||
f(findOrCreate(attachId))
|
||||
f(findOrCreate(attachId, lang))
|
||||
|
||||
def findOrCreate(attachId: Ident): RAttachmentMeta =
|
||||
def findOrCreate(attachId: Ident, lang: Language): RAttachmentMeta =
|
||||
metas.find(_.id == attachId).getOrElse {
|
||||
RAttachmentMeta.empty(attachId)
|
||||
RAttachmentMeta.empty(attachId, lang)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ object TextExtraction {
|
||||
pair._2
|
||||
)
|
||||
|
||||
val rm = item.findOrCreate(ra.id)
|
||||
val rm = item.findOrCreate(ra.id, lang)
|
||||
rm.content match {
|
||||
case Some(_) =>
|
||||
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
|
||||
@ -102,6 +102,7 @@ object TextExtraction {
|
||||
res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
|
||||
meta = item.changeMeta(
|
||||
ra.id,
|
||||
lang,
|
||||
rm =>
|
||||
rm.setContentIfEmpty(
|
||||
res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
|
||||
|
Reference in New Issue
Block a user