Store the language used for processing on the attachmentmeta record

Issue: #570
This commit is contained in:
Eike Kettner
2021-01-15 23:30:49 +01:00
parent 6cf3f9be5a
commit a70e9ab614
9 changed files with 113 additions and 14 deletions

View File

@ -78,7 +78,14 @@ object AttachmentPageCount {
s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
) *> ctx.store.transact(
RAttachmentMeta.insert(
RAttachmentMeta(ra.id, None, Nil, MetaProposalList.empty, md.pageCount.some)
RAttachmentMeta(
ra.id,
None,
Nil,
MetaProposalList.empty,
md.pageCount.some,
None
)
)
)
else 0.pure[F]

View File

@ -108,7 +108,18 @@ object ConvertPdf {
ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf)
.flatMap(r =>
txt.map(t => (r, item.changeMeta(ra.id, _.setContentIfEmpty(t.some)).some))
txt.map(t =>
(
r,
item
.changeMeta(
ra.id,
ctx.args.meta.language,
_.setContentIfEmpty(t.some)
)
.some
)
)
)
case ConversionResult.UnsupportedFormat(mt) =>

View File

@ -32,8 +32,12 @@ case class ItemData(
def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
dateLabels.find(m => m.rm.id == rm.id).map(_.dates).getOrElse(Vector.empty)
def mapMeta(attachId: Ident, f: RAttachmentMeta => RAttachmentMeta): ItemData = {
val item = changeMeta(attachId, f)
def mapMeta(
attachId: Ident,
lang: Language,
f: RAttachmentMeta => RAttachmentMeta
): ItemData = {
val item = changeMeta(attachId, lang, f)
val next = metas.map(a => if (a.id == attachId) item else a)
copy(metas = next)
}
@ -43,13 +47,14 @@ case class ItemData(
def changeMeta(
attachId: Ident,
lang: Language,
f: RAttachmentMeta => RAttachmentMeta
): RAttachmentMeta =
f(findOrCreate(attachId))
f(findOrCreate(attachId, lang))
def findOrCreate(attachId: Ident): RAttachmentMeta =
def findOrCreate(attachId: Ident, lang: Language): RAttachmentMeta =
metas.find(_.id == attachId).getOrElse {
RAttachmentMeta.empty(attachId)
RAttachmentMeta.empty(attachId, lang)
}
}

View File

@ -78,7 +78,7 @@ object TextExtraction {
pair._2
)
val rm = item.findOrCreate(ra.id)
val rm = item.findOrCreate(ra.id, lang)
rm.content match {
case Some(_) =>
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
@ -102,6 +102,7 @@ object TextExtraction {
res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
meta = item.changeMeta(
ra.id,
lang,
rm =>
rm.setContentIfEmpty(
res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)