Add PDF metadata to the extracted text so that it is included in the full-text index

This commit is contained in:
Eike Kettner
2020-07-19 01:07:49 +02:00
parent 209c068436
commit cec4948710
3 changed files with 19 additions and 1 deletion

View File

@@ -97,7 +97,10 @@ object TextExtraction {
res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
meta = item.changeMeta(
ra.id,
rm => rm.setContentIfEmpty(res.map(_.text.trim).filter(_.nonEmpty))
rm =>
rm.setContentIfEmpty(
res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
)
)
tags = res.flatMap(_.pdfMeta).map(_.keywordList).getOrElse(Nil)
est <- dst