mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Add PDF metadata to the extracted text so that it is included in the full-text index
This commit is contained in:
@ -97,7 +97,10 @@ object TextExtraction {
|
||||
res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
|
||||
meta = item.changeMeta(
|
||||
ra.id,
|
||||
rm => rm.setContentIfEmpty(res.map(_.text.trim).filter(_.nonEmpty))
|
||||
rm =>
|
||||
rm.setContentIfEmpty(
|
||||
res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
|
||||
)
|
||||
)
|
||||
tags = res.flatMap(_.pdfMeta).map(_.keywordList).getOrElse(Nil)
|
||||
est <- dst
|
||||
|
Reference in New Issue
Block a user