mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Add pdf meta data to extracted text to add it to full-text index
This commit is contained in:
@ -28,6 +28,12 @@ object ExtractResult {
|
||||
|
||||
/** Successful extraction result: holds the extracted text and, when
  * available, the PDF metadata that came with it.
  */
case class Success(text: String, pdfMeta: Option[PdfMetaData]) extends ExtractResult {

  /** The extracted text; always defined for a successful result. */
  val textOption = Some(text)

  /** Returns a copy whose text has the textual form of the PDF metadata
    * appended, separated from the existing text by a blank line. If there is
    * no metadata, or the metadata yields no text, this instance is returned
    * unchanged.
    */
  def appendPdfMetaToText: Success =
    pdfMeta
      .flatMap(_.asText)
      .map(metaText => copy(text = text + "\n\n" + metaText))
      .getOrElse(this)
}
|
||||
/** Builds a [[Success]] from the given text and optional PDF metadata,
  * typed as the general [[ExtractResult]].
  */
def success(text: String, pdfMeta: Option[PdfMetaData]): ExtractResult =
  Success(text = text, pdfMeta = pdfMeta)
|
||||
|
@ -24,6 +24,15 @@ final case class PdfMetaData(
|
||||
|
||||
/** Splits the raw keyword string on commas and semicolons.
  *
  * Every entry is trimmed and blank entries are dropped, so whitespace
  * around a delimiter, repeated delimiters, or an empty keyword string do
  * not produce padded or empty keywords. Returns `Nil` when no keywords are
  * present.
  */
def keywordList: List[String] =
  keywords.toList
    .flatMap(_.split("[,;]").toList)
    .map(_.trim)
    .filter(_.nonEmpty)
|
||||
|
||||
/** Renders title, author, subject and creation date, one per line, in that
  * order. Keywords are not included; they are handled separately.
  *
  * @return `None` when none of these fields is set, otherwise the present
  *         values joined by newlines
  */
def asText: Option[String] = {
  val created = creationDate.map(_.toUtcDate.toString)
  val lines = List(title, author, subject, created).flatMap(_.toList)
  if (lines.isEmpty) None
  else Some(lines.mkString("\n"))
}
|
||||
}
|
||||
|
||||
object PdfMetaData {
|
||||
|
Reference in New Issue
Block a user