mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Use ocrmypdf tool to create pdf/a during conversion
- Use another external tool to convert pdf to pdf which also adds the extracted text as another layer into the pdf - Although not used, the external conversion routine will now check for an existing text file that is named as the pdf file with extension `.txt`. If present it is included in the conversion result and will be used as the extracted text. - text extraction for pdf files happens now on the converted file, because it may already contain the text from the conversion step and thus avoids running OCR twice. - All errors during conversion are not fatal; processing continues without a converted file.
This commit is contained in:
@ -3,8 +3,10 @@ package docspell.store.records
|
||||
import docspell.common._
|
||||
import docspell.store.impl.Implicits._
|
||||
import docspell.store.impl._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
|
||||
import bitpeace.FileMeta
|
||||
import bitpeace.Mimetype
|
||||
import doobie._
|
||||
import doobie.implicits._
|
||||
|
||||
@ -30,4 +32,13 @@ object RFileMeta {
|
||||
|
||||
selectSimple(Columns.all, table, Columns.id.is(fid)).query[FileMeta].option
|
||||
}
|
||||
|
||||
def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = {
|
||||
import bitpeace.sql._
|
||||
|
||||
selectSimple(Seq(Columns.mimetype), table, Columns.id.is(fid))
|
||||
.query[Mimetype]
|
||||
.option
|
||||
.map(_.map(_.toLocal))
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user