mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Convert some files to pdf
This commit is contained in:
@ -2,4 +2,4 @@ package docspell.extract
|
||||
|
||||
import docspell.extract.ocr.OcrConfig
|
||||
|
||||
case class ExtractConfig(ocr: OcrConfig, pdf: PdfConfig)
|
||||
/** Settings bundle for text extraction.
  *
  * @param maxImageSize limit compared against an image's `dim.product` before
  *                     OCR is attempted; when the product is greater, the OCR
  *                     branch logs a message and yields a failure result
  *                     instead of running the extraction. Presumably a pixel
  *                     count (width x height) -- TODO confirm against
  *                     `ImageSize`.
  * @param ocr          OCR settings; handed to `TextExtract.extractOCR`.
  * @param pdf          PDF-related settings (usage not visible here --
  *                     see `PdfConfig`).
  */
case class ExtractConfig(maxImageSize: Int, ocr: OcrConfig, pdf: PdfConfig)
|
||||
|
@ -9,6 +9,7 @@ import docspell.extract.poi.{PoiExtract, PoiType}
|
||||
import docspell.extract.rtf.RtfExtract
|
||||
import fs2.Stream
|
||||
import docspell.files.TikaMimetype
|
||||
import docspell.files.ImageSize
|
||||
|
||||
trait Extraction[F[_]] {
|
||||
|
||||
@ -44,14 +45,29 @@ object Extraction {
|
||||
case OdfType(_) =>
|
||||
OdfExtract.get(data).map(ExtractResult.fromEither)
|
||||
|
||||
case OcrType(_) =>
|
||||
TextExtract
|
||||
case OcrType(mt) =>
|
||||
val doExtract = TextExtract
|
||||
.extractOCR(data, blocker, logger, lang.iso3, cfg.ocr)
|
||||
.compile
|
||||
.lastOrError
|
||||
.attempt
|
||||
.map(ExtractResult.fromEither)
|
||||
|
||||
ImageSize.get(data).flatMap {
|
||||
case Some(dim) =>
|
||||
if (dim.product > cfg.maxImageSize) {
|
||||
logger.info(s"Image size (${dim.product}) is too large (max ${cfg.maxImageSize}).") *>
|
||||
ExtractResult.failure(new Exception(
|
||||
s"Image size (${dim.width}x${dim.height}) is too large (max ${cfg.maxImageSize}).")
|
||||
).pure[F]
|
||||
} else {
|
||||
doExtract
|
||||
}
|
||||
case None =>
|
||||
logger.info(s"Cannot read image data from ${mt.asString}. Extracting anyways.") *>
|
||||
doExtract
|
||||
}
|
||||
|
||||
case OdfType.container =>
|
||||
logger.info(s"File detected as ${OdfType.container}. Try to read as OpenDocument file.") *>
|
||||
OdfExtract.get(data).map(ExtractResult.fromEither)
|
||||
|
@ -4,10 +4,10 @@ import docspell.common.MimeType
|
||||
|
||||
object OdfType {
|
||||
|
||||
val odt = MimeType.application("application/vnd.oasis.opendocument.text")
|
||||
val ods = MimeType.application("application/vnd.oasis.opendocument.spreadsheet")
|
||||
val odtAlias = MimeType.application("application/x-vnd.oasis.opendocument.text")
|
||||
val odsAlias = MimeType.application("application/x-vnd.oasis.opendocument.spreadsheet")
|
||||
val odt = MimeType.application("vnd.oasis.opendocument.text")
|
||||
val ods = MimeType.application("vnd.oasis.opendocument.spreadsheet")
|
||||
val odtAlias = MimeType.application("x-vnd.oasis.opendocument.text")
|
||||
val odsAlias = MimeType.application("x-vnd.oasis.opendocument.spreadsheet")
|
||||
|
||||
val container = MimeType.zip
|
||||
|
||||
|
Reference in New Issue
Block a user