mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Update scalafmt settings
This commit is contained in:
@ -20,14 +20,14 @@ object ExtractResult {
|
||||
|
||||
case class UnsupportedFormat(mime: MimeType) extends ExtractResult {
|
||||
val textOption = None
|
||||
val pdfMeta = None
|
||||
val pdfMeta = None
|
||||
}
|
||||
def unsupportedFormat(mt: MimeType): ExtractResult =
|
||||
UnsupportedFormat(mt)
|
||||
|
||||
case class Failure(ex: Throwable) extends ExtractResult {
|
||||
val textOption = None
|
||||
val pdfMeta = None
|
||||
val pdfMeta = None
|
||||
}
|
||||
def failure(ex: Throwable): ExtractResult =
|
||||
Failure(ex)
|
||||
|
@ -14,8 +14,7 @@ import docspell.common._
|
||||
|
||||
object Ocr {
|
||||
|
||||
/** Extract the text of all pages in the given pdf file.
|
||||
*/
|
||||
/** Extract the text of all pages in the given pdf file. */
|
||||
def extractPdf[F[_]: Async](
|
||||
pdf: Stream[F, Byte],
|
||||
logger: Logger[F],
|
||||
@ -30,8 +29,7 @@ object Ocr {
|
||||
.last
|
||||
}
|
||||
|
||||
/** Extract the text from the given image file
|
||||
*/
|
||||
/** Extract the text from the given image file */
|
||||
def extractImage[F[_]: Async](
|
||||
img: Stream[F, Byte],
|
||||
logger: Logger[F],
|
||||
@ -79,7 +77,7 @@ object Ocr {
|
||||
.copy(args = xargs)
|
||||
.replace(
|
||||
Map(
|
||||
"{{infile}}" -> "-",
|
||||
"{{infile}}" -> "-",
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
)
|
||||
@ -99,7 +97,7 @@ object Ocr {
|
||||
): Stream[F, Path] = {
|
||||
val cmd = ghostscript.replace(
|
||||
Map(
|
||||
"{{infile}}" -> pdf.absolute.toString,
|
||||
"{{infile}}" -> pdf.absolute.toString,
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
)
|
||||
@ -123,7 +121,7 @@ object Ocr {
|
||||
val targetFile = img.resolveSibling("u-" + img.fileName.toString).absolute
|
||||
val cmd = unpaper.replace(
|
||||
Map(
|
||||
"{{infile}}" -> img.absolute.toString,
|
||||
"{{infile}}" -> img.absolute.toString,
|
||||
"{{outfile}}" -> targetFile.toString
|
||||
)
|
||||
)
|
||||
@ -139,8 +137,7 @@ object Ocr {
|
||||
}
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted text.
|
||||
*/
|
||||
/** Run tesseract on the given image file and return the extracted text. */
|
||||
private[extract] def runTesseractFile[F[_]: Async](
|
||||
img: Path,
|
||||
logger: Logger[F],
|
||||
@ -159,8 +156,7 @@ object Ocr {
|
||||
.map(_.stdout)
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted text.
|
||||
*/
|
||||
/** Run tesseract on the given image file and return the extracted text. */
|
||||
private[extract] def runTesseractStdin[F[_]: Async](
|
||||
img: Stream[F, Byte],
|
||||
logger: Logger[F],
|
||||
|
@ -11,9 +11,9 @@ import docspell.common.MimeType
|
||||
object OcrType {
|
||||
|
||||
val jpeg = MimeType.jpeg
|
||||
val png = MimeType.png
|
||||
val png = MimeType.png
|
||||
val tiff = MimeType.tiff
|
||||
val pdf = MimeType.pdf
|
||||
val pdf = MimeType.pdf
|
||||
|
||||
val all = Set(jpeg, png, tiff, pdf)
|
||||
|
||||
|
@ -28,9 +28,9 @@ object OdfExtract {
|
||||
|
||||
def get(is: InputStream) =
|
||||
Try {
|
||||
val handler = new BodyContentHandler()
|
||||
val pctx = new ParseContext()
|
||||
val meta = new Metadata()
|
||||
val handler = new BodyContentHandler()
|
||||
val pctx = new ParseContext()
|
||||
val meta = new Metadata()
|
||||
val ooparser = new OpenDocumentParser()
|
||||
ooparser.parse(is, handler, meta, pctx)
|
||||
Text(Option(handler.toString))
|
||||
|
@ -10,8 +10,8 @@ import docspell.common.MimeType
|
||||
|
||||
object OdfType {
|
||||
|
||||
val odt = MimeType.application("vnd.oasis.opendocument.text")
|
||||
val ods = MimeType.application("vnd.oasis.opendocument.spreadsheet")
|
||||
val odt = MimeType.application("vnd.oasis.opendocument.text")
|
||||
val ods = MimeType.application("vnd.oasis.opendocument.spreadsheet")
|
||||
val odtAlias = MimeType.application("x-vnd.oasis.opendocument.text")
|
||||
val odsAlias = MimeType.application("x-vnd.oasis.opendocument.spreadsheet")
|
||||
|
||||
|
@ -30,7 +30,7 @@ object PdfboxExtract {
|
||||
.withDocumentStream(data) { doc =>
|
||||
(for {
|
||||
txt <- readText(doc)
|
||||
md <- readMetaData(doc)
|
||||
md <- readMetaData(doc)
|
||||
} yield (txt, Some(md).filter(_.nonEmpty))).pure[F]
|
||||
}
|
||||
.attempt
|
||||
|
@ -11,12 +11,12 @@ import docspell.common.MimeType
|
||||
object PoiType {
|
||||
|
||||
val msoffice = MimeType.application("x-tika-msoffice")
|
||||
val ooxml = MimeType.application("x-tika-ooxml")
|
||||
val ooxml = MimeType.application("x-tika-ooxml")
|
||||
val docx =
|
||||
MimeType.application("vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||
val xlsx = MimeType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
||||
val xls = MimeType.application("vnd.ms-excel")
|
||||
val doc = MimeType.application("msword")
|
||||
val xls = MimeType.application("vnd.ms-excel")
|
||||
val doc = MimeType.application("msword")
|
||||
|
||||
val all = Set(msoffice, ooxml, docx, xlsx, xls, doc)
|
||||
|
||||
|
Reference in New Issue
Block a user