mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Reformat with scalafmt 3.0.0
This commit is contained in:
@ -62,8 +62,8 @@ object Ocr {
|
||||
): Stream[F, String] =
|
||||
runTesseractFile(img, logger, lang, config)
|
||||
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The files are stored to a
|
||||
* temporary location on disk and returned.
|
||||
*/
|
||||
private[extract] def runGhostscript[F[_]: Async](
|
||||
pdf: Stream[F, Byte],
|
||||
@ -88,8 +88,8 @@ object Ocr {
|
||||
.flatMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
|
||||
}
|
||||
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The files are stored to a
|
||||
* temporary location on disk and returned.
|
||||
*/
|
||||
private[extract] def runGhostscriptFile[F[_]: Async](
|
||||
pdf: Path,
|
||||
@ -111,8 +111,8 @@ object Ocr {
|
||||
private def pathEndsWith(ext: String): Path => Boolean =
|
||||
p => p.fileName.toString.endsWith(ext)
|
||||
|
||||
/** Run unpaper to optimize the image for ocr. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
/** Run unpaper to optimize the image for ocr. The files are stored to a temporary
|
||||
* location on disk and returned.
|
||||
*/
|
||||
private[extract] def runUnpaperFile[F[_]: Async](
|
||||
img: Path,
|
||||
@ -139,8 +139,7 @@ object Ocr {
|
||||
}
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted
|
||||
* text.
|
||||
/** Run tesseract on the given image file and return the extracted text.
|
||||
*/
|
||||
private[extract] def runTesseractFile[F[_]: Async](
|
||||
img: Path,
|
||||
@ -160,8 +159,7 @@ object Ocr {
|
||||
.map(_.stdout)
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted
|
||||
* text.
|
||||
/** Run tesseract on the given image file and return the extracted text.
|
||||
*/
|
||||
private[extract] def runTesseractStdin[F[_]: Async](
|
||||
img: Stream[F, Byte],
|
||||
|
@ -31,7 +31,7 @@ object TextExtract {
|
||||
): Stream[F, Text] =
|
||||
Stream
|
||||
.eval(TikaMimetype.detect(in, MimeTypeHint.none))
|
||||
.flatMap({
|
||||
.flatMap {
|
||||
case MimeType.pdf =>
|
||||
Stream.eval(Ocr.extractPdf(in, logger, lang, config)).unNoneTerminate
|
||||
|
||||
@ -40,7 +40,7 @@ object TextExtract {
|
||||
|
||||
case mt =>
|
||||
raiseError(s"File `$mt` not supported")
|
||||
})
|
||||
}
|
||||
.map(Text.apply)
|
||||
|
||||
private def raiseError[F[_]: Sync](msg: String): Stream[F, Nothing] =
|
||||
|
@ -49,17 +49,13 @@ object PoiExtract {
|
||||
case PoiType.docx =>
|
||||
getDocx(data)
|
||||
case PoiType.msoffice =>
|
||||
EitherT(getDoc[F](data))
|
||||
.recoverWith({ case _ =>
|
||||
EitherT(getXls[F](data))
|
||||
})
|
||||
.value
|
||||
EitherT(getDoc[F](data)).recoverWith { case _ =>
|
||||
EitherT(getXls[F](data))
|
||||
}.value
|
||||
case PoiType.ooxml =>
|
||||
EitherT(getDocx[F](data))
|
||||
.recoverWith({ case _ =>
|
||||
EitherT(getXlsx[F](data))
|
||||
})
|
||||
.value
|
||||
EitherT(getDocx[F](data)).recoverWith { case _ =>
|
||||
EitherT(getXlsx[F](data))
|
||||
}.value
|
||||
case mt =>
|
||||
Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
|
||||
}
|
||||
|
Reference in New Issue
Block a user