Update scalafmt settings

This commit is contained in:
eikek
2021-09-22 17:23:24 +02:00
parent c37f1d7c31
commit 9013f2de5b
277 changed files with 1579 additions and 1615 deletions

View File

@ -20,14 +20,14 @@ object ExtractResult {
case class UnsupportedFormat(mime: MimeType) extends ExtractResult {
val textOption = None
val pdfMeta = None
val pdfMeta = None
}
def unsupportedFormat(mt: MimeType): ExtractResult =
UnsupportedFormat(mt)
case class Failure(ex: Throwable) extends ExtractResult {
val textOption = None
val pdfMeta = None
val pdfMeta = None
}
def failure(ex: Throwable): ExtractResult =
Failure(ex)

View File

@ -14,8 +14,7 @@ import docspell.common._
object Ocr {
/** Extract the text of all pages in the given pdf file.
*/
/** Extract the text of all pages in the given pdf file. */
def extractPdf[F[_]: Async](
pdf: Stream[F, Byte],
logger: Logger[F],
@ -30,8 +29,7 @@ object Ocr {
.last
}
/** Extract the text from the given image file
*/
/** Extract the text from the given image file */
def extractImage[F[_]: Async](
img: Stream[F, Byte],
logger: Logger[F],
@ -79,7 +77,7 @@ object Ocr {
.copy(args = xargs)
.replace(
Map(
"{{infile}}" -> "-",
"{{infile}}" -> "-",
"{{outfile}}" -> "%d.tif"
)
)
@ -99,7 +97,7 @@ object Ocr {
): Stream[F, Path] = {
val cmd = ghostscript.replace(
Map(
"{{infile}}" -> pdf.absolute.toString,
"{{infile}}" -> pdf.absolute.toString,
"{{outfile}}" -> "%d.tif"
)
)
@ -123,7 +121,7 @@ object Ocr {
val targetFile = img.resolveSibling("u-" + img.fileName.toString).absolute
val cmd = unpaper.replace(
Map(
"{{infile}}" -> img.absolute.toString,
"{{infile}}" -> img.absolute.toString,
"{{outfile}}" -> targetFile.toString
)
)
@ -139,8 +137,7 @@ object Ocr {
}
}
/** Run tesseract on the given image file and return the extracted text.
*/
/** Run tesseract on the given image file and return the extracted text. */
private[extract] def runTesseractFile[F[_]: Async](
img: Path,
logger: Logger[F],
@ -159,8 +156,7 @@ object Ocr {
.map(_.stdout)
}
/** Run tesseract on the given image file and return the extracted text.
*/
/** Run tesseract on the given image file and return the extracted text. */
private[extract] def runTesseractStdin[F[_]: Async](
img: Stream[F, Byte],
logger: Logger[F],

View File

@ -11,9 +11,9 @@ import docspell.common.MimeType
object OcrType {
val jpeg = MimeType.jpeg
val png = MimeType.png
val png = MimeType.png
val tiff = MimeType.tiff
val pdf = MimeType.pdf
val pdf = MimeType.pdf
val all = Set(jpeg, png, tiff, pdf)

View File

@ -28,9 +28,9 @@ object OdfExtract {
def get(is: InputStream) =
Try {
val handler = new BodyContentHandler()
val pctx = new ParseContext()
val meta = new Metadata()
val handler = new BodyContentHandler()
val pctx = new ParseContext()
val meta = new Metadata()
val ooparser = new OpenDocumentParser()
ooparser.parse(is, handler, meta, pctx)
Text(Option(handler.toString))

View File

@ -10,8 +10,8 @@ import docspell.common.MimeType
object OdfType {
val odt = MimeType.application("vnd.oasis.opendocument.text")
val ods = MimeType.application("vnd.oasis.opendocument.spreadsheet")
val odt = MimeType.application("vnd.oasis.opendocument.text")
val ods = MimeType.application("vnd.oasis.opendocument.spreadsheet")
val odtAlias = MimeType.application("x-vnd.oasis.opendocument.text")
val odsAlias = MimeType.application("x-vnd.oasis.opendocument.spreadsheet")

View File

@ -30,7 +30,7 @@ object PdfboxExtract {
.withDocumentStream(data) { doc =>
(for {
txt <- readText(doc)
md <- readMetaData(doc)
md <- readMetaData(doc)
} yield (txt, Some(md).filter(_.nonEmpty))).pure[F]
}
.attempt

View File

@ -11,12 +11,12 @@ import docspell.common.MimeType
object PoiType {
val msoffice = MimeType.application("x-tika-msoffice")
val ooxml = MimeType.application("x-tika-ooxml")
val ooxml = MimeType.application("x-tika-ooxml")
val docx =
MimeType.application("vnd.openxmlformats-officedocument.wordprocessingml.document")
val xlsx = MimeType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet")
val xls = MimeType.application("vnd.ms-excel")
val doc = MimeType.application("msword")
val xls = MimeType.application("vnd.ms-excel")
val doc = MimeType.application("msword")
val all = Set(msoffice, ooxml, docx, xlsx, xls, doc)

View File

@ -22,7 +22,7 @@ class OdfExtractTest extends FunSuite {
test("test extract from odt") {
files.foreach { case (file, len) =>
val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
val str1 = OdfExtract.get(is).fold(throw _, identity)
assertEquals(str1.length, len)

View File

@ -22,20 +22,20 @@ class PdfboxExtractTest extends FunSuite {
test("extract text from text PDFs by inputstream") {
textPDFs.foreach { case (file, txt) =>
val url = file.toJavaUrl.fold(sys.error, identity)
val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity)
val url = file.toJavaUrl.fold(sys.error, identity)
val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity)
val received = removeFormatting(str.value)
val expect = removeFormatting(txt)
val expect = removeFormatting(txt)
assertEquals(received, expect)
}
}
test("extract text from text PDFs via Stream") {
textPDFs.foreach { case (file, txt) =>
val data = file.readURL[IO](8192)
val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity)
val data = file.readURL[IO](8192)
val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity)
val received = removeFormatting(str.value)
val expect = removeFormatting(txt)
val expect = removeFormatting(txt)
assertEquals(received, expect)
}
}

View File

@ -17,10 +17,10 @@ import munit._
class PoiExtractTest extends FunSuite {
val officeFiles = List(
ExampleFiles.examples_sample_doc -> 6241,
ExampleFiles.examples_sample_doc -> 6241,
ExampleFiles.examples_sample_docx -> 6179,
ExampleFiles.examples_sample_xlsx -> 660,
ExampleFiles.examples_sample_xls -> 660
ExampleFiles.examples_sample_xls -> 660
)
test("extract text from ms office files") {

View File

@ -14,8 +14,8 @@ class RtfExtractTest extends FunSuite {
test("extract text from rtf using java input-stream") {
val file = ExampleFiles.examples_sample_rtf
val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
val str = RtfExtract.get(is).fold(throw _, identity)
val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
val str = RtfExtract.get(is).fold(throw _, identity)
assertEquals(str.length, 7342)
}
}