diff --git a/build.sbt b/build.sbt
index 4121cfd0..0289ed81 100644
--- a/build.sbt
+++ b/build.sbt
@@ -150,8 +150,40 @@ val exampleFiles = project.in(file("modules/example-files")).
   settings(sharedSettings).
   settings(testSettings).
   settings(
-    name := "docspell-examplefiles"
-  )
+    name := "docspell-examplefiles",
+    libraryDependencies ++=
+      Dependencies.tika,
+    // Generates `ExampleFiles.scala` into the managed test sources: one `val`
+    // per file found below the test resource directory, plus a list of all.
+    Test / sourceGenerators += Def.task {
+      val base = (Test/resourceDirectory).value
+      val files = (base ** (_.isFile)) pair sbt.io.Path.relativeTo(base)
+      // Classpath resource names always use '/', so normalise the platform
+      // separator; sorting keeps the generated source stable across runs.
+      val paths = files.toList.map(_._2.replace(java.io.File.separatorChar, '/')).sorted
+      val lines = paths.map(s => {
+        // Collapse every run of non-identifier characters into one '_'.
+        val ident = s.replaceAll("[^a-zA-Z0-9_]+", "_")
+        ident -> s"""val $ident = createUrl("${s}")"""
+      })
+      val content = s"""package docspell.examplefiles
+
+object ExampleFiles extends ExampleFilesSupport {
+
+${lines.map(_._2).mkString("\n")}
+
+val all = List(
+${lines.map(_._1).mkString(",\n")}
+)
+
+}
+"""
+      val target = (Test/sourceManaged).value/"scala"/"ExampleFiles.scala"
+      IO.createDirectory(target.getParentFile)
+      IO.write(target, content)
+      Seq(target)
+    }.taskValue
+  ).dependsOn(common)
 
 val store = project.in(file("modules/store")).
   disablePlugins(RevolverPlugin).
@@ -180,9 +212,7 @@ val text = project.in(file("modules/text")).
     name := "docspell-text",
     libraryDependencies ++=
       Dependencies.fs2 ++
-      Dependencies.tika ++
-      Dependencies.stanfordNlpCore ++
-      Dependencies.poi
+      Dependencies.stanfordNlpCore
   ).dependsOn(common, exampleFiles % "compile->compile;test->test")
 
 val convert = project.in(file("modules/convert")).
diff --git a/modules/common/src/main/scala/docspell/common/LenientUri.scala b/modules/common/src/main/scala/docspell/common/LenientUri.scala
index 03632818..8e9959d7 100644
--- a/modules/common/src/main/scala/docspell/common/LenientUri.scala
+++ b/modules/common/src/main/scala/docspell/common/LenientUri.scala
@@ -65,6 +65,12 @@ case class LenientUri(
         fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
       )
 
+  /** Fetches the content behind this URI and returns it decoded as UTF-8 text. */
+  def readText[F[_]: Sync: ContextShift](chunkSize: Int, blocker: Blocker): F[String] = {
+    val bytes = readURL[F](chunkSize, blocker)
+    bytes.through(fs2.text.utf8Decode).compile.foldMonoid
+  }
+
   def host: Option[String] =
     authority.map(a =>
       a.indexOf(':') match {
diff --git a/modules/example-files/src/test/resources/camera/letter-en.jpg b/modules/example-files/src/test/resources/camera/letter-en.jpg
new file mode 100644
index 00000000..f5da55e7
Binary files /dev/null and b/modules/example-files/src/test/resources/camera/letter-en.jpg differ
diff --git a/modules/example-files/src/test/resources/camera/letter-en.png b/modules/example-files/src/test/resources/camera/letter-en.png
new file mode 100644
index 00000000..d04e8d13
Binary files /dev/null and b/modules/example-files/src/test/resources/camera/letter-en.png differ
diff --git a/modules/example-files/src/test/resources/camera/letter-en.tiff b/modules/example-files/src/test/resources/camera/letter-en.tiff
new file mode 100644
index 00000000..62c936a0
Binary files /dev/null and b/modules/example-files/src/test/resources/camera/letter-en.tiff differ
diff --git a/modules/example-files/src/test/resources/examples/file-sample_100kB.doc b/modules/example-files/src/test/resources/examples/sample.doc
similarity index 100%
rename from modules/example-files/src/test/resources/examples/file-sample_100kB.doc
rename to modules/example-files/src/test/resources/examples/sample.doc
diff --git a/modules/example-files/src/test/resources/examples/file-sample_100kB.docx
b/modules/example-files/src/test/resources/examples/sample.docx similarity index 100% rename from modules/example-files/src/test/resources/examples/file-sample_100kB.docx rename to modules/example-files/src/test/resources/examples/sample.docx diff --git a/modules/example-files/src/test/resources/examples/file_example_ODS_10.ods b/modules/example-files/src/test/resources/examples/sample.ods similarity index 100% rename from modules/example-files/src/test/resources/examples/file_example_ODS_10.ods rename to modules/example-files/src/test/resources/examples/sample.ods diff --git a/modules/example-files/src/test/resources/examples/file-sample_100kB.odt b/modules/example-files/src/test/resources/examples/sample.odt similarity index 100% rename from modules/example-files/src/test/resources/examples/file-sample_100kB.odt rename to modules/example-files/src/test/resources/examples/sample.odt diff --git a/modules/example-files/src/test/resources/examples/file-sample_100kB.rtf b/modules/example-files/src/test/resources/examples/sample.rtf similarity index 100% rename from modules/example-files/src/test/resources/examples/file-sample_100kB.rtf rename to modules/example-files/src/test/resources/examples/sample.rtf diff --git a/modules/example-files/src/test/resources/examples/file_example_XLS_10.xls b/modules/example-files/src/test/resources/examples/sample.xls similarity index 100% rename from modules/example-files/src/test/resources/examples/file_example_XLS_10.xls rename to modules/example-files/src/test/resources/examples/sample.xls diff --git a/modules/example-files/src/test/resources/examples/file_example_XLSX_10.xlsx b/modules/example-files/src/test/resources/examples/sample.xlsx similarity index 100% rename from modules/example-files/src/test/resources/examples/file_example_XLSX_10.xlsx rename to modules/example-files/src/test/resources/examples/sample.xlsx diff --git a/modules/example-files/src/test/resources/letter-de-source.txt 
b/modules/example-files/src/test/resources/letter-de-source.txt new file mode 100644 index 00000000..a559c152 --- /dev/null +++ b/modules/example-files/src/test/resources/letter-de-source.txt @@ -0,0 +1,30 @@ +Max Mustermann + +Lilienweg 21 + +12345 Nebendorf + +E-Mail: max.muster@gmail.com + +Max Mustermann, Lilienweg 21, 12345 Nebendorf + +EasyCare AG +Abteilung Buchhaltung +Ackerweg 12 + +12346 Ulmen + +Nebendorf, 3. September 2019 +Sehr geehrte Damen und Herren, + +hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum +nächstmöglichen Termin. + +Bitte senden Sie mir innerhalb der gesetzlichen Frist von 14 Tagen eine Kündigungsbe- +stätigung zu. + +Vielen Dank im Vorraus! + +Mit freundlichen Grüßen + +Max Mustermann diff --git a/modules/example-files/src/test/resources/letter-en-source.txt b/modules/example-files/src/test/resources/letter-en-source.txt new file mode 100644 index 00000000..79bcca36 --- /dev/null +++ b/modules/example-files/src/test/resources/letter-en-source.txt @@ -0,0 +1,38 @@ +Derek Jeter + +123 Elm Ave. + +Treesville, ON MI1N 2P3 +November 7, 2016 + +Derek Jeter, 123 Elm Ave., Treesville, ON M1N 2P3, November 7, 2016 + +Mr. M. Leat + +Chief of Syrup Production +Old Sticky Pancake Company +456 Maple Lane + +Forest, ON 7TW8 9Y0 + +Hemptown, September 3, 2019 +Dear Mr. Leaf, + +Let me begin by thanking you for your past contributions to our Little League baseball +team. Your sponsorship aided in the purchase of ten full uniforms and several pieces of +baseball equipment for last year’s season. + +Next month, our company is planning an employee appreciation pancake breakfast hon- +oring retired employees for their past years of service and present employees for their +loyalty and dedication in spite of the current difficult economic conditions. + +We would like to place an order with your company for 25 pounds of pancake mix and +five gallons of maple syrup. 
We hope you will be able to provide these products in the
+bulk quantities we require.
+
+As you are a committed corporate sponsor and long-time associate, we hope that you
+will be able to join us for breakfast on December 12, 2016.
+
+Respectfully yours,
+
+Derek Jeter
diff --git a/modules/example-files/src/test/resources/scanner/jfif.jpg b/modules/example-files/src/test/resources/scanner/jfif.jpg
new file mode 100755
index 00000000..215664be
Binary files /dev/null and b/modules/example-files/src/test/resources/scanner/jfif.jpg differ
diff --git a/modules/example-files/src/test/resources/scanner/pdf13.pdf b/modules/example-files/src/test/resources/scanner/pdf13.pdf
new file mode 100755
index 00000000..9411060d
Binary files /dev/null and b/modules/example-files/src/test/resources/scanner/pdf13.pdf differ
diff --git a/modules/example-files/src/test/resources/scanner/pdfa14.pdf b/modules/example-files/src/test/resources/scanner/pdfa14.pdf
new file mode 100755
index 00000000..ec3712c3
Binary files /dev/null and b/modules/example-files/src/test/resources/scanner/pdfa14.pdf differ
diff --git a/modules/example-files/src/test/scala/docspell/examplefiles/ExampleFiles.scala b/modules/example-files/src/test/scala/docspell/examplefiles/ExampleFiles.scala
deleted file mode 100644
index 4b4fbe01..00000000
--- a/modules/example-files/src/test/scala/docspell/examplefiles/ExampleFiles.scala
+++ /dev/null
@@ -1,5 +0,0 @@
-package docspell.examplefiles
-
-final class ExampleFiles {
-
-}
diff --git a/modules/example-files/src/test/scala/docspell/examplefiles/ExampleFilesSupport.scala b/modules/example-files/src/test/scala/docspell/examplefiles/ExampleFilesSupport.scala
new file mode 100644
index 00000000..8333e3b7
--- /dev/null
+++ b/modules/example-files/src/test/scala/docspell/examplefiles/ExampleFilesSupport.scala
@@ -0,0 +1,17 @@
+package docspell.examplefiles
+
+import docspell.common._
+
+trait ExampleFilesSupport {
+
+  /** Looks up `resource` on the classpath, relative to its root, and returns
+    * its URL; raises an error when no such resource exists.
+    */
+  def createUrl(resource: String): LenientUri =
+    Option(getClass.getResource("/" + resource)) match {
+      case Some(u) => LenientUri.fromJava(u)
+      case None => sys.error(s"Resource '$resource' not found")
+    }
+
+
+}
diff --git a/modules/text/src/test/scala/docspell/text/TestFiles.scala b/modules/text/src/test/scala/docspell/text/TestFiles.scala
index a33bf4b9..ff1e6963 100644
--- a/modules/text/src/test/scala/docspell/text/TestFiles.scala
+++ b/modules/text/src/test/scala/docspell/text/TestFiles.scala
@@ -1,8 +1,8 @@
 package docspell.text
 
-import cats.effect.{Blocker, IO}
-import docspell.common.LenientUri
 import fs2.Stream
+import cats.effect.{Blocker, IO}
+import docspell.examplefiles._
 
 import scala.concurrent.ExecutionContext
 
@@ -11,86 +11,26 @@ object TestFiles {
   implicit val CS = IO.contextShift(ExecutionContext.global)
 
   val letterSourceDE: Stream[IO, Byte] =
-    LenientUri
-      .fromJava(getClass.getResource("/letter-de-source.pdf"))
+    ExampleFiles.letter_de_source_pdf
       .readURL[IO](16 * 1024, blocker)
 
   val letterSourceEN: Stream[IO, Byte] =
-    LenientUri
-      .fromJava(getClass.getResource("/letter-en-source.pdf"))
+    ExampleFiles.letter_en_source_pdf
       .readURL[IO](16 * 1024, blocker)
 
-  val letterDEText =
-    """Max Mustermann
-      |
-      |Lilienweg 21
-      |
-      |12345 Nebendorf
-      |
-      |E-Mail: max.muster@gmail.com
-      |
-      |Max Mustermann, Lilienweg 21, 12345 Nebendorf
-      |
-      |EasyCare AG
-      |Abteilung Buchhaltung
-      |Ackerweg 12
-      |
-      |12346 Ulmen
-      |
-      |Nebendorf, 3. September 2019
-      |Sehr geehrte Damen und Herren,
-      |
-      |hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum
-      |nächstmöglichen Termin.
-      |
-      |Bitte senden Sie mir innerhalb der gesetzlichen Frist von 14 Tagen eine Kündigungsbe-
-      |stätigung zu.
-      |
-      |Vielen Dank im Vorraus!
-      |
-      |Mit freundlichen Grüßen
-      |
-      |Max Mustermann
-      |""".stripMargin.trim
+  // Expected text of the German letter, read from the shared fixture file.
+  // Trimmed like the inline literal it replaces (`.stripMargin.trim`).
+  lazy val letterDEText: String =
+    ExampleFiles.letter_de_source_txt
+      .readText[IO](16 * 1024, blocker)
+      .unsafeRunSync()
+      .trim
 
-  val letterENText =
-    """Derek Jeter
-      |
-      |123 Elm Ave.
-      |
-      |Treesville, ON MI1N 2P3
-      |November 7, 2016
-      |
-      |Derek Jeter, 123 Elm Ave., Treesville, ON M1N 2P3, November 7, 2016
-      |
-      |Mr. M. Leat
-      |
-      |Chief of Syrup Production
-      |Old Sticky Pancake Company
-      |456 Maple Lane
-      |
-      |Forest, ON 7TW8 9Y0
-      |
-      |Hemptown, September 3, 2019
-      |Dear Mr. Leaf,
-      |
-      |Let me begin by thanking you for your past contributions to our Little League baseball
-      |team. Your sponsorship aided in the purchase of ten full uniforms and several pieces of
-      |baseball equipment for last year’s season.
-      |
-      |Next month, our company is planning an employee appreciation pancake breakfast hon-
-      |oring retired employees for their past years of service and present employees for their
-      |loyalty and dedication in spite of the current difficult economic conditions.
-      |
-      |We would like to place an order with your company for 25 pounds of pancake mix and
-      |five gallons of maple syrup. We hope you will be able to provide these products in the
-      |bulk quantities we require.
-      |
-      |As you are a committed corporate sponsor and long-time associate, we hope that you
-      |will be able to join us for breakfast on December 12, 2016.
-      |
-      |Respectfully yours,
-      |
-      |Derek Jeter
-      |""".stripMargin.trim
+  // Expected text of the English letter, read from the shared fixture file.
+  // Trimmed like the inline literal it replaces (`.stripMargin.trim`).
+  lazy val letterENText: String =
+    ExampleFiles.letter_en_source_txt
+      .readText[IO](16 * 1024, blocker)
+      .unsafeRunSync()
+      .trim
 }