Early draft for text extraction

2025-06-20 17:39:54 +00:00 · 2020-02-17 01:57:22 +01:00
parent 1a5546fe99
commit 3d615181e0
22 changed files with 287 additions and 45 deletions
--- a/modules/files/src/test/scala/docspell/files/Playing.scala
+++ b/modules/files/src/test/scala/docspell/files/Playing.scala
@@ -0,0 +1,25 @@
+package docspell.files
+
+import cats.effect.{Blocker, ExitCode, IO, IOApp}
+import docspell.common.MimeTypeHint
+
+import scala.concurrent.ExecutionContext
+
+/** Scratch app: prints what TikaMimetype.detect reports for the sample RTF example file. */
+object Playing extends IOApp {
+  val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
+
+  /** Runs detection twice (with a filename hint, then without) and prints both results. */
+  def run(args: List[String]): IO[ExitCode] = {
+    //val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker)
+    //val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker)
+    val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192, blocker)
+    // Compose the effects and let IOApp run them; no unsafeRunSync inside an IO block.
+    for {
+      hinted <- TikaMimetype.detect(rtf,
+        MimeTypeHint.filename(ExampleFiles.examples_sample_rtf.path.segments.last))
+      unhinted <- TikaMimetype.detect(rtf, MimeTypeHint.none)
+      _ <- IO(println((hinted, unhinted)))
+    } yield ExitCode.Success
+  }
+}