Add support for archive files

If an attachment is recognized as an archive, it is now first extracted
into potentially multiple attachments. This is the first step in
processing. The original archive file is also stored, and the resulting
attachments are associated with their original archive.

First support is implemented for zip files.
This commit is contained in:
Eike Kettner
2020-03-19 22:42:27 +01:00
parent 2a7066650f
commit 4ed7a137f7
12 changed files with 419 additions and 27 deletions

Binary file not shown.

View File

@ -0,0 +1,30 @@
package docspell.files
import minitest._
import cats.effect._
import cats.implicits._
import scala.concurrent.ExecutionContext
/** Tests for `Zip.unzip`, run against the example archive
  * `ExampleFiles.letters_zip`.
  */
object ZipTest extends SimpleTestSuite {

  // Value handed to both `readURL` and `Zip.unzip`
  // (presumably a chunk size in bytes -- TODO confirm against their signatures).
  private val chunkSize = 8192

  val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Implicit values carry an explicit type so that implicit resolution does
  // not depend on inference of the right-hand side.
  implicit val CS: ContextShift[IO] = IO.contextShift(ExecutionContext.global)

  test("unzip") {
    val zipFile = ExampleFiles.letters_zip.readURL[IO](chunkSize, blocker)
    val entries = zipFile.through(Zip.unzip(chunkSize, blocker))

    // Verify each emitted entry and keep its name, so that we can check
    // afterwards that at least one entry was actually verified.
    val seenNames = entries
      .evalMap { entry =>
        // Counts the elements emitted by `entry.data` by mapping each one to 1
        // and summing them up (presumably one element per byte -- confirm).
        val countElements = entry.data.map(_ => 1).foldMonoid.compile.lastOrError

        countElements.map { size =>
          if (entry.name.endsWith(".pdf")) {
            assertEquals(entry.name, "letter-de.pdf")
            assertEquals(size, 34815)
          } else {
            assertEquals(entry.name, "letter-en.txt")
            assertEquals(size, 1131)
          }
          entry.name
        }
      }
      .compile
      .toList
      .unsafeRunSync()

    // Without this check the test would pass vacuously if `unzip` emitted
    // no entries, because all other assertions run once per entry.
    assert(seenNames.nonEmpty, "unzip produced no entries, so nothing was verified")
  }
}