Improve glob and filter archive entries

This commit is contained in:
Eike Kettner
2020-11-11 08:52:21 +01:00
parent 27eb5d70de
commit 4fd6e02ec0
11 changed files with 211 additions and 72 deletions

View File

@ -9,24 +9,33 @@ import cats.implicits._
import fs2.{Pipe, Stream}
import docspell.common.Binary
import docspell.common.Glob
object Zip {
def unzipP[F[_]: ConcurrentEffect: ContextShift](
chunkSize: Int,
blocker: Blocker
blocker: Blocker,
glob: Glob
): Pipe[F, Byte, Binary[F]] =
s => unzip[F](chunkSize, blocker)(s)
s => unzip[F](chunkSize, blocker, glob)(s)
def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)(
def unzip[F[_]: ConcurrentEffect: ContextShift](
chunkSize: Int,
blocker: Blocker,
glob: Glob
)(
data: Stream[F, Byte]
): Stream[F, Binary[F]] =
data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker))
data
.through(fs2.io.toInputStream[F])
.flatMap(in => unzipJava(in, chunkSize, blocker, glob))
def unzipJava[F[_]: Sync: ContextShift](
in: InputStream,
chunkSize: Int,
blocker: Blocker
blocker: Blocker,
glob: Glob
): Stream[F, Binary[F]] = {
val zin = new ZipInputStream(in)
@ -39,6 +48,7 @@ object Zip {
.resource(nextEntry)
.repeat
.unNoneTerminate
.filter(ze => glob.matchFilenameOrPath(ze.getName()))
.map { ze =>
val name = Paths.get(ze.getName()).getFileName.toString
val data =

View File

@ -4,6 +4,7 @@ import minitest._
import cats.effect._
import cats.implicits._
import scala.concurrent.ExecutionContext
import docspell.common.Glob
object ZipTest extends SimpleTestSuite {
@ -12,7 +13,7 @@ object ZipTest extends SimpleTestSuite {
test("unzip") {
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
val uncomp = zipFile.through(Zip.unzip(8192, blocker))
val uncomp = zipFile.through(Zip.unzip(8192, blocker, Glob.all))
uncomp
.evalMap { entry =>