Mirror of https://github.com/TheAnachronism/docspell.git
Improve performance of zip/unzip

Adds tests and includes some cleanup.

parent 483dbf5d2b
commit 6cef9d4f07
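At its core, the commit replaces the static helpers in docspell.files.Zip (which emitted Binary[F] entries that had to be saved in a second step) with a docspell.common.util.Zip[F] trait whose unzip writes entries straight to a directory and emits the extracted paths. A minimal before/after sketch of the call-site change, assuming the modules touched below (logger and dir stand in for values from the surrounding code):

import cats.effect.IO
import cats.syntax.option._
import fs2.Stream
import fs2.io.file.Path
import docspell.common.Glob
import docspell.common.util.Zip

def extract(zipData: Stream[IO, Byte], dir: Path): Stream[IO, Path] = {
  val logger = docspell.logging.getLogger[IO]
  // before: zipData.through(Zip.unzip(8192, Glob.all))
  //                .through(Zip.saveTo(logger, dir, moveUp = true))
  // after: a single pipe that unzips into `dir` and emits the written paths
  zipData.through(Zip[IO](logger.some).unzip(glob = Glob.all, targetDir = dir.some))
}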
Changed files:
  build.sbt
  modules/addonlib/src/main/scala/docspell/addons
  modules/addonlib/src/test/scala/docspell/addons
  modules/backend/src/main/scala/docspell/backend/joex
  modules/common/src
  modules/files/src
  modules/joex/src/main/scala/docspell/joex/{addon, download, multiupload, process}
build.sbt (40 changed lines)
@@ -321,25 +321,6 @@ val loggingApi = project
    Dependencies.sourcecode
  )

-// Base module, everything depends on this – including restapi and
-// joexapi modules. This should aim to have least possible
-// dependencies
-val common = project
-  .in(file("modules/common"))
-  .disablePlugins(RevolverPlugin)
-  .settings(sharedSettings)
-  .withTestSettings
-  .settings(
-    name := "docspell-common",
-    libraryDependencies ++=
-      Dependencies.fs2 ++
-        Dependencies.circe ++
-        Dependencies.circeGenericExtra ++
-        Dependencies.calevCore ++
-        Dependencies.calevCirce
-  )
-  .dependsOn(loggingApi)
-
val loggingScribe = project
  .in(file("modules/logging/scribe"))
  .disablePlugins(RevolverPlugin)
@@ -355,6 +336,25 @@ val loggingScribe = project
  )
  .dependsOn(loggingApi)

+// Base module, everything depends on this – including restapi and
+// joexapi modules. This should aim to have least possible
+// dependencies
+val common = project
+  .in(file("modules/common"))
+  .disablePlugins(RevolverPlugin)
+  .settings(sharedSettings)
+  .withTestSettingsDependsOn(loggingScribe)
+  .settings(
+    name := "docspell-common",
+    libraryDependencies ++=
+      Dependencies.fs2 ++
+        Dependencies.circe ++
+        Dependencies.circeGenericExtra ++
+        Dependencies.calevCore ++
+        Dependencies.calevCirce
+  )
+  .dependsOn(loggingApi)
+
// Some example files for testing
// https://file-examples.com/index.php/sample-documents-download/sample-doc-download/
val files = project
@@ -393,7 +393,7 @@ ${lines.map(_._1).mkString(",\n")}
      Seq(target)
    }.taskValue
  )
-  .dependsOn(common)
+  .dependsOn(common, loggingScribe)

val query =
  crossProject(JSPlatform, JVMPlatform)
modules/addonlib/src/main/scala/docspell/addons/AddonArchive.scala
@@ -12,7 +12,8 @@ import fs2.Stream
import fs2.io.file.{Files, Path}

import docspell.common._
-import docspell.files.Zip
+import docspell.common.syntax.file._
+import docspell.common.util.{Directory, Zip}

final case class AddonArchive(url: LenientUri, name: String, version: String) {
  def nameAndVersion: String =
@@ -36,8 +37,8 @@ final case class AddonArchive(url: LenientUri, name: String, version: String) {
      case false =>
        Files[F].createDirectories(target) *>
          reader(url)
-            .through(Zip.unzip(8192, glob))
-            .through(Zip.saveTo(logger, target, moveUp = true))
+            .through(Zip[F](logger.some).unzip(glob = glob, targetDir = target.some))
+            .evalTap(_ => Directory.unwrapSingle[F](logger, target))
            .compile
            .drain
            .as(target)
@@ -72,12 +73,13 @@ object AddonArchive {
      archive: Either[Path, Stream[F, Byte]]
  ): F[(Boolean, Boolean)] = {
    val files = Files[F]
+    val logger = docspell.logging.getLogger[F]
    def forPath(path: Path): F[(Boolean, Boolean)] =
      (files.exists(path / "Dockerfile"), files.exists(path / "flake.nix")).tupled

    def forZip(data: Stream[F, Byte]): F[(Boolean, Boolean)] =
      data
-        .through(Zip.unzip(8192, Glob("Dockerfile|flake.nix")))
+        .through(Zip[F](logger.some).unzip(glob = Glob("Dockerfile|flake.nix")))
        .collect {
          case bin if bin.name == "Dockerfile" => (true, false)
          case bin if bin.name == "flake.nix" => (false, true)
modules/addonlib/src/main/scala/docspell/addons/AddonExecutor.scala
@@ -14,6 +14,7 @@ import fs2.io.file._

import docspell.common.UrlReader
import docspell.common.exec.Env
+import docspell.common.util.Directory
import docspell.logging.Logger

trait AddonExecutor[F[_]] {
modules/addonlib/src/main/scala/docspell/addons/AddonMeta.scala
@@ -15,7 +15,8 @@ import fs2.Stream
import fs2.io.file.{Files, Path}

import docspell.common.Glob
-import docspell.files.Zip
+import docspell.common.syntax.file._
+import docspell.common.util.Zip

import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.yaml.{parser => YamlParser}
@@ -153,6 +154,12 @@ object AddonMeta {
      .map(fromJsonString)
      .rethrow

+  def fromJsonFile[F[_]: Sync](file: Path): F[AddonMeta] =
+    Sync[F]
+      .blocking(java.nio.file.Files.readString(file.toNioPath))
+      .map(fromJsonString)
+      .rethrow
+
  def fromYamlString(str: String): Either[Throwable, AddonMeta] =
    YamlParser.parse(str).flatMap(_.as[AddonMeta])

@@ -164,6 +171,13 @@ object AddonMeta {
      .map(fromYamlString)
      .rethrow

+  def fromYamlFile[F[_]: Sync](file: Path): F[AddonMeta] =
+    Sync[F]
+      .blocking(YamlParser.parse(java.nio.file.Files.newBufferedReader(file.toNioPath)))
+      .rethrow
+      .map(_.as[AddonMeta])
+      .rethrow
+
  def findInDirectory[F[_]: Sync: Files](dir: Path): F[AddonMeta] = {
    val logger = docspell.logging.getLogger[F]
    val jsonFile = dir / "docspell-addon.json"
@@ -194,18 +208,22 @@ object AddonMeta {
  }

  def findInZip[F[_]: Async](zipFile: Stream[F, Byte]): F[AddonMeta] = {
+    val logger = docspell.logging.getLogger[F]
    val fail: F[AddonMeta] = Async[F].raiseError(
      new FileNotFoundException(
        s"No docspell-addon.{yaml|json} file found in zip!"
      )
    )
    zipFile
-      .through(Zip.unzip(8192, Glob("docspell-addon.*|**/docspell-addon.*")))
-      .filter(bin => !bin.name.endsWith("/"))
+      .through(
+        Zip[F](logger.some).unzip(glob = Glob("docspell-addon.*|**/docspell-addon.*"))
+      )
+      .filter(file => !file.name.endsWith("/"))
      .flatMap { bin =>
-        if (bin.extensionIn(Set("json"))) Stream.eval(AddonMeta.fromJsonBytes(bin.data))
-        else if (bin.extensionIn(Set("yaml", "yml")))
-          Stream.eval(AddonMeta.fromYamlBytes(bin.data))
+        val ext = bin.extension
+        if (ext.equalsIgnoreCase("json")) Stream.eval(AddonMeta.fromJsonFile(bin))
+        else if (Set("yaml", "yml").contains(ext.toLowerCase))
+          Stream.eval(AddonMeta.fromYamlFile(bin))
        else Stream.empty
      }
      .take(1)
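Since unzip now emits paths instead of Binary values, findInZip above dispatches on the file extension and reads the metadata with the new file-based parsers. A usage sketch (the zip byte stream is a placeholder):

import cats.effect.IO
import fs2.Stream
import docspell.addons.AddonMeta

// looks for docspell-addon.{json|yaml|yml} inside the zipped addon
def readMeta(zip: Stream[IO, Byte]): IO[AddonMeta] =
  AddonMeta.findInZip(zip)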
modules/addonlib/src/main/scala/docspell/addons/InputEnv.scala
@@ -10,6 +10,7 @@ import cats.effect.Resource
import fs2.io.file.{Files, Path}

import docspell.common.exec.Env
+import docspell.common.util.Directory

case class InputEnv(
    addons: List[AddonRef],
modules/addonlib/src/test/scala/docspell/addons/AddonGenerator.scala
@@ -13,7 +13,7 @@ import fs2.io.file.{Files, Path, PosixPermissions}

import docspell.addons.out.AddonOutput
import docspell.common.LenientUri
-import docspell.files.Zip
+import docspell.common.util.Zip

import io.circe.syntax._

@@ -59,9 +59,9 @@ object AddonGenerator {
  private def createZip(dir: Path, files: List[Path]) =
    Stream
      .emits(files)
-      .map(f => (f.fileName.toString, Files[IO].readAll(f)))
+      .map(f => (f.fileName.toString, f))
      .covary[IO]
-      .through(Zip.zip[IO](logger, 8192))
+      .through(Zip[IO](logger.some).zipFiles())
      .through(Files[IO].writeAll(dir / "addon.zip"))
      .compile
      .drain
modules/addonlib/src/test/scala/docspell/addons/AddonMetaTest.scala
@@ -7,9 +7,10 @@
package docspell.addons

import cats.effect._
+import cats.syntax.all._

import docspell.common.Glob
-import docspell.files.Zip
+import docspell.common.util.{Directory, Zip}
import docspell.logging.TestLoggingConfig

import munit._
@@ -26,8 +27,8 @@ class AddonMetaTest extends CatsEffectSuite with TestLoggingConfig with Fixtures
    for {
      _ <- dummyAddonUrl
        .readURL[IO](8192)
-        .through(Zip.unzip(8192, Glob.all))
-        .through(Zip.saveTo(logger, dir, moveUp = true))
+        .through(Zip[IO]().unzip(8192, Glob.all, dir.some))
+        .evalTap(_ => Directory.unwrapSingle(logger, dir))
        .compile
        .drain
      meta <- AddonMeta.findInDirectory[IO](dir)
modules/backend/src/main/scala/docspell/backend/joex/AddonOps.scala
@@ -16,6 +16,7 @@ import docspell.backend.ops.OAttachment
import docspell.common._
import docspell.common.bc.BackendCommandRunner
import docspell.common.exec.Env
+import docspell.common.util.Directory
import docspell.logging.Logger
import docspell.scheduler.JobStore
import docspell.store.Store
modules/backend/src/main/scala/docspell/backend/joex/AddonPostProcess.scala
@@ -17,7 +17,7 @@ import docspell.backend.JobFactory
import docspell.backend.ops.OAttachment
import docspell.common._
import docspell.common.bc.BackendCommandRunner
-import docspell.files.FileSupport
+import docspell.common.syntax.file._
import docspell.logging.Logger
import docspell.scheduler.JobStore
import docspell.store.Store
@@ -28,7 +28,7 @@ final private[joex] class AddonPostProcess[F[_]: Sync: Files](
    store: Store[F],
    attachOps: OAttachment[F],
    jobStore: JobStore[F]
-) extends FileSupport {
+) {

  def onResult(
      logger: Logger[F],
@@ -105,7 +105,7 @@ final private[joex] class AddonPostProcess[F[_]: Sync: Files](
      .getOrElse(Vector.empty)
    _ <- textFiles.traverse_ { case (key, file) =>
      withAttach(logger, key, attachs) { ra =>
-        setText(collective, ra, file.readText)
+        setText(collective, ra, file.readString)
      }
    }
    _ <- pdfFiles.traverse_ { case (key, file) =>
modules/common/src/main/scala/docspell/common/Binary.scala
@@ -9,9 +9,14 @@ package docspell.common
import java.nio.charset.Charset
import java.nio.charset.StandardCharsets

+import cats.data.OptionT
import cats.effect._
import cats.syntax.all._
+import fs2.io.file.{Files, Path}
import fs2.{Chunk, Pipe, Stream}

+import docspell.logging.Logger
+
import scodec.bits.ByteVector

final case class Binary[F[_]](name: String, mime: MimeType, data: Stream[F, Byte]) {
@@ -34,6 +39,9 @@ final case class Binary[F[_]](name: String, mime: MimeType, data: Stream[F, Byte

object Binary {

+  def apply[F[_]: Async](file: Path): Binary[F] =
+    Binary(file.fileName.toString, Files[F].readAll(file))
+
  def apply[F[_]](name: String, data: Stream[F, Byte]): Binary[F] =
    Binary[F](name, MimeType.octetStream, data)

@@ -65,6 +73,38 @@ object Binary {
  def loadAllBytes[F[_]: Sync](data: Stream[F, Byte]): F[ByteVector] =
    data.chunks.map(_.toByteVector).compile.fold(ByteVector.empty)((r, e) => r ++ e)

+  /** Convert paths into `Binary`s */
+  def toBinary[F[_]: Async]: Pipe[F, Path, Binary[F]] =
+    _.map(Binary[F](_))
+
+  /** Save one or more binaries to a target directory. */
+  def saveTo[F[_]: Async](
+      logger: Logger[F],
+      targetDir: Path
+  ): Pipe[F, Binary[F], Path] =
+    binaries =>
+      binaries
+        .filter(e => !e.name.endsWith("/"))
+        .evalMap { entry =>
+          val out = targetDir / entry.name
+          val createParent =
+            OptionT
+              .fromOption[F](out.parent)
+              .flatMapF(parent =>
+                Files[F]
+                  .exists(parent)
+                  .map(flag => Option.when(!flag)(parent))
+              )
+              .semiflatMap(p => Files[F].createDirectories(p))
+              .getOrElse(())
+
+          logger.trace(s"Copy ${entry.name} -> $out") *>
+            createParent *>
+            entry.data.through(Files[F].writeAll(out)).compile.drain
+        }
+        .drain
+        .as(targetDir)
+
  // This is a copy from org.http4s.util
  // Http4s is licensed under the Apache License 2.0
  private object util {
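Binary.saveTo carries over what Zip.saveTo used to do (minus the moveUp flag, now handled separately by Directory.unwrapSingle), and toBinary rebuilds Binary values from extracted paths for call sites that still need name/mime/data. A small sketch with an in-memory entry (the names and target directory are placeholders):

import cats.effect.IO
import fs2.{Chunk, Stream}
import fs2.io.file.Path
import docspell.common.Binary

val logger = docspell.logging.getLogger[IO]

// writes out/a/hello.txt, creating the missing parent directory first
val saved: IO[Unit] =
  Stream
    .emit(Binary[IO]("a/hello.txt", Stream.chunk(Chunk.array("hi".getBytes))))
    .covary[IO]
    .through(Binary.saveTo[IO](logger, Path("out")))
    .compile
    .drain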
modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala
@@ -6,17 +6,49 @@

package docspell.common.syntax

-import fs2.io.file.Path
+import java.nio.file.{Files => NioFiles}
+
+import cats.effect._
+import fs2.Stream
+import fs2.io.file.{Files, Path}
+
+import docspell.common.syntax.stream._
+
+import io.circe.Encoder
+import io.circe.syntax._

trait FileSyntax {

-  implicit final class PathOps(p: Path) {
+  implicit final class PathOps(self: Path) {

    def absolutePath: Path =
-      p.absolute
+      self.absolute

    def absolutePathAsString: String =
      absolutePath.toString

+    def name: String = self.fileName.toString
+    def extension: String = self.extName.stripPrefix(".")
+    def dropLeft(n: Int): Path =
+      Path.fromNioPath(self.toNioPath.subpath(n, self.toNioPath.getNameCount))
+
+    def readString[F[_]: Sync]: F[String] = Sync[F].blocking(
+      NioFiles.readString(self.toNioPath)
+    )
+
+    def sha256Hex[F[_]: Files: Sync]: F[String] =
+      Files[F].readAll(self).sha256Hex
+
+    def readAll[F[_]: Files]: Stream[F, Byte] =
+      Files[F].readAll(self)
+
+    def writeJson[A: Encoder, F[_]: Files: Sync](value: A): F[Unit] =
+      Stream
+        .emit(value.asJson.noSpaces)
+        .through(fs2.text.utf8.encode)
+        .through(Files[F].writeAll(self))
+        .compile
+        .drain
  }
}
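The extended PathOps bundles the small file helpers used across the modules below (name, extension, dropLeft, readString, sha256Hex, writeJson). A sketch of the syntax in use, with a placeholder path; note that writeJson needs its type arguments spelled out when the effect type cannot be inferred:

import cats.effect.IO
import fs2.io.file.Path
import docspell.common.syntax.file._

val demoFile = Path("target/demo.json")
val demo: IO[Unit] =
  for {
    _ <- demoFile.writeJson[Map[String, Int], IO](Map("answer" -> 42))
    text <- demoFile.readString[IO]
    sha <- demoFile.sha256Hex[IO]
    _ <- IO.println(s"$text sha256=$sha")
  } yield ()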
modules/common/src/main/scala/docspell/common/syntax/StreamSyntax.scala
@@ -12,6 +12,7 @@ import fs2.Stream

import io.circe._
import io.circe.parser._
+import scodec.bits.ByteVector

trait StreamSyntax {
  implicit class StringStreamOps[F[_]](s: Stream[F, String]) {
@@ -24,4 +25,14 @@ trait StreamSyntax {
      } yield value
    )
  }

+  implicit final class ByteStreamSyntax[F[_]](self: Stream[F, Byte]) {
+    def sha256Hex(implicit F: Sync[F]): F[String] =
+      self
+        .through(fs2.hash.sha256)
+        .compile
+        .foldChunks(ByteVector.empty)(_ ++ _.toByteVector)
+        .map(_.toHex)
+  }
}
+
+object StreamSyntax extends StreamSyntax
modules/common/src/main/scala/docspell/common/syntax/package.scala
@@ -8,6 +8,11 @@ package docspell.common

package object syntax {

+  val either = EitherSyntax
+  val stream = StreamSyntax
+  val string = StringSyntax
+  val file = FileSyntax
+
  object all extends EitherSyntax with StreamSyntax with StringSyntax with FileSyntax

}
modules/common/src/main/scala/docspell/common/util/Directory.scala
@@ -4,13 +4,17 @@
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

-package docspell.addons
+package docspell.common.util

import cats.effect._
+import cats.syntax.all._
import cats.{Applicative, Monad}
+import fs2.Stream
import fs2.io.file.{Files, Path, PosixPermissions}

+import docspell.logging.Logger
+
+/** Utility functions for directories. */
object Directory {

  def create[F[_]: Files: Applicative](dir: Path): F[Path] =
@@ -36,17 +40,6 @@ object Directory {
      d <- mkTemp(parent, prefix)
    } yield d

-  def temp2[F[_]: Files](
-      parent: Path,
-      prefix1: String,
-      prefix2: String
-  ): Resource[F, (Path, Path)] =
-    for {
-      _ <- Resource.eval(Files[F].createDirectories(parent))
-      a <- mkTemp(parent, prefix1)
-      b <- mkTemp(parent, prefix2)
-    } yield (a, b)
-
  def createTemp[F[_]: Files: Monad](
      parent: Path,
      prefix: String
@@ -71,4 +64,46 @@ object Directory {
      prefix,
      PosixPermissions.fromOctal("777")
    )

+  /** If `dir` contains only a single non-empty directory, then its contents are moved out
+    * of it and the directory is deleted. This is applied repeatedly until the condition
+    * doesn't apply anymore (there are multiple entries in the directory or none).
+    */
+  def unwrapSingle[F[_]: Sync: Files](logger: Logger[F], dir: Path): F[Boolean] =
+    Stream
+      .repeatEval(unwrapSingle1(logger, dir))
+      .takeWhile(identity)
+      .compile
+      .fold(false)(_ || _)
+
+  def unwrapSingle1[F[_]: Sync: Files](
+      logger: Logger[F],
+      dir: Path
+  ): F[Boolean] =
+    Files[F]
+      .list(dir)
+      .take(2)
+      .compile
+      .toList
+      .flatMap {
+        case subdir :: Nil =>
+          nonEmpty(subdir)
+            .flatMap {
+              case false => false.pure[F]
+              case true =>
+                for {
+                  _ <- Files[F]
+                    .list(subdir)
+                    .filter(p => p != dir)
+                    .evalTap(c => logger.trace(s"Move $c -> ${dir / c.fileName}"))
+                    .evalMap(child => Files[F].move(child, dir / child.fileName))
+                    .compile
+                    .drain
+                  _ <- Files[F].delete(subdir)
+                } yield true
+            }
+
+        case _ =>
+          false.pure[F]
+      }
}
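unwrapSingle replaces the moveUp = true behaviour of the removed Zip.saveTo: archives that wrap all content in a single top-level folder get flattened after extraction. A sketch of the effect (the directory is a placeholder):

import cats.effect.IO
import fs2.io.file.Path
import docspell.common.util.Directory

val logger = docspell.logging.getLogger[IO]

// before: dir/wrapper/file1, dir/wrapper/file2
// after:  dir/file1, dir/file2  (applied repeatedly for nested wrappers)
val changed: IO[Boolean] = Directory.unwrapSingle[IO](logger, Path("dir"))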
modules/common/src/main/scala/docspell/common/util/Zip.scala (new file, 43 lines)
@@ -0,0 +1,43 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.common.util

import cats.effect._
import fs2.io.file.Path
import fs2.{Pipe, Stream}

import docspell.common.Glob
import docspell.logging.Logger

trait Zip[F[_]] {

  def zip(chunkSize: Int = Zip.defaultChunkSize): Pipe[F, (String, Stream[F, Byte]), Byte]

  def zipFiles(chunkSize: Int = Zip.defaultChunkSize): Pipe[F, (String, Path), Byte]

  def unzip(
      chunkSize: Int = Zip.defaultChunkSize,
      glob: Glob = Glob.all,
      targetDir: Option[Path] = None
  ): Pipe[F, Byte, Path]

  def unzipFiles(
      chunkSize: Int = Zip.defaultChunkSize,
      glob: Glob = Glob.all,
      targetDir: Path => Option[Path] = _ => None
  ): Pipe[F, Path, Path]
}

object Zip {
  val defaultChunkSize = 64 * 1024

  def apply[F[_]: Async](
      logger: Option[Logger[F]] = None,
      tempDir: Option[Path] = None
  ): Zip[F] =
    new ZipImpl[F](logger, tempDir)
}
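A round-trip sketch of the new trait: zip consumes (name, bytes) pairs, zipFiles consumes (name, path) pairs, and unzip extracts into targetDir (or a fresh temp directory) and emits the written paths. Entry name and content are placeholders:

import cats.effect.IO
import cats.syntax.option._
import fs2.{Chunk, Stream}
import fs2.io.file.Path
import docspell.common.Glob
import docspell.common.util.Zip

val zipper = Zip[IO](tempDir = Path("target").some)

val roundTrip: Stream[IO, Path] =
  Stream
    .emit("hello.txt" -> Stream.chunk(Chunk.array("hi".getBytes)).covary[IO])
    .covary[IO]
    .through(zipper.zip())
    .through(zipper.unzip(glob = Glob.all))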
modules/common/src/main/scala/docspell/common/util/ZipImpl.scala (new file, 233 lines)
@@ -0,0 +1,233 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.common.util

import java.io.BufferedInputStream
import java.nio.charset.StandardCharsets
import java.util.zip.{ZipEntry, ZipFile, ZipOutputStream}

import scala.jdk.CollectionConverters._
import scala.util.Using
import scala.util.Using.Releasable

import cats.effect._
import cats.syntax.all._
import fs2.io.file.{Files, Path}
import fs2.{Chunk, Pipe, Stream}

import docspell.common.Glob
import docspell.logging.Logger

final private class ZipImpl[F[_]: Async](
    log: Option[Logger[F]],
    tempDir: Option[Path]
) extends Zip[F] {
  private[this] val logger = log.getOrElse(docspell.logging.Logger.offF[F])

  private val createTempDir: Resource[F, Path] =
    Files[F].tempDirectory(tempDir, "docspell-zip-", None)

  def zip(chunkSize: Int): Pipe[F, (String, Stream[F, Byte]), Byte] =
    in => ZipImpl.zipJava(logger, chunkSize, in.through(ZipImpl.deduplicate))

  def zipFiles(chunkSize: Int): Pipe[F, (String, Path), Byte] =
    in => ZipImpl.zipJavaPath(logger, chunkSize, in.through(ZipImpl.deduplicate))

  def unzip(
      chunkSize: Int,
      glob: Glob,
      targetDir: Option[Path]
  ): Pipe[F, Byte, Path] = { input =>
    Stream
      .resource(Files[F].tempFile(tempDir, "", ".zip", None))
      .evalTap(tempFile => input.through(Files[F].writeAll(tempFile)).compile.drain)
      .through(unzipFiles(chunkSize, glob, _ => targetDir))
  }

  def unzipFiles(
      chunkSize: Int,
      glob: Glob,
      targetDir: Path => Option[Path]
  ): Pipe[F, Path, Path] =
    input =>
      for {
        zipArchive <- input
        tempDir <- targetDir(zipArchive)
          .map(Stream.emit)
          .getOrElse(Stream.resource(createTempDir))
        entries <- Stream.eval(Sync[F].blocking {
          ZipImpl.unzipZipFile(zipArchive, tempDir, glob)
        })
        e <- Stream.chunk(entries)
      } yield e
}

object ZipImpl {
  implicit val zipFileReleasable: Releasable[ZipFile] =
    (resource: ZipFile) => resource.close()

  private def unzipZipFile(zip: Path, target: Path, glob: Glob): Chunk[Path] =
    Using.resource(new ZipFile(zip.toNioPath.toFile, StandardCharsets.UTF_8)) { zf =>
      Chunk.iterator(
        zf.entries()
          .asScala
          .filter(ze => !ze.getName.endsWith("/"))
          .filter(ze => glob.matchFilenameOrPath(ze.getName))
          .map { ze =>
            val out = target / ze.getName
            out.parent.map(_.toNioPath).foreach { p =>
              java.nio.file.Files.createDirectories(p)
            }
            Using.resource(java.nio.file.Files.newOutputStream(out.toNioPath)) { fout =>
              zf.getInputStream(ze).transferTo(fout)
              out
            }
          }
      )
    }

  // private def unzipZipStream(
  //     zip: InputStream,
  //     target: Path,
  //     glob: Glob
  // ): List[Path] =
  //   Using.resource(new ZipInputStream(zip, StandardCharsets.UTF_8)) { zf =>
  //     @annotation.tailrec
  //     def go(entry: Option[ZipEntry], result: List[Path]): List[Path] =
  //       entry match {
  //         case Some(ze) if glob.matchFilenameOrPath(ze.getName) =>
  //           val out = target / ze.getName
  //           Using.resource(java.nio.file.Files.newOutputStream(out.toNioPath)) { fout =>
  //             zf.transferTo(fout)
  //           }
  //           zf.closeEntry()
  //           go(Option(zf.getNextEntry), out :: result)
  //         case Some(_) =>
  //           zf.closeEntry()
  //           go(Option(zf.getNextEntry), result)
  //         case None =>
  //           result
  //       }
  //
  //     go(Option(zf.getNextEntry), Nil)
  //   }

  // private def unzipStream2[F[_]: Async](
  //     in: InputStream,
  //     chunkSize: Int,
  //     glob: Glob
  // ): Stream[F, Binary[F]] = {
  //   val zin = new ZipInputStream(in)
  //
  //   val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) {
  //     case Some(_) => Sync[F].delay(zin.closeEntry())
  //     case None    => ().pure[F]
  //   }
  //
  //   Stream
  //     .resource(nextEntry)
  //     .repeat
  //     .unNoneTerminate
  //     .filter(ze => glob.matchFilenameOrPath(ze.getName))
  //     .map { ze =>
  //       val name = ze.getName
  //       val data =
  //         fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, false)
  //       Binary(name, data)
  //     }
  // }

  private def deduplicate[F[_]: Sync, A]: Pipe[F, (String, A), (String, A)] = {
    def makeName(name: String, count: Int): String =
      if (count <= 0) name
      else
        name.lastIndexOf('.') match {
          case n if n > 0 =>
            s"${name.substring(0, n)}_$count${name.substring(n)}"
          case _ =>
            s"${name}_$count"
        }

    @annotation.tailrec
    def unique(
        current: Set[String],
        name: String,
        counter: Int
    ): (Set[String], String) = {
      val nextName = makeName(name, counter)
      if (current.contains(nextName))
        unique(current, name, counter + 1)
      else (current + nextName, nextName)
    }

    in =>
      Stream
        .eval(Ref.of[F, Set[String]](Set.empty[String]))
        .flatMap { ref =>
          in.evalMap { element =>
            ref
              .modify(names => unique(names, element._1, 0))
              .map(n => (n, element._2))
          }
        }
  }

  private def zipJava[F[_]: Async](
      logger: Logger[F],
      chunkSize: Int,
      entries: Stream[F, (String, Stream[F, Byte])]
  ): Stream[F, Byte] =
    fs2.io.readOutputStream(chunkSize) { out =>
      val zip = new ZipOutputStream(out, StandardCharsets.UTF_8)
      val writeEntries =
        entries.evalMap { case (name, bytes) =>
          val javaOut =
            bytes.through(
              fs2.io.writeOutputStream[F](Sync[F].pure(zip), closeAfterUse = false)
            )
          val nextEntry =
            logger.debug(s"Adding $name to zip file…") *>
              Sync[F].delay(zip.putNextEntry(new ZipEntry(name)))
          Resource
            .make(nextEntry)(_ => Sync[F].delay(zip.closeEntry()))
            .use(_ => javaOut.compile.drain)
        }
      val closeStream = Sync[F].delay(zip.close())

      writeEntries.onFinalize(closeStream).compile.drain
    }

  private def zipJavaPath[F[_]: Async](
      logger: Logger[F],
      chunkSize: Int,
      entries: Stream[F, (String, Path)]
  ): Stream[F, Byte] =
    fs2.io.readOutputStream(chunkSize) { out =>
      val zip = new ZipOutputStream(out, StandardCharsets.UTF_8)
      val writeEntries =
        entries.evalMap { case (name, file) =>
          val javaOut = Sync[F].blocking {
            val fin = new BufferedInputStream(
              java.nio.file.Files.newInputStream(file.toNioPath),
              chunkSize
            )
            fin.transferTo(zip)
            fin.close()
          }

          val nextEntry =
            logger.debug(s"Adding $name to zip file…") *>
              Sync[F].delay(zip.putNextEntry(new ZipEntry(name)))
          Resource
            .make(nextEntry)(_ => Sync[F].delay(zip.closeEntry()))
            .use(_ => javaOut)
        }
      val closeStream = Sync[F].delay(zip.close())

      writeEntries.onFinalize(closeStream).compile.drain
    }
}
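Note the deduplicate pipe guarding both zip variants above: when two entries share a name, a counter is inserted before the extension, so a second letter.pdf is written as letter_1.pdf, while a name without an extension such as notes becomes notes_1.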
modules/common/src/test/scala/docspell/common/util/DirectoryTest.scala (new file, 114 lines)
@@ -0,0 +1,114 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.common.util

import cats.effect._
import cats.syntax.all._
import fs2.io.file.{Files, Path}

import docspell.logging.TestLoggingConfig

import munit.CatsEffectSuite

class DirectoryTest extends CatsEffectSuite with TestLoggingConfig {
  val logger = docspell.logging.getLogger[IO]
  val tempDir = ResourceFixture(
    Files[IO].tempDirectory(Path("target").some, "directory-test-", None)
  )

  tempDir.test("unwrap directory when non empty") { dir =>
    for {
      _ <- createDirectoryTree(dir, List("test/file1", "test/file2"))
      r <- Directory.unwrapSingle1(logger, dir)
      files <- Files[IO]
        .list(dir)
        .map(file => dir.relativize(file).toString)
        .compile
        .toVector
      _ = {
        assert(r)
        assertEquals(files.sorted, Vector("file1", "file2"))
      }
    } yield ()
  }

  tempDir.test("unwrap directory when not empty repeat") { dir =>
    for {
      _ <- createDirectoryTree(dir, List("test/file1", "test/file2"))
      r <- Directory.unwrapSingle(logger, dir)
      files <- Files[IO]
        .list(dir)
        .map(file => dir.relativize(file).toString)
        .compile
        .toVector
      _ = {
        assert(r)
        assertEquals(files.sorted, Vector("file1", "file2"))
      }
    } yield ()
  }

  tempDir.test("unwrap nested directory") { dir =>
    for {
      _ <- createDirectoryTree(dir, List("test0/test1/file1", "test0/test1/file2"))
      r <- Directory.unwrapSingle(logger, dir)
      files <- Files[IO]
        .list(dir)
        .map(file => dir.relativize(file).toString)
        .compile
        .toVector
      _ = {
        assert(r)
        assertEquals(files.sorted, Vector("file1", "file2"))
      }
    } yield ()
  }

  tempDir.test("do nothing on empty directory") { dir =>
    for {
      r1 <- Directory.unwrapSingle1[IO](logger, dir)
      r2 <- Directory.unwrapSingle[IO](logger, dir)
      _ = {
        assert(!r1)
        assert(!r2)
      }
    } yield ()
  }

  tempDir.test("do nothing when directory contains more than one entry") { dir =>
    for {
      _ <- createDirectoryTree(dir, List("test1/file1", "file2"))
      r1 <- Directory.unwrapSingle1[IO](logger, dir)
      r2 <- Directory.unwrapSingle[IO](logger, dir)
      _ = {
        assert(!r1)
        assert(!r2)
      }
    } yield ()
  }

  tempDir.test("do nothing when directory contains more than one entry (2)") { dir =>
    for {
      _ <- createDirectoryTree(dir, List("file1", "file2"))
      r1 <- Directory.unwrapSingle1[IO](logger, dir)
      r2 <- Directory.unwrapSingle[IO](logger, dir)
      _ = {
        assert(!r1)
        assert(!r2)
      }
    } yield ()
  }

  def createDirectoryTree(dir: Path, entries: List[String]): IO[Unit] =
    entries.traverse_ { name =>
      val out = dir / name
      out.parent
        .map(p => Files[IO].createDirectories(p))
        .getOrElse(IO.unit) *>
        Files[IO].createFile(out)
    }
}
modules/files/src/main/scala/docspell/files/FileSupport.scala
@@ -7,19 +7,16 @@
package docspell.files

import cats.data.OptionT
-import cats.effect.Sync
+import cats.effect.{Async, Sync}
import cats.syntax.all._
-import fs2.Stream
+import fs2.Pipe
import fs2.io.file.{Files, Path}

-import docspell.common.{MimeType, MimeTypeHint}
-
-import io.circe.Encoder
-import io.circe.syntax._
+import docspell.common.{Binary, MimeType, MimeTypeHint}

trait FileSupport {
-  implicit final class FileOps[F[_]: Files: Sync](self: Path) {
-    def detectMime: F[Option[MimeType]] =
+  implicit final class FileOps(self: Path) {
+    def detectMime[F[_]: Files: Sync]: F[Option[MimeType]] =
      Files[F].isReadable(self).flatMap { flag =>
        OptionT
          .whenF(flag) {
@@ -32,30 +29,18 @@ trait FileSupport {
          .value
      }

-    def asTextFile(alt: MimeType => F[Unit]): F[Option[Path]] =
-      OptionT(detectMime).flatMapF { mime =>
-        if (mime.matches(MimeType.text("plain"))) self.some.pure[F]
-        else alt(mime).as(None: Option[Path])
-      }.value
-
-    def readText: F[String] =
-      Files[F]
-        .readAll(self)
-        .through(fs2.text.utf8.decode)
-        .compile
-        .string
-
-    def readAll: Stream[F, Byte] =
-      Files[F].readAll(self)
-
-    def writeJson[A: Encoder](value: A): F[Unit] =
-      Stream
-        .emit(value.asJson.noSpaces)
-        .through(fs2.text.utf8.encode)
-        .through(Files[F].writeAll(self))
-        .compile
-        .drain
+    def mimeType[F[_]: Files: Sync]: F[MimeType] =
+      detectMime.map(_.getOrElse(MimeType.octetStream))
  }

+  def detectMime[F[_]: Sync]: Pipe[F, Binary[F], Binary[F]] =
+    _.evalMap { bin =>
+      val hint = MimeTypeHint.filename(bin.name).withAdvertised(bin.mime.asString)
+      TikaMimetype.detect[F](bin.data, hint).map(mt => bin.copy(mime = mt))
+    }
+
+  def toBinaryWithMime[F[_]: Async]: Pipe[F, Path, Binary[F]] =
+    _.evalMap(file => file.mimeType.map(mt => Binary(file).copy(mime = mt)))
}

object FileSupport extends FileSupport
modules/files/src/main/scala/docspell/files/Zip.scala (file removed)
@@ -1,180 +0,0 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.files

import java.io.InputStream
import java.nio.charset.StandardCharsets
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}

import cats.data.OptionT
import cats.effect._
import cats.implicits._
import fs2.io.file.{Files, Path}
import fs2.{Pipe, Stream}

import docspell.common.Binary
import docspell.common.Glob
import docspell.logging.Logger

object Zip {

  def zip[F[_]: Async](
      logger: Logger[F],
      chunkSize: Int
  ): Pipe[F, (String, Stream[F, Byte]), Byte] =
    in => zipJava(logger, chunkSize, in.through(deduplicate))

  def unzip[F[_]: Async](
      chunkSize: Int,
      glob: Glob
  ): Pipe[F, Byte, Binary[F]] =
    s => unzipStream[F](chunkSize, glob)(s)

  def unzipStream[F[_]: Async](chunkSize: Int, glob: Glob)(
      data: Stream[F, Byte]
  ): Stream[F, Binary[F]] =
    data
      .through(fs2.io.toInputStream[F])
      .flatMap(in => unzipJava(in, chunkSize, glob))

  def saveTo[F[_]: Async](
      logger: Logger[F],
      targetDir: Path,
      moveUp: Boolean
  ): Pipe[F, Binary[F], Path] =
    binaries =>
      binaries
        .filter(e => !e.name.endsWith("/"))
        .evalMap { entry =>
          val out = targetDir / entry.name
          val createParent =
            OptionT
              .fromOption[F](out.parent)
              .flatMapF(parent =>
                Files[F]
                  .exists(parent)
                  .map(flag => Option.when(!flag)(parent))
              )
              .semiflatMap(p => Files[F].createDirectories(p))
              .getOrElse(())

          logger.trace(s"Unzip ${entry.name} -> $out") *>
            createParent *>
            entry.data.through(Files[F].writeAll(out)).compile.drain
        }
        .drain ++ Stream
        .eval(if (moveUp) moveContentsUp(logger)(targetDir) else ().pure[F])
        .as(targetDir)

  private def moveContentsUp[F[_]: Sync: Files](logger: Logger[F])(dir: Path): F[Unit] =
    Files[F]
      .list(dir)
      .take(2)
      .compile
      .toList
      .flatMap {
        case subdir :: Nil =>
          Files[F].isDirectory(subdir).flatMap {
            case false => ().pure[F]
            case true =>
              Files[F]
                .list(subdir)
                .filter(p => p != dir)
                .evalTap(c => logger.trace(s"Move $c -> ${dir / c.fileName}"))
                .evalMap(child => Files[F].move(child, dir / child.fileName))
                .compile
                .drain
          }

        case _ =>
          ().pure[F]
      }

  def unzipJava[F[_]: Async](
      in: InputStream,
      chunkSize: Int,
      glob: Glob
  ): Stream[F, Binary[F]] = {
    val zin = new ZipInputStream(in)

    val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) {
      case Some(_) => Sync[F].delay(zin.closeEntry())
      case None    => ().pure[F]
    }

    Stream
      .resource(nextEntry)
      .repeat
      .unNoneTerminate
      .filter(ze => glob.matchFilenameOrPath(ze.getName()))
      .map { ze =>
        val name = ze.getName()
        val data =
          fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, false)
        Binary(name, data)
      }
  }

  private def deduplicate[F[_]: Sync, A]: Pipe[F, (String, A), (String, A)] = {
    def makeName(name: String, count: Int): String =
      if (count <= 0) name
      else
        name.lastIndexOf('.') match {
          case n if n > 0 =>
            s"${name.substring(0, n)}_$count${name.substring(n)}"
          case _ =>
            s"${name}_$count"
        }

    def unique(
        current: Set[String],
        name: String,
        counter: Int
    ): (Set[String], String) = {
      val nextName = makeName(name, counter)
      if (current.contains(nextName))
        unique(current, name, counter + 1)
      else (current + nextName, nextName)
    }

    in =>
      Stream
        .eval(Ref.of[F, Set[String]](Set.empty[String]))
        .flatMap { ref =>
          in.evalMap { element =>
            ref
              .modify(names => unique(names, element._1, 0))
              .map(n => (n, element._2))
          }
        }
  }

  def zipJava[F[_]: Async](
      logger: Logger[F],
      chunkSize: Int,
      entries: Stream[F, (String, Stream[F, Byte])]
  ): Stream[F, Byte] =
    fs2.io.readOutputStream(chunkSize) { out =>
      val zip = new ZipOutputStream(out, StandardCharsets.UTF_8)
      val writeEntries =
        entries.evalMap { case (name, bytes) =>
          val javaOut =
            bytes.through(
              fs2.io.writeOutputStream[F](Sync[F].pure(zip), closeAfterUse = false)
            )
          val nextEntry =
            logger.debug(s"Adding $name to zip file…") *>
              Sync[F].delay(zip.putNextEntry(new ZipEntry(name)))
          Resource
            .make(nextEntry)(_ => Sync[F].delay(zip.closeEntry()))
            .use(_ => javaOut.compile.drain)
        }
      val closeStream = Sync[F].delay(zip.close())

      writeEntries.onFinalize(closeStream).compile.drain
    }
}
modules/files/src/test/scala/docspell/files/ZipTest.scala
@@ -7,10 +7,12 @@
package docspell.files

import cats.effect._
-import cats.implicits._
+import cats.syntax.option._
import fs2.Stream
import fs2.io.file.{Files, Path}

import docspell.common.Glob
+import docspell.common.syntax.file._
+import docspell.common.util.Zip
import docspell.logging.TestLoggingConfig

import munit._
@@ -21,29 +23,101 @@ class ZipTest extends CatsEffectSuite with TestLoggingConfig {
    Files[IO].tempDirectory(Path("target").some, "zip-test-", None)
  )

-  test("unzip") {
+  tempDir.test("unzip") { dir =>
    val zipFile = ExampleFiles.letters_zip.readURL[IO](8192)
-    val unzip = zipFile.through(Zip.unzip(8192, Glob.all))
+    val unzip: Stream[IO, Path] = zipFile
+      .through(Zip[IO](logger.some, dir.some).unzip(8192))

-    unzip
-      .evalMap { entry =>
-        val x = entry.data.map(_ => 1).foldMonoid.compile.lastOrError
-        x.map { size =>
-          if (entry.name.endsWith(".pdf")) {
-            assertEquals(entry.name, "letter-de.pdf")
-            assertEquals(size, 34815)
-          } else {
-            assertEquals(entry.name, "letter-en.txt")
-            assertEquals(size, 1131)
-          }
+    (for {
+      file <- unzip
+      length <- Stream.eval(Files[IO].size(file))
+      sha <- Stream.eval(file.sha256Hex[IO])
+      _ = {
+        if (file.name == "letter-de.pdf") {
+          assertEquals(length, 34815L)
+          assertEquals(
+            sha,
+            "299c15429ce327099c322b36caaec56e7a6034106531c5d1b3fd085467a8d495"
+          )
+        } else {
+          assertEquals(file.name, "letter-en.txt")
+          assertEquals(length, 1131L)
+          assertEquals(
+            sha,
+            "55eca47c65084126d7c3bbce941cadff0f642a7287ff8e0f3fc9c2c33a4bb7f0"
+          )
+        }
+      }
+    } yield ()).compile.drain
  }

+  tempDir.test("unzip directories and files") { dir =>
+    val zipFile = ExampleFiles.zip_dirs_zip.readURL[IO](8192)
+    val unzip: Stream[IO, Path] = zipFile
+      .through(Zip[IO](logger.some, dir.some).unzip(8192))
+
+    val entries =
+      for {
+        file <- unzip
+        sha <- Stream.eval(file.sha256Hex[IO])
+      } yield (file.name, file, sha)
+
+    val expectedSha =
+      "5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03"
+
+    entries
+      .map {
+        case ("file1.txt", file, realSha) =>
+          assertEquals(realSha, expectedSha)
+          val relFile = dir.relativize(file).dropLeft(1)
+          assertEquals(relFile.toString, "file1.txt")
+
+        case ("file2.txt", file, realSha) =>
+          assertEquals(realSha, expectedSha)
+          val relFile = dir.relativize(file).dropLeft(1)
+          assertEquals(relFile.toString, "dir1/file2.txt")
+
+        case ("file3.txt", file, realSha) =>
+          assertEquals(realSha, expectedSha)
+          val relFile = dir.relativize(file).dropLeft(1)
+          assertEquals(relFile.toString, "dir1/dir11/file3.txt")
+
+        case ("file4.txt", file, realSha) =>
+          assertEquals(realSha, expectedSha)
+          val relFile = dir.relativize(file).dropLeft(1)
+          assertEquals(relFile.toString, "dir2/file4.txt")
+
+        case (name, _, _) =>
+          fail(s"Unexpected file: $name")
+      }
+      .compile
+      .drain
+  }
+
+  tempDir.test("unzipTo directory tree") { _ =>
+    // val zipFile = ExampleFiles.zip_dirs_zip.readURL[IO](8192)
+    // zipFile.through(Zip.unzip(G))
+  }
+  // tempDir.test("test runtime") { _ =>
+  //   val archive = Path("./local/large-archive.zip")
+  //
+  //   for {
+  //
+  //     timer1 <- Duration.stopTime[IO]
+  //     es1 <- Files[IO]
+  //       .readAll(archive)
+  //       .through(Zip[IO]().unzip(64 * 1024))
+  //       .compile
+  //       .toVector
+  //     duration1 <- timer1
+  //
+  //     timer2 <- Duration.stopTime[IO]
+  //     es2 <- fs2.Stream
+  //       .emit(archive)
+  //       .covary[IO]
+  //       .through(Zip[IO]().unzipFiles(64 * 1024))
+  //       .compile
+  //       .toVector
+  //     duration2 <- timer2
+  //
+  //     _ <- IO.println(s">>>>1. ${duration1.formatExact}, entries: $es1")
+  //     _ <- IO.println(s">>>>2. ${duration2.formatExact}, entries: $es2")
+  //   } yield ()
+  // }
}
modules/joex/src/main/scala/docspell/joex/addon/GenericItemAddonTask.scala
@@ -15,14 +15,14 @@ import docspell.addons.{AddonTriggerType, InputEnv, Middleware}
import docspell.backend.joex.AddonOps.ExecResult
import docspell.backend.joex.{AddonOps, LoggerExtension}
import docspell.common._
-import docspell.files.FileSupport
+import docspell.common.syntax.file._
import docspell.joex.process.ItemData
import docspell.logging.Logger
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QAttachment

-object GenericItemAddonTask extends LoggerExtension with FileSupport {
+object GenericItemAddonTask extends LoggerExtension {

  private val itemSubdir = "item"
  private val itemDataJson = s"$itemSubdir/item-data.json"
modules/joex/src/main/scala/docspell/joex/download/DownloadZipTask.scala
@@ -16,7 +16,7 @@ import docspell.backend.ops.ODownloadAll
import docspell.backend.ops.ODownloadAll.model.DownloadSummary
import docspell.backend.task.DownloadZipArgs
import docspell.common._
-import docspell.files.Zip
+import docspell.common.util.Zip
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.{ItemFileMeta, QItem}
@@ -50,7 +50,7 @@ object DownloadZipTask {

  val storeZipFile =
    allFiles
-      .through(Zip.zip(ctx.logger, chunkSize))
+      .through(Zip[F](ctx.logger.some).zip(chunkSize))
      .through(
        store.fileRepo.save(
          ctx.args.accountId.collective,
modules/joex/src/main/scala/docspell/joex/multiupload/MultiUploadArchiveTask.scala
@@ -14,7 +14,7 @@ import fs2.Stream

import docspell.backend.JobFactory
import docspell.common._
-import docspell.files.Zip
+import docspell.common.util.Zip
import docspell.logging.Logger
import docspell.scheduler._
import docspell.store.Store
@@ -44,7 +44,7 @@ object MultiUploadArchiveTask {
          extractZip(store, ctx.args)(file)
            .evalTap(entry =>
              ctx.logger.debug(
-                s"Create job for entry: ${entry.files.flatMap(_.name)}"
+                s"Create job for entry: ${entry.files.flatMap(_.name).mkString(", ")}"
              )
            )
            .evalMap(makeJob[F](ctx, jobStore))
@@ -109,7 +109,8 @@ object MultiUploadArchiveTask {
  )(file: ProcessItemArgs.File): Stream[F, ProcessItemArgs] =
    store.fileRepo
      .getBytes(file.fileMetaId)
-      .through(Zip.unzip[F](8192, args.meta.fileFilter.getOrElse(Glob.all)))
+      .through(Zip[F]().unzip(glob = args.meta.fileFilter.getOrElse(Glob.all)))
+      .through(Binary.toBinary[F])
      .flatMap { entry =>
        val hint = MimeTypeHint(entry.name.some, entry.mime.asString.some)
        entry.data
modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
@@ -16,7 +16,7 @@ import cats.kernel.Order
import fs2.Stream

import docspell.common._
-import docspell.files.Zip
+import docspell.common.util.Zip
import docspell.joex.mail._
import docspell.scheduler._
import docspell.store.Store
@@ -146,7 +146,8 @@ object ExtractArchive {
  val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
  ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
    zipData
-      .through(Zip.unzip[F](8192, glob))
+      .through(Zip[F](ctx.logger.some).unzip(glob = glob))
+      .through(Binary.toBinary[F])
      .zipWithIndex
      .flatMap(handleEntry(ctx, store, ra, pos, archive, None))
      .foldMonoid