Experiment with addons

Addons allow external programs to be executed in certain contexts inside
docspell. Currently they can be run after files have been processed.
Addons are provided as URLs pointing to zip files.
This commit is contained in:
eikek
2022-04-22 14:07:28 +02:00
parent e04a76faa4
commit 7fdd78ad06
166 changed files with 8181 additions and 115 deletions

View File

@ -0,0 +1,61 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.files
import cats.data.OptionT
import cats.effect.Sync
import cats.syntax.all._
import fs2.Stream
import fs2.io.file.{Files, Path}
import docspell.common.{MimeType, MimeTypeHint}
import io.circe.Encoder
import io.circe.syntax._
/** Syntax extensions for `fs2.io.file.Path`.
  *
  * Mixing in this trait brings the [[FileOps]] extension methods into
  * scope for any `Path` value.
  */
trait FileSupport {

  /** Extension methods on a `Path`, evaluated in the effect `F`. */
  implicit final class FileOps[F[_]: Files: Sync](self: Path) {

    /** Detects the mime type of this file from its contents, using the
      * file name as a hint.
      *
      * @return
      *   `None` when the file is not readable, the detected type otherwise
      */
    def detectMime: F[Option[MimeType]] =
      Files[F].isReadable(self).flatMap {
        case true =>
          val hint = MimeTypeHint.filename(self.fileName.toString)
          TikaMimetype.detect(Files[F].readAll(self), hint).map(_.some)
        case false =>
          Option.empty[MimeType].pure[F]
      }

    /** Returns this path if its detected mime type matches `text/plain`.
      *
      * @param alt
      *   effect run with the detected mime type when the file is not
      *   plain text; the overall result is `None` in that case
      * @return
      *   `Some(path)` for plain text files, `None` when the type is
      *   something else or could not be detected at all
      */
    def asTextFile(alt: MimeType => F[Unit]): F[Option[Path]] =
      detectMime.flatMap {
        case Some(mime) if mime.matches(MimeType.text("plain")) =>
          self.some.pure[F]
        case Some(mime) =>
          alt(mime).as(Option.empty[Path])
        case None =>
          Option.empty[Path].pure[F]
      }

    /** Reads the whole file and decodes it as UTF-8 into one string. */
    def readText: F[String] =
      readAll.through(fs2.text.utf8.decode).compile.string

    /** Streams the raw bytes of this file. */
    def readAll: Stream[F, Byte] =
      Files[F].readAll(self)

    /** Encodes `value` as compact JSON (no whitespace) and writes it,
      * UTF-8 encoded, to this file.
      */
    def writeJson[A: Encoder](value: A): F[Unit] = {
      val json = value.asJson.noSpaces
      Stream
        .emit(json)
        .through(fs2.text.utf8.encode)
        .through(Files[F].writeAll(self))
        .compile
        .drain
    }
  }
}
/** Companion object extending [[FileSupport]], so the `FileOps` syntax can
  * be obtained via `import FileSupport._` instead of mixing in the trait.
  */
object FileSupport extends FileSupport

View File

@ -8,11 +8,12 @@ package docspell.files
import java.io.InputStream
import java.nio.charset.StandardCharsets
import java.nio.file.Paths
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import fs2.io.file.{Files, Path}
import fs2.{Pipe, Stream}
import docspell.common.Binary
@ -27,16 +28,72 @@ object Zip {
): Pipe[F, (String, Stream[F, Byte]), Byte] =
in => zipJava(logger, chunkSize, in.through(deduplicate))
def unzipP[F[_]: Async](chunkSize: Int, glob: Glob): Pipe[F, Byte, Binary[F]] =
s => unzip[F](chunkSize, glob)(s)
/** Pipe variant of [[unzipStream]]: transforms a stream of zip file bytes
  * into a stream of its entries.
  *
  * @param chunkSize
  *   passed through to [[unzipStream]]
  * @param glob
  *   passed through to [[unzipStream]]
  */
def unzip[F[_]: Async](
    chunkSize: Int,
    glob: Glob
): Pipe[F, Byte, Binary[F]] =
  unzipStream[F](chunkSize, glob)(_)
def unzip[F[_]: Async](chunkSize: Int, glob: Glob)(
/** Unzips the given bytes by exposing them as a `java.io.InputStream` and
  * handing that stream to `unzipJava`.
  *
  * @param chunkSize
  *   forwarded to `unzipJava`
  * @param glob
  *   forwarded to `unzipJava`
  * @param data
  *   the bytes of the zip file
  */
def unzipStream[F[_]: Async](chunkSize: Int, glob: Glob)(
    data: Stream[F, Byte]
): Stream[F, Binary[F]] = {
  val inputStreams = data.through(fs2.io.toInputStream[F])
  inputStreams.flatMap(javaIn => unzipJava(javaIn, chunkSize, glob))
}
/** Writes every incoming entry to a file below `targetDir` and finally
  * emits `targetDir` itself.
  *
  * Entries whose name ends with `/` are skipped. Missing parent
  * directories of an entry are created before the entry is written.
  *
  * Entry names are taken from the archive and are therefore untrusted: an
  * entry such as `../../foo` would resolve to a location outside of
  * `targetDir` ("Zip Slip"). Such entries fail the stream with a
  * `java.util.zip.ZipException` instead of being written.
  *
  * @param logger
  *   receives a trace message for each written entry
  * @param targetDir
  *   directory below which all entries are stored
  * @param moveUp
  *   when `true`, `moveContentsUp` is applied to `targetDir` after all
  *   entries have been written
  * @return
  *   a pipe that consumes all entries and emits `targetDir` once
  */
def saveTo[F[_]: Async](
    logger: Logger[F],
    targetDir: Path,
    moveUp: Boolean
): Pipe[F, Binary[F], Path] =
  binaries => {
    // Canonical form of the target directory; only used for the
    // containment check, never for writing.
    val root = targetDir.absolute.normalize

    binaries
      .filter(e => !e.name.endsWith("/"))
      .evalMap { entry =>
        val out = targetDir / entry.name

        // Resolve `..` segments before comparing, otherwise a traversal
        // would go unnoticed. `startsWith` compares whole path segments.
        val insideTarget = out.absolute.normalize.startsWith(root)

        val createParent =
          OptionT
            .fromOption[F](out.parent)
            .flatMapF(parent =>
              Files[F]
                .exists(parent)
                .map(flag => Option.when(!flag)(parent))
            )
            .semiflatMap(p => Files[F].createDirectories(p))
            .getOrElse(())

        if (insideTarget)
          logger.trace(s"Unzip ${entry.name} -> $out") *>
            createParent *>
            entry.data.through(Files[F].writeAll(out)).compile.drain
        else
          Async[F].raiseError[Unit](
            new java.util.zip.ZipException(
              s"Zip entry '${entry.name}' resolves outside of target directory $targetDir"
            )
          )
      }
      .drain ++ Stream
      .eval(if (moveUp) moveContentsUp(logger)(targetDir) else ().pure[F])
      .as(targetDir)
  }
/** If `dir` contains exactly one entry and that entry is a directory,
  * moves all children of that directory directly into `dir`. In every
  * other case nothing happens.
  *
  * @param logger
  *   receives a trace message for each moved child
  * @param dir
  *   the directory to inspect
  */
private def moveContentsUp[F[_]: Sync: Files](logger: Logger[F])(dir: Path): F[Unit] = {
  // Moves each child of `subdir` into `dir`, keeping its file name.
  def moveChildren(subdir: Path): F[Unit] =
    Files[F]
      .list(subdir)
      .filter(_ != dir)
      .evalMap { child =>
        val target = dir / child.fileName
        logger.trace(s"Move $child -> $target") *> Files[F].move(child, target)
      }
      .compile
      .drain

  // Two elements are enough to tell "exactly one entry" from "more than one".
  Files[F].list(dir).take(2).compile.toList.flatMap {
    case List(single) =>
      Files[F].isDirectory(single).ifM(moveChildren(single), Sync[F].unit)
    case _ =>
      Sync[F].unit
  }
}
def unzipJava[F[_]: Async](
in: InputStream,
chunkSize: Int,
@ -55,7 +112,7 @@ object Zip {
.unNoneTerminate
.filter(ze => glob.matchFilenameOrPath(ze.getName()))
.map { ze =>
val name = Paths.get(ze.getName()).getFileName.toString
val name = ze.getName()
val data =
fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, false)
Binary(name, data)

Binary file not shown.

Binary file not shown.

View File

@ -7,20 +7,25 @@
package docspell.files
import cats.effect._
import cats.effect.unsafe.implicits.global
import cats.implicits._
import fs2.io.file.{Files, Path}
import docspell.common.Glob
import docspell.logging.TestLoggingConfig
import munit._
class ZipTest extends FunSuite {
class ZipTest extends CatsEffectSuite with TestLoggingConfig {
val logger = docspell.logging.getLogger[IO]
val tempDir = ResourceFixture(
Files[IO].tempDirectory(Path("target").some, "zip-test-", None)
)
test("unzip") {
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192)
val uncomp = zipFile.through(Zip.unzip(8192, Glob.all))
val unzip = zipFile.through(Zip.unzip(8192, Glob.all))
uncomp
unzip
.evalMap { entry =>
val x = entry.data.map(_ => 1).foldMonoid.compile.lastOrError
x.map { size =>
@ -35,6 +40,10 @@ class ZipTest extends FunSuite {
}
.compile
.drain
.unsafeRunSync()
}
tempDir.test("unzipTo directory tree") { _ =>
// val zipFile = ExampleFiles.zip_dirs_zip.readURL[IO](8192)
// zipFile.through(Zip.unzip(G))
}
}