From 02b8078f01e4188aa1a6f5e11f0e72dbe0ae20cd Mon Sep 17 00:00:00 2001 From: eikek Date: Tue, 22 Jun 2021 22:48:14 +0200 Subject: [PATCH] Use fs2 Files api --- .../src/main/scala/docspell/common/File.scala | 114 +++++------------- .../main/scala/docspell/extract/ocr/Ocr.scala | 6 +- 2 files changed, 35 insertions(+), 85 deletions(-) diff --git a/modules/common/src/main/scala/docspell/common/File.scala b/modules/common/src/main/scala/docspell/common/File.scala index 002d92d3..c27c369c 100644 --- a/modules/common/src/main/scala/docspell/common/File.scala +++ b/modules/common/src/main/scala/docspell/common/File.scala @@ -1,91 +1,47 @@ package docspell.common -import java.io.IOException -import java.nio.file.attribute.BasicFileAttributes -import java.nio.file.{Files => JFiles, _} -import java.util.concurrent.atomic.AtomicInteger - -import scala.jdk.CollectionConverters._ +import java.nio.file.Path +import cats.FlatMap +import cats.Monad import cats.effect._ import cats.implicits._ +import fs2.Stream import fs2.io.file.Files -import fs2.{Chunk, Stream} import docspell.common.syntax.all._ import io.circe.Decoder -import scodec.bits.ByteVector -//TODO use io.fs2.files.Files api + object File { - def mkDir[F[_]: Sync](dir: Path): F[Path] = - Sync[F].blocking(JFiles.createDirectories(dir)) + def mkDir[F[_]: Files](dir: Path): F[Path] = + Files[F].createDirectories(dir) - def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] = - mkDir(parent).map(p => JFiles.createTempDirectory(p, prefix)) + def exists[F[_]: Files](file: Path): F[Boolean] = + Files[F].exists(file) - def mkTempFile[F[_]: Sync]( - parent: Path, - prefix: String, - suffix: Option[String] = None - ): F[Path] = - mkDir(parent).map(p => JFiles.createTempFile(p, prefix, suffix.orNull)) + def size[F[_]: Files](file: Path): F[Long] = + Files[F].size(file) - def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = - Sync[F].delay { - val count = new AtomicInteger(0) - JFiles.walkFileTree( - dir, - new SimpleFileVisitor[Path]() { - override def visitFile( - file: Path, - attrs: BasicFileAttributes - ): FileVisitResult = { - JFiles.deleteIfExists(file) - count.incrementAndGet() - FileVisitResult.CONTINUE - } - override def postVisitDirectory(dir: Path, e: IOException): FileVisitResult = - Option(e) match { - case Some(ex) => throw ex - case None => - JFiles.deleteIfExists(dir) - FileVisitResult.CONTINUE - } - } - ) - count.get - } + def existsNonEmpty[F[_]: Files: Monad](file: Path, minSize: Long = 0): F[Boolean] = + exists[F](file).flatMap(b => if (b) size[F](file).map(_ > minSize) else false.pure[F]) - def exists[F[_]: Sync](file: Path): F[Boolean] = - Sync[F].delay(JFiles.exists(file)) + def delete[F[_]: Files: FlatMap](path: Path): F[Unit] = + for { + isDir <- Files[F].isDirectory(path) + _ <- + if (isDir) Files[F].deleteDirectoryRecursively(path) + else Files[F].deleteIfExists(path) + } yield () - def size[F[_]: Sync](file: Path): F[Long] = - Sync[F].delay(JFiles.size(file)) + def withTempDir[F[_]: Files](parent: Path, prefix: String): Resource[F, Path] = + Resource + .eval(mkDir[F](parent)) + .flatMap(_ => Files[F].tempDirectory(parent.some, prefix)) - def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] = - Sync[F].delay(JFiles.exists(file) && JFiles.size(file) > minSize) - - def deleteFile[F[_]: Sync](file: Path): F[Unit] = - Sync[F].delay(JFiles.deleteIfExists(file)).map(_ => ()) - - def delete[F[_]: Sync](path: Path): F[Int] = - if (JFiles.isDirectory(path)) deleteDirectory(path) - else deleteFile(path).map(_ => 1) - - def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] = - Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ())) - - def listJFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] = - Sync[F].delay { - val javaList = - JFiles - .list(dir) - .filter(p => pred(p)) - .collect(java.util.stream.Collectors.toList()) - javaList.asScala.toList.sortBy(_.getFileName.toString) - } + def listFiles[F[_]: Files](pred: Path => Boolean, dir: Path): Stream[F, Path] = + Files[F].directoryStream(dir, pred) def readAll[F[_]: Files]( file: Path, @@ -97,17 +53,13 @@ object File { readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] = - ByteVector.encodeUtf8(content) match { - case Right(bv) => - Stream - .chunk(Chunk.byteVector(bv)) - .through(Files[F].writeAll(file)) - .compile - .drain - .map(_ => file) - case Left(ex) => - Concurrent[F].raiseError(ex) - } + Stream + .emit(content) + .through(fs2.text.utf8Encode) + .through(Files[F].writeAll(file)) + .compile + .drain + .map(_ => file) def readJson[F[_]: Async, A](file: Path)(implicit d: Decoder[A]): F[A] = readText[F](file).map(_.parseJsonAs[A]).rethrow diff --git a/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala b/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala index 6a476697..6c1b8a5b 100644 --- a/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala +++ b/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala @@ -80,8 +80,7 @@ object Ocr { ) SystemCommand .execSuccess(cmd, logger, wd = Some(wd), stdin = pdf) - .evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd)) - .flatMap(fs => Stream.emits(fs)) + .flatMap(_ => File.listFiles(pathEndsWith(".tif"), wd)) } /** Run ghostscript to extract all pdf pages into tiff files. The @@ -101,8 +100,7 @@ object Ocr { ) SystemCommand .execSuccess[F](cmd, logger, wd = Some(wd)) - .evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd)) - .flatMap(fs => Stream.emits(fs)) + .flatMap(_ => File.listFiles(pathEndsWith(".tif"), wd)) } private def pathEndsWith(ext: String): Path => Boolean =