diff --git a/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala b/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala index b3a2bff2..9801ab17 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala @@ -6,7 +6,7 @@ package docspell.analysis -import java.nio.file.Path +import fs2.io.file.Path import docspell.common._ diff --git a/modules/analysis/src/main/scala/docspell/analysis/classifier/ClassifierModel.scala b/modules/analysis/src/main/scala/docspell/analysis/classifier/ClassifierModel.scala index 81e54f10..55905da9 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/classifier/ClassifierModel.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/classifier/ClassifierModel.scala @@ -6,6 +6,6 @@ package docspell.analysis.classifier -import java.nio.file.Path +import fs2.io.file.Path case class ClassifierModel(model: Path) diff --git a/modules/analysis/src/main/scala/docspell/analysis/classifier/StanfordTextClassifier.scala b/modules/analysis/src/main/scala/docspell/analysis/classifier/StanfordTextClassifier.scala index a4a416c0..3c91fefd 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/classifier/StanfordTextClassifier.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/classifier/StanfordTextClassifier.scala @@ -6,13 +6,11 @@ package docspell.analysis.classifier -import java.nio.file.Path - import cats.effect.Ref import cats.effect._ import cats.implicits._ import fs2.Stream -import fs2.io.file.Files +import fs2.io.file.{Files, Path} import docspell.analysis.classifier import docspell.analysis.classifier.TextClassifier._ @@ -51,7 +49,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig) case Some(text) => Sync[F].delay { val cls = ColumnDataClassifier.getClassifier( - model.model.normalize().toAbsolutePath.toString + model.model.normalize.absolute.toString ) val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text))) Option(cat) @@ -71,8 +69,8 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig) _ <- logger.debug(s"Training classifier from $props") res <- Sync[F].delay { val cdc = new ColumnDataClassifier(Properties.fromMap(amendProps(in, props))) - cdc.trainClassifier(in.train.toString()) - val score = cdc.testClassifier(in.test.toString()) + cdc.trainClassifier(in.train.toString) + val score = cdc.testClassifier(in.test.toString) TrainResult(score.first(), classifier.ClassifierModel(in.modelFile)) } _ <- logger.debug(s"Trained with result $res") @@ -88,7 +86,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig) val fileLines = File .readAll[F](in.file, 4096) - .through(fs2.text.utf8Decode) + .through(fs2.text.utf8.decode) .through(fs2.text.lines) for { @@ -99,7 +97,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig) fileLines .take(nTest) .intersperse("\n") - .through(fs2.text.utf8Encode) + .through(fs2.text.utf8.encode) .through(Files[F].writeAll(td.test)) .compile .drain @@ -107,7 +105,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig) fileLines .drop(nTest) .intersperse("\n") - .through(fs2.text.utf8Encode) + .through(fs2.text.utf8.encode) .through(Files[F].writeAll(td.train)) .compile .drain @@ -124,7 +122,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig) .map(d => s"${d.cls}\t${fixRef(d.ref)}\t${normalisedText(d.text)}") .evalTap(_ => counter.update(_ + 1)) .intersperse("\r\n") - .through(fs2.text.utf8Encode) + .through(fs2.text.utf8.encode) .through(Files[F].writeAll(target)) .compile .drain diff --git a/modules/analysis/src/main/scala/docspell/analysis/classifier/TextClassifierConfig.scala b/modules/analysis/src/main/scala/docspell/analysis/classifier/TextClassifierConfig.scala index a27a7b15..05032af5 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/classifier/TextClassifierConfig.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/classifier/TextClassifierConfig.scala @@ -6,9 +6,8 @@ package docspell.analysis.classifier -import java.nio.file.Path - import cats.data.NonEmptyList +import fs2.io.file.Path case class TextClassifierConfig( workingDir: Path, diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala index d2f942f3..591565d7 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala @@ -6,9 +6,10 @@ package docspell.analysis.nlp -import java.nio.file.Path import java.util.{Properties => JProps} +import fs2.io.file.Path + import docspell.analysis.nlp.Properties.Implicits._ import docspell.common._ import docspell.common.syntax.FileSyntax._ diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala index ad0ebc0f..e036e910 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala @@ -6,11 +6,10 @@ package docspell.analysis.nlp -import java.nio.file.Path - import scala.jdk.CollectionConverters._ import cats.effect._ +import fs2.io.file.Path import docspell.common._ diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerSettings.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerSettings.scala index 3ca25578..7fe8282f 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerSettings.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerSettings.scala @@ -6,7 +6,7 @@ package docspell.analysis.nlp -import java.nio.file.Path +import fs2.io.file.Path import docspell.analysis.NlpSettings import docspell.common.Language.NLPLanguage diff --git a/modules/analysis/src/test/scala/docspell/analysis/classifier/StanfordTextClassifierSuite.scala b/modules/analysis/src/test/scala/docspell/analysis/classifier/StanfordTextClassifierSuite.scala index 87588041..f35338e0 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/classifier/StanfordTextClassifierSuite.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/classifier/StanfordTextClassifierSuite.scala @@ -24,7 +24,7 @@ class StanfordTextClassifierSuite extends FunSuite { val logger = Logger.log4s[IO](org.log4s.getLogger) test("learn from data") { - val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map())) + val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map())) val data = Stream @@ -52,8 +52,8 @@ class StanfordTextClassifierSuite extends FunSuite { } test("run classifier") { - val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map())) - val things = File.withTempDir[IO](Paths.get("target"), "testcls") + val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map())) + val things = File.withTempDir[IO](File.path(Paths.get("target")), "testcls") things .use { dir => diff --git a/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala b/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala index 1e6463bd..718ad102 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala @@ -13,7 +13,6 @@ import cats.effect.unsafe.implicits.global import docspell.analysis.Env import docspell.common._ -import docspell.common.syntax.FileSyntax._ import docspell.files.TestFiles import edu.stanford.nlp.pipeline.StanfordCoreNLP @@ -100,7 +99,7 @@ class StanfordNerAnnotatorSuite extends FunSuite { |""".stripMargin File - .withTempDir[IO](Paths.get("target"), "test-regex-ner") + .withTempDir[IO](File.path(Paths.get("target")), "test-regex-ner") .use { dir => for { out <- File.writeString[IO](dir / "regex.txt", regexNerContent) diff --git a/modules/common/src/main/scala/docspell/common/Binary.scala b/modules/common/src/main/scala/docspell/common/Binary.scala index 67693b56..eff042a6 100644 --- a/modules/common/src/main/scala/docspell/common/Binary.scala +++ b/modules/common/src/main/scala/docspell/common/Binary.scala @@ -29,7 +29,7 @@ object Binary { Binary[F]( name, MimeType.octetStream, - Stream.emit(content).through(fs2.text.utf8Encode) + Stream.emit(content).through(fs2.text.utf8.encode) ) def text[F[_]](name: String, content: String): Binary[F] = @@ -46,7 +46,7 @@ object Binary { def decode[F[_]](cs: Charset): Pipe[F, Byte, String] = if (cs == StandardCharsets.UTF_8) - fs2.text.utf8Decode + fs2.text.utf8.decode else util.decode[F](cs) diff --git a/modules/common/src/main/scala/docspell/common/File.scala b/modules/common/src/main/scala/docspell/common/File.scala index 9dd2c3e4..2f9b1420 100644 --- a/modules/common/src/main/scala/docspell/common/File.scala +++ b/modules/common/src/main/scala/docspell/common/File.scala @@ -6,14 +6,14 @@ package docspell.common -import java.nio.file.Path +import java.nio.file.{Path => JPath} import cats.FlatMap import cats.Monad import cats.effect._ import cats.implicits._ import fs2.Stream -import fs2.io.file.Files +import fs2.io.file.{Files, Flags, Path} import docspell.common.syntax.all._ @@ -21,7 +21,9 @@ import io.circe.Decoder object File { - def mkDir[F[_]: Files](dir: Path): F[Path] = + def path(jp: JPath): Path = Path.fromNioPath(jp) + + def mkDir[F[_]: Files](dir: Path): F[Unit] = Files[F].createDirectories(dir) def exists[F[_]: Files](file: Path): F[Boolean] = @@ -37,31 +39,36 @@ object File { for { isDir <- Files[F].isDirectory(path) _ <- - if (isDir) Files[F].deleteDirectoryRecursively(path) + if (isDir) Files[F].deleteRecursively(path) else Files[F].deleteIfExists(path) } yield () def withTempDir[F[_]: Files](parent: Path, prefix: String): Resource[F, Path] = Resource .eval(mkDir[F](parent)) - .flatMap(_ => Files[F].tempDirectory(parent.some, prefix)) + .flatMap(_ => Files[F].tempDirectory(parent.some, prefix, None)) def listFiles[F[_]: Files](pred: Path => Boolean, dir: Path): Stream[F, Path] = - Files[F].directoryStream(dir, pred) + Files[F].list(dir).filter(pred) def readAll[F[_]: Files]( file: Path, chunkSize: Int ): Stream[F, Byte] = - Files[F].readAll(file, chunkSize) + Files[F].readAll(file, chunkSize, Flags.Read) + + def readAll[F[_]: Files]( + file: Path + ): Stream[F, Byte] = + Files[F].readAll(file) def readText[F[_]: Files: Concurrent](file: Path): F[String] = - readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid + readAll[F](file, 8192).through(fs2.text.utf8.decode).compile.foldMonoid def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] = Stream .emit(content) - .through(fs2.text.utf8Encode) + .through(fs2.text.utf8.encode) .through(Files[F].writeAll(file)) .compile .drain diff --git a/modules/common/src/main/scala/docspell/common/LenientUri.scala b/modules/common/src/main/scala/docspell/common/LenientUri.scala index 1cc4cb90..ae0b135c 100644 --- a/modules/common/src/main/scala/docspell/common/LenientUri.scala +++ b/modules/common/src/main/scala/docspell/common/LenientUri.scala @@ -82,7 +82,7 @@ case class LenientUri( ) def readText[F[_]: Sync](chunkSize: Int): F[String] = - readURL[F](chunkSize).through(fs2.text.utf8Decode).compile.foldMonoid + readURL[F](chunkSize).through(fs2.text.utf8.decode).compile.foldMonoid def host: Option[String] = authority.map(a => diff --git a/modules/common/src/main/scala/docspell/common/SystemCommand.scala b/modules/common/src/main/scala/docspell/common/SystemCommand.scala index 3af39b57..5abef822 100644 --- a/modules/common/src/main/scala/docspell/common/SystemCommand.scala +++ b/modules/common/src/main/scala/docspell/common/SystemCommand.scala @@ -8,13 +8,13 @@ package docspell.common import java.io.InputStream import java.lang.ProcessBuilder.Redirect -import java.nio.file.Path import java.util.concurrent.TimeUnit import scala.jdk.CollectionConverters._ import cats.effect._ import cats.implicits._ +import fs2.io.file.Path import fs2.{Stream, io, text} object SystemCommand { @@ -102,7 +102,7 @@ object SystemCommand { .redirectError(Redirect.PIPE) .redirectOutput(Redirect.PIPE) - wd.map(_.toFile).foreach(pb.directory) + wd.map(_.toNioPath.toFile).foreach(pb.directory) pb.start() } ) @@ -115,7 +115,7 @@ object SystemCommand { private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] = io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false) - .through(text.utf8Decode) + .through(text.utf8.decode) .chunks .map(_.toVector.mkString) .fold1(_ + _) diff --git a/modules/common/src/main/scala/docspell/common/config/Implicits.scala b/modules/common/src/main/scala/docspell/common/config/Implicits.scala index 436deaec..c294cb66 100644 --- a/modules/common/src/main/scala/docspell/common/config/Implicits.scala +++ b/modules/common/src/main/scala/docspell/common/config/Implicits.scala @@ -6,8 +6,12 @@ package docspell.common.config +import java.nio.file.{Path => JPath} + import scala.reflect.ClassTag +import fs2.io.file.Path + import docspell.common._ import com.github.eikek.calev.CalEvent @@ -16,6 +20,10 @@ import pureconfig.error.{CannotConvert, FailureReason} import scodec.bits.ByteVector object Implicits { + + implicit val pathReader: ConfigReader[Path] = + ConfigReader[JPath].map(Path.fromNioPath) + implicit val lenientUriReader: ConfigReader[LenientUri] = ConfigReader[String].emap(reason(LenientUri.parse)) diff --git a/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala b/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala index 63b2a928..1c985710 100644 --- a/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala +++ b/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala @@ -6,20 +6,17 @@ package docspell.common.syntax -import java.nio.file.Path +import fs2.io.file.Path trait FileSyntax { implicit final class PathOps(p: Path) { def absolutePath: Path = - p.normalize().toAbsolutePath + p.absolute def absolutePathAsString: String = absolutePath.toString - - def /(next: String): Path = - p.resolve(next) } } diff --git a/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala b/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala index ba248c10..131ec564 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala @@ -6,11 +6,9 @@ package docspell.convert.extern -import java.nio.file.Path - import cats.effect._ import cats.implicits._ -import fs2.io.file.Files +import fs2.io.file.{Files, Path} import fs2.{Pipe, Stream} import docspell.common._ @@ -30,8 +28,8 @@ private[extern] object ExternConv { Stream .resource(File.withTempDir[F](wd, s"docspell-$name")) .flatMap { dir => - val inFile = dir.resolve("infile").toAbsolutePath.normalize - val out = dir.resolve("out.pdf").toAbsolutePath.normalize + val inFile = dir.resolve("infile").absolute.normalize + val out = dir.resolve("out.pdf").absolute.normalize val sysCfg = cmdCfg.replace( Map( @@ -77,7 +75,7 @@ private[extern] object ExternConv { )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = File.existsNonEmpty[F](out).flatMap { case true if result.rc == 0 => - val outTxt = out.resolveSibling(out.getFileName.toString + ".txt") + val outTxt = out.resolveSibling(out.fileName.toString + ".txt") File.existsNonEmpty[F](outTxt).flatMap { case true => successPdfTxt( diff --git a/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala index cab17c1f..3510ce64 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala @@ -6,10 +6,9 @@ package docspell.convert.extern -import java.nio.file.Path - import cats.effect._ import fs2.Stream +import fs2.io.file.Path import docspell.common._ import docspell.convert.ConversionResult diff --git a/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala index 2b78c65c..ed5d2335 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala @@ -6,7 +6,7 @@ package docspell.convert.extern -import java.nio.file.Path +import fs2.io.file.Path import docspell.common.SystemCommand diff --git a/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala b/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala index 296d32a7..302cf63d 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala @@ -6,10 +6,9 @@ package docspell.convert.extern -import java.nio.file.Path - import cats.effect._ import fs2.Stream +import fs2.io.file.Path import docspell.common._ import docspell.convert.ConversionResult diff --git a/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala index e3a18524..22bf30cc 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala @@ -6,7 +6,7 @@ package docspell.convert.extern -import java.nio.file.Path +import fs2.io.file.Path import docspell.common.SystemCommand diff --git a/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala b/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala index 916ea353..c765b29d 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala @@ -6,10 +6,9 @@ package docspell.convert.extern -import java.nio.file.Path - import cats.effect._ import fs2.Stream +import fs2.io.file.Path import docspell.common._ import docspell.convert.ConversionResult diff --git a/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala index bbbcb5ab..bd3c5044 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala @@ -6,7 +6,7 @@ package docspell.convert.extern -import java.nio.file.Path +import fs2.io.file.Path import docspell.common.SystemCommand diff --git a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala index 539b9dfe..e94c626f 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala @@ -7,10 +7,10 @@ package docspell.convert.extern import java.nio.charset.Charset -import java.nio.file.Path import cats.effect._ import cats.implicits._ +import fs2.io.file.Path import fs2.{Chunk, Stream} import docspell.common._ diff --git a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala index 41392ff0..68b02b43 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala @@ -6,7 +6,7 @@ package docspell.convert.extern -import java.nio.file.Path +import fs2.io.file.Path import docspell.common.SystemCommand diff --git a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala index e5d330b6..ebe57b9c 100644 --- a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala +++ b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala @@ -26,7 +26,7 @@ import munit._ class ConversionTest extends FunSuite with FileChecks { val logger = Logger.log4s[IO](org.log4s.getLogger) - val target = Paths.get("target") + val target = File.path(Paths.get("target")) val convertConfig = ConvertConfig( 8192, diff --git a/modules/convert/src/test/scala/docspell/convert/FileChecks.scala b/modules/convert/src/test/scala/docspell/convert/FileChecks.scala index a46375bb..a8410160 100644 --- a/modules/convert/src/test/scala/docspell/convert/FileChecks.scala +++ b/modules/convert/src/test/scala/docspell/convert/FileChecks.scala @@ -7,14 +7,15 @@ package docspell.convert import java.nio.charset.StandardCharsets -import java.nio.file.{Files, Path} +import java.nio.file.Files import cats.data.Kleisli import cats.effect.IO import cats.effect.unsafe.implicits.global +import fs2.io.file.Path import fs2.{Pipe, Stream} -import docspell.common.MimeType +import docspell.common._ import docspell.convert.ConversionResult.Handler import docspell.files.TikaMimetype @@ -23,7 +24,7 @@ trait FileChecks { implicit class FileCheckOps(p: Path) { def isNonEmpty: Boolean = - Files.exists(p) && Files.size(p) > 0 + Files.exists(p.toNioPath) && Files.size(p.toNioPath) > 0 def isType(mime: MimeType): Boolean = TikaMimetype.detect[IO](p).map(_ == mime).unsafeRunSync() @@ -36,7 +37,12 @@ trait FileChecks { } def storeFile(file: Path): Pipe[IO, Byte, Path] = - in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes)))) + in => + Stream + .eval( + in.compile.to(Array).flatMap(bytes => IO(Files.write(file.toNioPath, bytes))) + ) + .map(p => File.path(p)) def storePdfHandler(file: Path): Handler[IO, Path] = storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1) @@ -47,8 +53,8 @@ trait FileChecks { for { pout <- pdf.through(storeFile(filePdf)).compile.lastOrError str <- txt - tout <- IO(Files.write(fileTxt, str.getBytes(StandardCharsets.UTF_8))) - } yield (pout, tout) + tout <- IO(Files.write(fileTxt.toNioPath, str.getBytes(StandardCharsets.UTF_8))) + } yield (pout, File.path(tout)) case ConversionResult.SuccessPdf(pdf) => pdf.through(storeFile(filePdf)).compile.lastOrError.map(p => (p, fileTxt)) diff --git a/modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala b/modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala index 77c97d3d..59b73031 100644 --- a/modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala +++ b/modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala @@ -7,10 +7,11 @@ package docspell.convert.extern import java.nio.charset.StandardCharsets -import java.nio.file.{Path, Paths} +import java.nio.file.Paths import cats.effect._ import cats.effect.unsafe.implicits.global +import fs2.io.file.Path import docspell.common._ import docspell.convert._ @@ -21,7 +22,7 @@ import munit._ class ExternConvTest extends FunSuite with FileChecks { val utf8 = StandardCharsets.UTF_8 val logger = Logger.log4s[IO](org.log4s.getLogger) - val target = Paths.get("target") + val target = File.path(Paths.get("target")) test("convert html to pdf") { val cfg = SystemCommand.Config( diff --git a/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala b/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala index 7c0fa1c4..fcf5836d 100644 --- a/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala +++ b/modules/extract/src/main/scala/docspell/extract/ocr/Ocr.scala @@ -6,10 +6,9 @@ package docspell.extract.ocr -import java.nio.file.Path - import cats.effect._ import fs2.Stream +import fs2.io.file.Path import docspell.common._ @@ -100,7 +99,7 @@ object Ocr { ): Stream[F, Path] = { val cmd = ghostscript.replace( Map( - "{{infile}}" -> pdf.toAbsolutePath.toString, + "{{infile}}" -> pdf.absolute.toString, "{{outfile}}" -> "%d.tif" ) ) @@ -110,7 +109,7 @@ object Ocr { } private def pathEndsWith(ext: String): Path => Boolean = - p => p.getFileName.toString.endsWith(ext) + p => p.fileName.toString.endsWith(ext) /** Run unpaper to optimize the image for ocr. The * files are stored to a temporary location on disk and returned. @@ -118,18 +117,18 @@ object Ocr { private[extract] def runUnpaperFile[F[_]: Async]( img: Path, unpaper: SystemCommand.Config, - wd: Path, + wd: Option[Path], logger: Logger[F] ): Stream[F, Path] = { - val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath + val targetFile = img.resolveSibling("u-" + img.fileName.toString).absolute val cmd = unpaper.replace( Map( - "{{infile}}" -> img.toAbsolutePath.toString, + "{{infile}}" -> img.absolute.toString, "{{outfile}}" -> targetFile.toString ) ) SystemCommand - .execSuccess[F](cmd, logger, wd = Some(wd)) + .execSuccess[F](cmd, logger, wd = wd) .map(_ => targetFile) .handleErrorWith { th => logger @@ -151,13 +150,13 @@ object Ocr { ): Stream[F, String] = // tesseract cannot cope with absolute filenames // so use the parent as working dir - runUnpaperFile(img, config.unpaper.command, img.getParent, logger).flatMap { uimg => + runUnpaperFile(img, config.unpaper.command, img.parent, logger).flatMap { uimg => val cmd = config.tesseract.command .replace( - Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)) + Map("{{file}}" -> uimg.fileName.toString, "{{lang}}" -> fixLanguage(lang)) ) SystemCommand - .execSuccess[F](cmd, logger, wd = Some(uimg.getParent)) + .execSuccess[F](cmd, logger, wd = uimg.parent) .map(_.stdout) } diff --git a/modules/extract/src/main/scala/docspell/extract/ocr/OcrConfig.scala b/modules/extract/src/main/scala/docspell/extract/ocr/OcrConfig.scala index 0f6e0e87..256b1ff5 100644 --- a/modules/extract/src/main/scala/docspell/extract/ocr/OcrConfig.scala +++ b/modules/extract/src/main/scala/docspell/extract/ocr/OcrConfig.scala @@ -6,7 +6,9 @@ package docspell.extract.ocr -import java.nio.file.{Path, Paths} +import java.nio.file.Paths + +import fs2.io.file.Path import docspell.common._ @@ -44,7 +46,9 @@ object OcrConfig { ), Duration.seconds(30) ), - Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction") + File.path( + Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction") + ) ), unpaper = Unpaper( SystemCommand diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala index 9bd2fddc..1740f3b5 100644 --- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala @@ -7,13 +7,13 @@ package docspell.extract.pdfbox import java.io.InputStream -import java.nio.file.Path import scala.util.{Try, Using} import cats.effect.Sync import cats.implicits._ import fs2.Stream +import fs2.io.file.Path import docspell.common.Timestamp import docspell.extract.internal.Text @@ -48,7 +48,7 @@ object PdfboxExtract { Using(PDDocument.load(is))(readText).toEither.flatten def getText(inFile: Path): Either[Throwable, Text] = - Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten + Using(PDDocument.load(inFile.toNioPath.toFile))(readText).toEither.flatten private def readText(doc: PDDocument): Either[Throwable, Text] = Try { @@ -68,7 +68,7 @@ object PdfboxExtract { Using(PDDocument.load(is))(readMetaData).toEither.flatten def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] = - Using(PDDocument.load(inFile.toFile))(readMetaData).toEither.flatten + Using(PDDocument.load(inFile.toNioPath.toFile))(readMetaData).toEither.flatten private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] = Try { diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala index 3c7a4830..ddafa271 100644 --- a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala +++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala @@ -6,12 +6,11 @@ package docspell.extract.pdfbox -import java.nio.file.Path - import cats.effect._ import cats.effect.unsafe.implicits.global import fs2.Stream import fs2.io.file.Files +import fs2.io.file.Path import docspell.files.ExampleFiles diff --git a/modules/files/src/main/scala/docspell/files/ImageSize.scala b/modules/files/src/main/scala/docspell/files/ImageSize.scala index 20046fc9..667fc703 100644 --- a/modules/files/src/main/scala/docspell/files/ImageSize.scala +++ b/modules/files/src/main/scala/docspell/files/ImageSize.scala @@ -7,7 +7,6 @@ package docspell.files import java.io.{ByteArrayInputStream, InputStream} -import java.nio.file.Path import javax.imageio.stream.{FileImageInputStream, ImageInputStream} import javax.imageio.{ImageIO, ImageReader} @@ -17,6 +16,7 @@ import scala.util.{Try, Using} import cats.effect._ import cats.implicits._ import fs2.Stream +import fs2.io.file.Path object ImageSize { @@ -24,7 +24,7 @@ object ImageSize { * the whole image into memory. */ def get(file: Path): Option[Dimension] = - Using(new FileImageInputStream(file.toFile))(getDimension).toOption.flatten + Using(new FileImageInputStream(file.toNioPath.toFile))(getDimension).toOption.flatten /** Return the image size from its header without reading * the whole image into memory. diff --git a/modules/files/src/main/scala/docspell/files/TikaMimetype.scala b/modules/files/src/main/scala/docspell/files/TikaMimetype.scala index f9bd0912..1ac0adf7 100644 --- a/modules/files/src/main/scala/docspell/files/TikaMimetype.scala +++ b/modules/files/src/main/scala/docspell/files/TikaMimetype.scala @@ -8,7 +8,7 @@ package docspell.files import java.io.BufferedInputStream import java.nio.charset.Charset -import java.nio.file.{Files, Path} +import java.nio.file.Files import scala.jdk.CollectionConverters._ import scala.util.Using @@ -16,6 +16,7 @@ import scala.util.Using import cats.effect.Sync import cats.implicits._ import fs2.Stream +import fs2.io.file.Path import docspell.common._ @@ -100,8 +101,8 @@ object TikaMimetype { def detect[F[_]: Sync](file: Path): F[MimeType] = Sync[F].delay { - val hint = MimeTypeHint.filename(file.getFileName.toString) - Using(new BufferedInputStream(Files.newInputStream(file), 64)) { in => + val hint = MimeTypeHint.filename(file.fileName.toString) + Using(new BufferedInputStream(Files.newInputStream(file.toNioPath), 64)) { in => convert(tika.detect(in, makeMetadata(hint))) }.toEither }.rethrow diff --git a/modules/joex/src/main/scala/docspell/joex/Config.scala b/modules/joex/src/main/scala/docspell/joex/Config.scala index 69746793..05326c0d 100644 --- a/modules/joex/src/main/scala/docspell/joex/Config.scala +++ b/modules/joex/src/main/scala/docspell/joex/Config.scala @@ -6,9 +6,8 @@ package docspell.joex -import java.nio.file.Path - import cats.data.NonEmptyList +import fs2.io.file.Path import docspell.analysis.TextAnalysisConfig import docspell.analysis.classifier.TextClassifierConfig diff --git a/modules/joex/src/main/scala/docspell/joex/analysis/NerFile.scala b/modules/joex/src/main/scala/docspell/joex/analysis/NerFile.scala index 1851f5aa..2c57447f 100644 --- a/modules/joex/src/main/scala/docspell/joex/analysis/NerFile.scala +++ b/modules/joex/src/main/scala/docspell/joex/analysis/NerFile.scala @@ -6,10 +6,9 @@ package docspell.joex.analysis -import java.nio.file.Path - import cats.effect._ import cats.implicits._ +import fs2.io.file.Path import docspell.analysis.split.TextSplitter import docspell.common._ diff --git a/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala b/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala index b3ec75cd..e2a8d78f 100644 --- a/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala +++ b/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala @@ -6,11 +6,10 @@ package docspell.joex.analysis -import java.nio.file.Path - import cats.effect._ import cats.effect.std.Semaphore import cats.implicits._ +import fs2.io.file.Path import docspell.common._ import docspell.common.syntax.all._ @@ -112,8 +111,11 @@ object RegexNerFile { writer.permit.use(_ => for { file <- Sync[F].pure(nf.jsonFilePath(cfg.directory)) - _ <- File.mkDir(file.getParent) - _ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2) + _ <- file.parent match { + case Some(p) => File.mkDir(p) + case None => ().pure[F] + } + _ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2) } yield () ) @@ -129,7 +131,10 @@ object RegexNerFile { _ <- logger.fdebug( s"Writing custom NER file for collective '${collective.id}'" ) - _ <- File.mkDir(jsonFile.getParent) + _ <- jsonFile.parent match { + case Some(p) => File.mkDir(p) + case None => ().pure[F] + } _ <- File.writeString(nf.nerFilePath(cfg.directory), text) _ <- File.writeString(jsonFile, nf.asJson.spaces2) } yield () diff --git a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala index 0775e78e..2e07dd9f 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala @@ -6,12 +6,11 @@ package docspell.joex.learn -import java.nio.file.Path - import cats.data.OptionT import cats.effect._ import cats.implicits._ import fs2.io.file.Files +import fs2.io.file.Path import docspell.analysis.classifier.{ClassifierModel, TextClassifier} import docspell.common._ diff --git a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala index e949787b..63a9c667 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala @@ -42,7 +42,7 @@ object StoreClassifierModel { RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId)) ) _ <- logger.debug(s"Storing new trained model for: ${modelName.name}") - fileData = Files[F].readAll(trainedModel.model, 4096) + fileData = Files[F].readAll(trainedModel.model) newFile <- store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError _ <- store.transact( diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index 73ffc0d0..2506a5ff 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -309,7 +309,7 @@ trait Conversions { ): F[UploadData[F]] = { def parseMeta(body: Stream[F, Byte]): F[ItemUploadMeta] = body - .through(fs2.text.utf8Decode) + .through(fs2.text.utf8.decode) .parseJsonAs[ItemUploadMeta] .map( _.fold( diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala index a13a0e7d..85834a85 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala @@ -9,7 +9,7 @@ package docspell.restserver.http4s import cats.data.NonEmptyList import cats.data.OptionT import cats.effect.Sync -import fs2.text.utf8Encode +import fs2.text.utf8 import fs2.{Pure, Stream} import org.http4s._ @@ -20,14 +20,14 @@ object Responses { private[this] val pureForbidden: Response[Pure] = Response( Status.Forbidden, - body = Stream("Forbidden").through(utf8Encode), + body = Stream("Forbidden").through(utf8.encode), headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil) ) private[this] val pureUnauthorized: Response[Pure] = Response( Status.Unauthorized, - body = Stream("Unauthorized").through(utf8Encode), + body = Stream("Unauthorized").through(utf8.encode), headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil) ) diff --git a/modules/restserver/src/main/scala/docspell/restserver/webapp/TemplateRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/webapp/TemplateRoutes.scala index 24923369..ae68f909 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/webapp/TemplateRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/webapp/TemplateRoutes.scala @@ -92,7 +92,7 @@ object TemplateRoutes { Stream .bracket(Sync[F].delay(url.openStream))(in => Sync[F].delay(in.close())) .flatMap(in => fs2.io.readInputStream(in.pure[F], 64 * 1024, false)) - .through(text.utf8Decode) + .through(text.utf8.decode) .compile .fold("")(_ + _) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 4b5ebbf4..58eb5bf2 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -16,10 +16,10 @@ object Dependencies { val EmilVersion = "0.10.0-M2" val FlexmarkVersion = "0.62.2" val FlywayVersion = "7.12.1" - val Fs2Version = "3.0.6" + val Fs2Version = "3.1.0" val Fs2CronVersion = "0.7.1" val H2Version = "1.4.200" - val Http4sVersion = "0.23.0" + val Http4sVersion = "0.23.1" val Icu4jVersion = "69.1" val JsoupVersion = "1.14.1" val KindProjectorVersion = "0.10.3"