Mirror of https://github.com/TheAnachronism/docspell.git (synced 2025-04-04 10:29:34 +00:00)
Replace deprecated fs2 APIs; use fs2.io.file.Path
parent f92aeb6a0f
commit 1901fe1a8c
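Every hunk below applies the same fs2 2.x to 3.x migration: java.nio.file.Path is replaced by fs2.io.file.Path (bridging back to the Java type where a Java API still needs it), and the deprecated fs2.text.utf8Decode/utf8Encode pipes move under the fs2.text.utf8 object. A minimal sketch of both patterns, assuming fs2 3.x and cats-effect 3 (the names are illustrative, not taken from the diff):

    import cats.effect.IO
    import fs2.io.file.{Files, Path}

    object Fs2MigrationSketch {
      // bridge between fs2's Path and java.nio.file.Path
      val p: Path = Path.fromNioPath(java.nio.file.Paths.get("target"))
      val jp: java.nio.file.Path = p.toNioPath

      // utf8Decode is deprecated; the replacement lives on the utf8 object
      def readText(file: Path): IO[String] =
        Files[IO].readAll(file).through(fs2.text.utf8.decode).compile.string
    }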
@@ -6,7 +6,7 @@
 
 package docspell.analysis
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 import docspell.common._
 
@@ -6,6 +6,6 @@
 
 package docspell.analysis.classifier
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 case class ClassifierModel(model: Path)
@@ -6,13 +6,11 @@
 
 package docspell.analysis.classifier
 
-import java.nio.file.Path
-
 import cats.effect.Ref
 import cats.effect._
 import cats.implicits._
 import fs2.Stream
-import fs2.io.file.Files
+import fs2.io.file.{Files, Path}
 
 import docspell.analysis.classifier
 import docspell.analysis.classifier.TextClassifier._
@@ -51,7 +49,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
       case Some(text) =>
         Sync[F].delay {
           val cls = ColumnDataClassifier.getClassifier(
-            model.model.normalize().toAbsolutePath.toString
+            model.model.normalize.absolute.toString
           )
           val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text)))
           Option(cat)
@@ -71,8 +69,8 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
       _ <- logger.debug(s"Training classifier from $props")
       res <- Sync[F].delay {
         val cdc = new ColumnDataClassifier(Properties.fromMap(amendProps(in, props)))
-        cdc.trainClassifier(in.train.toString())
-        val score = cdc.testClassifier(in.test.toString())
+        cdc.trainClassifier(in.train.toString)
+        val score = cdc.testClassifier(in.test.toString)
         TrainResult(score.first(), classifier.ClassifierModel(in.modelFile))
       }
       _ <- logger.debug(s"Trained with result $res")
@@ -88,7 +86,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
     val fileLines =
       File
         .readAll[F](in.file, 4096)
-        .through(fs2.text.utf8Decode)
+        .through(fs2.text.utf8.decode)
         .through(fs2.text.lines)
 
     for {
@@ -99,7 +97,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
         fileLines
           .take(nTest)
           .intersperse("\n")
-          .through(fs2.text.utf8Encode)
+          .through(fs2.text.utf8.encode)
           .through(Files[F].writeAll(td.test))
           .compile
           .drain
@@ -107,7 +105,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
         fileLines
           .drop(nTest)
           .intersperse("\n")
-          .through(fs2.text.utf8Encode)
+          .through(fs2.text.utf8.encode)
           .through(Files[F].writeAll(td.train))
           .compile
           .drain
@@ -124,7 +122,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
       .map(d => s"${d.cls}\t${fixRef(d.ref)}\t${normalisedText(d.text)}")
       .evalTap(_ => counter.update(_ + 1))
       .intersperse("\r\n")
-      .through(fs2.text.utf8Encode)
+      .through(fs2.text.utf8.encode)
       .through(Files[F].writeAll(target))
       .compile
       .drain
@@ -6,9 +6,8 @@
 
 package docspell.analysis.classifier
 
-import java.nio.file.Path
-
 import cats.data.NonEmptyList
+import fs2.io.file.Path
 
 case class TextClassifierConfig(
     workingDir: Path,
@@ -6,9 +6,10 @@
 
 package docspell.analysis.nlp
 
-import java.nio.file.Path
 import java.util.{Properties => JProps}
 
+import fs2.io.file.Path
+
 import docspell.analysis.nlp.Properties.Implicits._
 import docspell.common._
 import docspell.common.syntax.FileSyntax._
@@ -6,11 +6,10 @@
 
 package docspell.analysis.nlp
 
-import java.nio.file.Path
-
 import scala.jdk.CollectionConverters._
 
 import cats.effect._
+import fs2.io.file.Path
 
 import docspell.common._
 
@@ -6,7 +6,7 @@
 
 package docspell.analysis.nlp
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 import docspell.analysis.NlpSettings
 import docspell.common.Language.NLPLanguage
@@ -24,7 +24,7 @@ class StanfordTextClassifierSuite extends FunSuite {
   val logger = Logger.log4s[IO](org.log4s.getLogger)
 
   test("learn from data") {
-    val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
+    val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map()))
 
     val data =
       Stream
@@ -52,8 +52,8 @@ class StanfordTextClassifierSuite extends FunSuite {
   }
 
   test("run classifier") {
-    val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
-    val things = File.withTempDir[IO](Paths.get("target"), "testcls")
+    val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map()))
+    val things = File.withTempDir[IO](File.path(Paths.get("target")), "testcls")
 
     things
       .use { dir =>
@@ -13,7 +13,6 @@ import cats.effect.unsafe.implicits.global
 
 import docspell.analysis.Env
 import docspell.common._
-import docspell.common.syntax.FileSyntax._
 import docspell.files.TestFiles
 
 import edu.stanford.nlp.pipeline.StanfordCoreNLP
@@ -100,7 +99,7 @@ class StanfordNerAnnotatorSuite extends FunSuite {
      |""".stripMargin
 
     File
-      .withTempDir[IO](Paths.get("target"), "test-regex-ner")
+      .withTempDir[IO](File.path(Paths.get("target")), "test-regex-ner")
       .use { dir =>
         for {
           out <- File.writeString[IO](dir / "regex.txt", regexNerContent)
@@ -29,7 +29,7 @@ object Binary {
     Binary[F](
       name,
       MimeType.octetStream,
-      Stream.emit(content).through(fs2.text.utf8Encode)
+      Stream.emit(content).through(fs2.text.utf8.encode)
     )
 
   def text[F[_]](name: String, content: String): Binary[F] =
@@ -46,7 +46,7 @@ object Binary {
 
   def decode[F[_]](cs: Charset): Pipe[F, Byte, String] =
     if (cs == StandardCharsets.UTF_8)
-      fs2.text.utf8Decode
+      fs2.text.utf8.decode
     else
      util.decode[F](cs)
 
@@ -6,14 +6,14 @@
 
 package docspell.common
 
-import java.nio.file.Path
+import java.nio.file.{Path => JPath}
 
 import cats.FlatMap
 import cats.Monad
 import cats.effect._
 import cats.implicits._
 import fs2.Stream
-import fs2.io.file.Files
+import fs2.io.file.{Files, Flags, Path}
 
 import docspell.common.syntax.all._
 
@@ -21,7 +21,9 @@ import io.circe.Decoder
 
 object File {
 
-  def mkDir[F[_]: Files](dir: Path): F[Path] =
+  def path(jp: JPath): Path = Path.fromNioPath(jp)
+
+  def mkDir[F[_]: Files](dir: Path): F[Unit] =
     Files[F].createDirectories(dir)
 
   def exists[F[_]: Files](file: Path): F[Boolean] =
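File.path becomes the single bridge from java.nio.file.Path into fs2's Path, and mkDir now returns F[Unit] because fs2 3.x's createDirectories no longer yields the created path. A hedged usage sketch against the helpers above (the paths are invented):

    import cats.effect.IO
    import fs2.io.file.Path
    import docspell.common.File

    val wd: Path = File.path(java.nio.file.Paths.get("target"))
    val mk: IO[Unit] = File.mkDir[IO](wd) // was F[Path] before this change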
@@ -37,31 +39,36 @@ object File {
     for {
       isDir <- Files[F].isDirectory(path)
       _ <-
-        if (isDir) Files[F].deleteDirectoryRecursively(path)
+        if (isDir) Files[F].deleteRecursively(path)
         else Files[F].deleteIfExists(path)
     } yield ()
 
   def withTempDir[F[_]: Files](parent: Path, prefix: String): Resource[F, Path] =
     Resource
       .eval(mkDir[F](parent))
-      .flatMap(_ => Files[F].tempDirectory(parent.some, prefix))
+      .flatMap(_ => Files[F].tempDirectory(parent.some, prefix, None))
 
   def listFiles[F[_]: Files](pred: Path => Boolean, dir: Path): Stream[F, Path] =
-    Files[F].directoryStream(dir, pred)
+    Files[F].list(dir).filter(pred)
 
   def readAll[F[_]: Files](
       file: Path,
       chunkSize: Int
   ): Stream[F, Byte] =
-    Files[F].readAll(file, chunkSize)
+    Files[F].readAll(file, chunkSize, Flags.Read)
+
+  def readAll[F[_]: Files](
+      file: Path
+  ): Stream[F, Byte] =
+    Files[F].readAll(file)
 
   def readText[F[_]: Files: Concurrent](file: Path): F[String] =
-    readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
+    readAll[F](file, 8192).through(fs2.text.utf8.decode).compile.foldMonoid
 
   def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] =
     Stream
       .emit(content)
-      .through(fs2.text.utf8Encode)
+      .through(fs2.text.utf8.encode)
       .through(Files[F].writeAll(file))
       .compile
       .drain
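For reference, the renamed fs2 3.x calls used above, sketched standalone (assuming fs2 3.x; this is not the project's code): deleteDirectoryRecursively became deleteRecursively, tempDirectory grew an explicit third argument (an Option of Permissions), and directoryStream with a predicate is replaced by list plus an ordinary filter:

    import cats.effect.{IO, Resource}
    import cats.syntax.option._
    import fs2.Stream
    import fs2.io.file.{Files, Path}

    object Fs2FilesSketch {
      def tmpDir(parent: Path): Resource[IO, Path] =
        Files[IO].tempDirectory(parent.some, "docspell-", None) // None: default permissions

      def scalaSources(dir: Path): Stream[IO, Path] =
        Files[IO].list(dir).filter(_.fileName.toString.endsWith(".scala"))
    }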
@@ -82,7 +82,7 @@ case class LenientUri(
     )
 
   def readText[F[_]: Sync](chunkSize: Int): F[String] =
-    readURL[F](chunkSize).through(fs2.text.utf8Decode).compile.foldMonoid
+    readURL[F](chunkSize).through(fs2.text.utf8.decode).compile.foldMonoid
 
   def host: Option[String] =
     authority.map(a =>
@@ -8,13 +8,13 @@ package docspell.common
 
 import java.io.InputStream
 import java.lang.ProcessBuilder.Redirect
-import java.nio.file.Path
 import java.util.concurrent.TimeUnit
 
 import scala.jdk.CollectionConverters._
 
 import cats.effect._
 import cats.implicits._
+import fs2.io.file.Path
 import fs2.{Stream, io, text}
 
 object SystemCommand {
@@ -102,7 +102,7 @@ object SystemCommand {
         .redirectError(Redirect.PIPE)
         .redirectOutput(Redirect.PIPE)
 
-      wd.map(_.toFile).foreach(pb.directory)
+      wd.map(_.toNioPath.toFile).foreach(pb.directory)
       pb.start()
     }
   )
@@ -115,7 +115,7 @@ object SystemCommand {
 
   private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
     io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
-      .through(text.utf8Decode)
+      .through(text.utf8.decode)
       .chunks
       .map(_.toVector.mkString)
       .fold1(_ + _)
@@ -6,8 +6,12 @@
 
 package docspell.common.config
 
+import java.nio.file.{Path => JPath}
+
 import scala.reflect.ClassTag
 
+import fs2.io.file.Path
+
 import docspell.common._
 
 import com.github.eikek.calev.CalEvent
@@ -16,6 +20,10 @@ import pureconfig.error.{CannotConvert, FailureReason}
 import scodec.bits.ByteVector
 
 object Implicits {
+
+  implicit val pathReader: ConfigReader[Path] =
+    ConfigReader[JPath].map(Path.fromNioPath)
+
   implicit val lenientUriReader: ConfigReader[LenientUri] =
     ConfigReader[String].emap(reason(LenientUri.parse))
 
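pureconfig ships a reader for java.nio.file.Path but knows nothing about fs2's Path, so the new implicit simply maps one reader into the other. With it in scope, a config case class holding an fs2 Path derives as usual. A sketch assuming the pureconfig-generic module for Scala 2 (the config class is invented):

    import fs2.io.file.Path
    import pureconfig.ConfigReader
    import pureconfig.generic.semiauto.deriveReader

    object PathConfigSketch {
      implicit val pathReader: ConfigReader[Path] =
        ConfigReader[java.nio.file.Path].map(Path.fromNioPath)

      final case class NerDirConfig(directory: Path) // invented example class

      implicit val nerDirReader: ConfigReader[NerDirConfig] =
        deriveReader[NerDirConfig]
    }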
@@ -6,20 +6,17 @@
 
 package docspell.common.syntax
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 trait FileSyntax {
 
   implicit final class PathOps(p: Path) {
 
     def absolutePath: Path =
-      p.normalize().toAbsolutePath
+      p.absolute
 
     def absolutePathAsString: String =
       absolutePath.toString
 
     def /(next: String): Path =
       p.resolve(next)
   }
 }
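The syntax now enriches fs2's Path directly; absolutePath delegates to fs2's absolute, and the / operator is what the test code above uses (dir / "regex.txt"). A small usage sketch, assuming FileSyntax is exposed via docspell.common.syntax.all._ as elsewhere in this diff (the paths are invented):

    import fs2.io.file.Path
    import docspell.common.syntax.all._

    val dir: Path = Path("target")
    val file: Path = dir / "regex.txt"          // p.resolve("regex.txt")
    val abs: String = file.absolutePathAsString // p.absolute.toString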
|
@ -6,11 +6,9 @@
|
||||
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import fs2.io.file.Files
|
||||
import fs2.io.file.{Files, Path}
|
||||
import fs2.{Pipe, Stream}
|
||||
|
||||
import docspell.common._
|
||||
@@ -30,8 +28,8 @@ private[extern] object ExternConv {
     Stream
       .resource(File.withTempDir[F](wd, s"docspell-$name"))
       .flatMap { dir =>
-        val inFile = dir.resolve("infile").toAbsolutePath.normalize
-        val out = dir.resolve("out.pdf").toAbsolutePath.normalize
+        val inFile = dir.resolve("infile").absolute.normalize
+        val out = dir.resolve("out.pdf").absolute.normalize
         val sysCfg =
           cmdCfg.replace(
             Map(
@@ -77,7 +75,7 @@ private[extern] object ExternConv {
   )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
     File.existsNonEmpty[F](out).flatMap {
       case true if result.rc == 0 =>
-        val outTxt = out.resolveSibling(out.getFileName.toString + ".txt")
+        val outTxt = out.resolveSibling(out.fileName.toString + ".txt")
         File.existsNonEmpty[F](outTxt).flatMap {
           case true =>
             successPdfTxt(
@@ -6,10 +6,9 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
-
 import cats.effect._
 import fs2.Stream
+import fs2.io.file.Path
 
 import docspell.common._
 import docspell.convert.ConversionResult
@@ -6,7 +6,7 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 import docspell.common.SystemCommand
 
@@ -6,10 +6,9 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
-
 import cats.effect._
 import fs2.Stream
+import fs2.io.file.Path
 
 import docspell.common._
 import docspell.convert.ConversionResult
@@ -6,7 +6,7 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 import docspell.common.SystemCommand
 
@@ -6,10 +6,9 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
-
 import cats.effect._
 import fs2.Stream
+import fs2.io.file.Path
 
 import docspell.common._
 import docspell.convert.ConversionResult
@@ -6,7 +6,7 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 import docspell.common.SystemCommand
 
@@ -7,10 +7,10 @@
 package docspell.convert.extern
 
 import java.nio.charset.Charset
-import java.nio.file.Path
 
 import cats.effect._
 import cats.implicits._
+import fs2.io.file.Path
 import fs2.{Chunk, Stream}
 
 import docspell.common._
@@ -6,7 +6,7 @@
 
 package docspell.convert.extern
 
-import java.nio.file.Path
+import fs2.io.file.Path
 
 import docspell.common.SystemCommand
 
@@ -26,7 +26,7 @@ import munit._
 class ConversionTest extends FunSuite with FileChecks {
 
   val logger = Logger.log4s[IO](org.log4s.getLogger)
-  val target = Paths.get("target")
+  val target = File.path(Paths.get("target"))
 
   val convertConfig = ConvertConfig(
     8192,
@@ -7,14 +7,15 @@
 package docspell.convert
 
 import java.nio.charset.StandardCharsets
-import java.nio.file.{Files, Path}
+import java.nio.file.Files
 
 import cats.data.Kleisli
 import cats.effect.IO
 import cats.effect.unsafe.implicits.global
+import fs2.io.file.Path
 import fs2.{Pipe, Stream}
 
 import docspell.common.MimeType
 import docspell.common._
 import docspell.convert.ConversionResult.Handler
 import docspell.files.TikaMimetype
@@ -23,7 +24,7 @@ trait FileChecks {
   implicit class FileCheckOps(p: Path) {
 
     def isNonEmpty: Boolean =
-      Files.exists(p) && Files.size(p) > 0
+      Files.exists(p.toNioPath) && Files.size(p.toNioPath) > 0
 
     def isType(mime: MimeType): Boolean =
       TikaMimetype.detect[IO](p).map(_ == mime).unsafeRunSync()
@@ -36,7 +37,12 @@ trait FileChecks {
   }
 
   def storeFile(file: Path): Pipe[IO, Byte, Path] =
-    in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes))))
+    in =>
+      Stream
+        .eval(
+          in.compile.to(Array).flatMap(bytes => IO(Files.write(file.toNioPath, bytes)))
+        )
+        .map(p => File.path(p))
 
   def storePdfHandler(file: Path): Handler[IO, Path] =
     storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1)
@@ -47,8 +53,8 @@ trait FileChecks {
         for {
           pout <- pdf.through(storeFile(filePdf)).compile.lastOrError
           str <- txt
-          tout <- IO(Files.write(fileTxt, str.getBytes(StandardCharsets.UTF_8)))
-        } yield (pout, tout)
+          tout <- IO(Files.write(fileTxt.toNioPath, str.getBytes(StandardCharsets.UTF_8)))
+        } yield (pout, File.path(tout))
 
       case ConversionResult.SuccessPdf(pdf) =>
         pdf.through(storeFile(filePdf)).compile.lastOrError.map(p => (p, fileTxt))
@@ -7,10 +7,11 @@
 package docspell.convert.extern
 
 import java.nio.charset.StandardCharsets
-import java.nio.file.{Path, Paths}
+import java.nio.file.Paths
 
 import cats.effect._
 import cats.effect.unsafe.implicits.global
+import fs2.io.file.Path
 
 import docspell.common._
 import docspell.convert._
@@ -21,7 +22,7 @@ import munit._
 class ExternConvTest extends FunSuite with FileChecks {
   val utf8 = StandardCharsets.UTF_8
   val logger = Logger.log4s[IO](org.log4s.getLogger)
-  val target = Paths.get("target")
+  val target = File.path(Paths.get("target"))
 
   test("convert html to pdf") {
     val cfg = SystemCommand.Config(
@@ -6,10 +6,9 @@
 
 package docspell.extract.ocr
 
-import java.nio.file.Path
-
 import cats.effect._
 import fs2.Stream
+import fs2.io.file.Path
 
 import docspell.common._
 
@@ -100,7 +99,7 @@ object Ocr {
   ): Stream[F, Path] = {
     val cmd = ghostscript.replace(
       Map(
-        "{{infile}}" -> pdf.toAbsolutePath.toString,
+        "{{infile}}" -> pdf.absolute.toString,
         "{{outfile}}" -> "%d.tif"
       )
     )
@@ -110,7 +109,7 @@ object Ocr {
   }
 
   private def pathEndsWith(ext: String): Path => Boolean =
-    p => p.getFileName.toString.endsWith(ext)
+    p => p.fileName.toString.endsWith(ext)
 
   /** Run unpaper to optimize the image for ocr. The
     * files are stored to a temporary location on disk and returned.
@@ -118,18 +117,18 @@ object Ocr {
   private[extract] def runUnpaperFile[F[_]: Async](
       img: Path,
       unpaper: SystemCommand.Config,
-      wd: Path,
+      wd: Option[Path],
       logger: Logger[F]
   ): Stream[F, Path] = {
-    val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
+    val targetFile = img.resolveSibling("u-" + img.fileName.toString).absolute
     val cmd = unpaper.replace(
       Map(
-        "{{infile}}" -> img.toAbsolutePath.toString,
+        "{{infile}}" -> img.absolute.toString,
         "{{outfile}}" -> targetFile.toString
       )
     )
     SystemCommand
-      .execSuccess[F](cmd, logger, wd = Some(wd))
+      .execSuccess[F](cmd, logger, wd = wd)
       .map(_ => targetFile)
       .handleErrorWith { th =>
         logger
|
||||
): Stream[F, String] =
|
||||
// tesseract cannot cope with absolute filenames
|
||||
// so use the parent as working dir
|
||||
runUnpaperFile(img, config.unpaper.command, img.getParent, logger).flatMap { uimg =>
|
||||
runUnpaperFile(img, config.unpaper.command, img.parent, logger).flatMap { uimg =>
|
||||
val cmd = config.tesseract.command
|
||||
.replace(
|
||||
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
|
||||
Map("{{file}}" -> uimg.fileName.toString, "{{lang}}" -> fixLanguage(lang))
|
||||
)
|
||||
SystemCommand
|
||||
.execSuccess[F](cmd, logger, wd = Some(uimg.getParent))
|
||||
.execSuccess[F](cmd, logger, wd = uimg.parent)
|
||||
.map(_.stdout)
|
||||
}
|
||||
|
||||
|
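Path#parent returns Option[Path] in fs2 (where java.nio's getParent could return null), which is why runUnpaperFile now takes wd: Option[Path] and the Some(...) wrapping disappears at the call sites. A two-line sketch with an invented path:

    import fs2.io.file.Path

    val img = Path("target/scan/page1.png")
    val wd: Option[Path] = img.parent // Some(Path("target/scan"))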
@@ -6,7 +6,9 @@
 
 package docspell.extract.ocr
 
-import java.nio.file.{Path, Paths}
+import java.nio.file.Paths
+
+import fs2.io.file.Path
 
 import docspell.common._
 
@@ -44,7 +46,9 @@ object OcrConfig {
         ),
         Duration.seconds(30)
       ),
-      Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
+      File.path(
+        Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
+      )
     ),
     unpaper = Unpaper(
       SystemCommand
@@ -7,13 +7,13 @@
 package docspell.extract.pdfbox
 
 import java.io.InputStream
-import java.nio.file.Path
 
 import scala.util.{Try, Using}
 
 import cats.effect.Sync
 import cats.implicits._
 import fs2.Stream
+import fs2.io.file.Path
 
 import docspell.common.Timestamp
 import docspell.extract.internal.Text
@@ -48,7 +48,7 @@ object PdfboxExtract {
     Using(PDDocument.load(is))(readText).toEither.flatten
 
   def getText(inFile: Path): Either[Throwable, Text] =
-    Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten
+    Using(PDDocument.load(inFile.toNioPath.toFile))(readText).toEither.flatten
 
   private def readText(doc: PDDocument): Either[Throwable, Text] =
     Try {
@@ -68,7 +68,7 @@ object PdfboxExtract {
     Using(PDDocument.load(is))(readMetaData).toEither.flatten
 
   def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] =
-    Using(PDDocument.load(inFile.toFile))(readMetaData).toEither.flatten
+    Using(PDDocument.load(inFile.toNioPath.toFile))(readMetaData).toEither.flatten
 
   private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] =
     Try {
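PDFBox still wants a java.io.File, so the fs2 path is bridged through toNioPath; the same pattern shows up below for javax.imageio and java.nio.file.Files. Sketch with an invented path:

    import fs2.io.file.Path

    val pdf: java.io.File = Path("target/doc.pdf").toNioPath.toFile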
@@ -6,12 +6,11 @@
 
 package docspell.extract.pdfbox
 
-import java.nio.file.Path
-
 import cats.effect._
 import cats.effect.unsafe.implicits.global
 import fs2.Stream
 import fs2.io.file.Files
+import fs2.io.file.Path
 
 import docspell.files.ExampleFiles
 
@@ -7,7 +7,6 @@
 package docspell.files
 
 import java.io.{ByteArrayInputStream, InputStream}
-import java.nio.file.Path
 import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
 import javax.imageio.{ImageIO, ImageReader}
 
@@ -17,6 +16,7 @@ import scala.util.{Try, Using}
 import cats.effect._
 import cats.implicits._
 import fs2.Stream
+import fs2.io.file.Path
 
 object ImageSize {
 
||||
@ -24,7 +24,7 @@ object ImageSize {
|
||||
* the whole image into memory.
|
||||
*/
|
||||
def get(file: Path): Option[Dimension] =
|
||||
Using(new FileImageInputStream(file.toFile))(getDimension).toOption.flatten
|
||||
Using(new FileImageInputStream(file.toNioPath.toFile))(getDimension).toOption.flatten
|
||||
|
||||
/** Return the image size from its header without reading
|
||||
* the whole image into memory.
|
||||
|
@@ -8,7 +8,7 @@ package docspell.files
 
 import java.io.BufferedInputStream
 import java.nio.charset.Charset
-import java.nio.file.{Files, Path}
+import java.nio.file.Files
 
 import scala.jdk.CollectionConverters._
 import scala.util.Using
@@ -16,6 +16,7 @@ import scala.util.Using
 import cats.effect.Sync
 import cats.implicits._
 import fs2.Stream
+import fs2.io.file.Path
 
 import docspell.common._
 
@@ -100,8 +101,8 @@ object TikaMimetype {
 
   def detect[F[_]: Sync](file: Path): F[MimeType] =
     Sync[F].delay {
-      val hint = MimeTypeHint.filename(file.getFileName.toString)
-      Using(new BufferedInputStream(Files.newInputStream(file), 64)) { in =>
+      val hint = MimeTypeHint.filename(file.fileName.toString)
+      Using(new BufferedInputStream(Files.newInputStream(file.toNioPath), 64)) { in =>
         convert(tika.detect(in, makeMetadata(hint)))
       }.toEither
     }.rethrow
@@ -6,9 +6,8 @@
 
 package docspell.joex
 
-import java.nio.file.Path
-
 import cats.data.NonEmptyList
+import fs2.io.file.Path
 
 import docspell.analysis.TextAnalysisConfig
 import docspell.analysis.classifier.TextClassifierConfig
@@ -6,10 +6,9 @@
 
 package docspell.joex.analysis
 
-import java.nio.file.Path
-
 import cats.effect._
 import cats.implicits._
+import fs2.io.file.Path
 
 import docspell.analysis.split.TextSplitter
 import docspell.common._
@@ -6,11 +6,10 @@
 
 package docspell.joex.analysis
 
-import java.nio.file.Path
-
 import cats.effect._
 import cats.effect.std.Semaphore
 import cats.implicits._
+import fs2.io.file.Path
 
 import docspell.common._
 import docspell.common.syntax.all._
@@ -112,8 +111,11 @@ object RegexNerFile {
       writer.permit.use(_ =>
         for {
           file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
-          _ <- File.mkDir(file.getParent)
-          _ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
+          _ <- file.parent match {
+            case Some(p) => File.mkDir(p)
+            case None => ().pure[F]
+          }
+          _ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
         } yield ()
       )
 
||||
@ -129,7 +131,10 @@ object RegexNerFile {
|
||||
_ <- logger.fdebug(
|
||||
s"Writing custom NER file for collective '${collective.id}'"
|
||||
)
|
||||
_ <- File.mkDir(jsonFile.getParent)
|
||||
_ <- jsonFile.parent match {
|
||||
case Some(p) => File.mkDir(p)
|
||||
case None => ().pure[F]
|
||||
}
|
||||
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
|
||||
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
|
||||
} yield ()
|
||||
|
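Both hunks replace File.mkDir(file.getParent), which would have blown up on a parentless path under java.nio, with an explicit match on fs2's optional parent. The pattern, extracted as a standalone sketch (the helper name is invented):

    import cats.effect.IO
    import cats.syntax.applicative._
    import fs2.io.file.{Files, Path}

    // create the parent directory only when the path actually has one
    def ensureParentDir(file: Path): IO[Unit] =
      file.parent match {
        case Some(p) => Files[IO].createDirectories(p)
        case None => ().pure[IO]
      }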
@@ -6,12 +6,11 @@
 
 package docspell.joex.learn
 
-import java.nio.file.Path
-
 import cats.data.OptionT
 import cats.effect._
 import cats.implicits._
 import fs2.io.file.Files
+import fs2.io.file.Path
 
 import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
 import docspell.common._
@@ -42,7 +42,7 @@ object StoreClassifierModel {
         RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
       )
       _ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
-      fileData = Files[F].readAll(trainedModel.model, 4096)
+      fileData = Files[F].readAll(trainedModel.model)
       newFile <-
         store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
       _ <- store.transact(
@@ -309,7 +309,7 @@ trait Conversions {
   ): F[UploadData[F]] = {
     def parseMeta(body: Stream[F, Byte]): F[ItemUploadMeta] =
       body
-        .through(fs2.text.utf8Decode)
+        .through(fs2.text.utf8.decode)
         .parseJsonAs[ItemUploadMeta]
         .map(
           _.fold(
@@ -9,7 +9,7 @@ package docspell.restserver.http4s
 import cats.data.NonEmptyList
 import cats.data.OptionT
 import cats.effect.Sync
-import fs2.text.utf8Encode
+import fs2.text.utf8
 import fs2.{Pure, Stream}
 
 import org.http4s._
@@ -20,14 +20,14 @@ object Responses {
   private[this] val pureForbidden: Response[Pure] =
     Response(
       Status.Forbidden,
-      body = Stream("Forbidden").through(utf8Encode),
+      body = Stream("Forbidden").through(utf8.encode),
       headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil)
     )
 
   private[this] val pureUnauthorized: Response[Pure] =
     Response(
       Status.Unauthorized,
-      body = Stream("Unauthorized").through(utf8Encode),
+      body = Stream("Unauthorized").through(utf8.encode),
       headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil)
     )
 
@@ -92,7 +92,7 @@ object TemplateRoutes {
       Stream
         .bracket(Sync[F].delay(url.openStream))(in => Sync[F].delay(in.close()))
         .flatMap(in => fs2.io.readInputStream(in.pure[F], 64 * 1024, false))
-        .through(text.utf8Decode)
+        .through(text.utf8.decode)
         .compile
         .fold("")(_ + _)
 