mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 10:59:33 +00:00
Merge pull request #988 from scala-steward/update/fs2-core-3.1.0
Update fs2-core, fs2-io to 3.1.0
This commit is contained in:
commit
14e99c7b16
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
package docspell.analysis
|
package docspell.analysis
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
|
@ -6,6 +6,6 @@
|
|||||||
|
|
||||||
package docspell.analysis.classifier
|
package docspell.analysis.classifier
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
case class ClassifierModel(model: Path)
|
case class ClassifierModel(model: Path)
|
||||||
|
@ -6,13 +6,11 @@
|
|||||||
|
|
||||||
package docspell.analysis.classifier
|
package docspell.analysis.classifier
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect.Ref
|
import cats.effect.Ref
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
import fs2.io.file.Files
|
import fs2.io.file.{Files, Path}
|
||||||
|
|
||||||
import docspell.analysis.classifier
|
import docspell.analysis.classifier
|
||||||
import docspell.analysis.classifier.TextClassifier._
|
import docspell.analysis.classifier.TextClassifier._
|
||||||
@ -51,7 +49,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
|
|||||||
case Some(text) =>
|
case Some(text) =>
|
||||||
Sync[F].delay {
|
Sync[F].delay {
|
||||||
val cls = ColumnDataClassifier.getClassifier(
|
val cls = ColumnDataClassifier.getClassifier(
|
||||||
model.model.normalize().toAbsolutePath.toString
|
model.model.normalize.absolute.toString
|
||||||
)
|
)
|
||||||
val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text)))
|
val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text)))
|
||||||
Option(cat)
|
Option(cat)
|
||||||
@ -71,8 +69,8 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
|
|||||||
_ <- logger.debug(s"Training classifier from $props")
|
_ <- logger.debug(s"Training classifier from $props")
|
||||||
res <- Sync[F].delay {
|
res <- Sync[F].delay {
|
||||||
val cdc = new ColumnDataClassifier(Properties.fromMap(amendProps(in, props)))
|
val cdc = new ColumnDataClassifier(Properties.fromMap(amendProps(in, props)))
|
||||||
cdc.trainClassifier(in.train.toString())
|
cdc.trainClassifier(in.train.toString)
|
||||||
val score = cdc.testClassifier(in.test.toString())
|
val score = cdc.testClassifier(in.test.toString)
|
||||||
TrainResult(score.first(), classifier.ClassifierModel(in.modelFile))
|
TrainResult(score.first(), classifier.ClassifierModel(in.modelFile))
|
||||||
}
|
}
|
||||||
_ <- logger.debug(s"Trained with result $res")
|
_ <- logger.debug(s"Trained with result $res")
|
||||||
@ -88,7 +86,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
|
|||||||
val fileLines =
|
val fileLines =
|
||||||
File
|
File
|
||||||
.readAll[F](in.file, 4096)
|
.readAll[F](in.file, 4096)
|
||||||
.through(fs2.text.utf8Decode)
|
.through(fs2.text.utf8.decode)
|
||||||
.through(fs2.text.lines)
|
.through(fs2.text.lines)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
@ -99,7 +97,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
|
|||||||
fileLines
|
fileLines
|
||||||
.take(nTest)
|
.take(nTest)
|
||||||
.intersperse("\n")
|
.intersperse("\n")
|
||||||
.through(fs2.text.utf8Encode)
|
.through(fs2.text.utf8.encode)
|
||||||
.through(Files[F].writeAll(td.test))
|
.through(Files[F].writeAll(td.test))
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
@ -107,7 +105,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
|
|||||||
fileLines
|
fileLines
|
||||||
.drop(nTest)
|
.drop(nTest)
|
||||||
.intersperse("\n")
|
.intersperse("\n")
|
||||||
.through(fs2.text.utf8Encode)
|
.through(fs2.text.utf8.encode)
|
||||||
.through(Files[F].writeAll(td.train))
|
.through(Files[F].writeAll(td.train))
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
@ -124,7 +122,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
|
|||||||
.map(d => s"${d.cls}\t${fixRef(d.ref)}\t${normalisedText(d.text)}")
|
.map(d => s"${d.cls}\t${fixRef(d.ref)}\t${normalisedText(d.text)}")
|
||||||
.evalTap(_ => counter.update(_ + 1))
|
.evalTap(_ => counter.update(_ + 1))
|
||||||
.intersperse("\r\n")
|
.intersperse("\r\n")
|
||||||
.through(fs2.text.utf8Encode)
|
.through(fs2.text.utf8.encode)
|
||||||
.through(Files[F].writeAll(target))
|
.through(Files[F].writeAll(target))
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
|
@ -6,9 +6,8 @@
|
|||||||
|
|
||||||
package docspell.analysis.classifier
|
package docspell.analysis.classifier
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
case class TextClassifierConfig(
|
case class TextClassifierConfig(
|
||||||
workingDir: Path,
|
workingDir: Path,
|
||||||
|
@ -6,9 +6,10 @@
|
|||||||
|
|
||||||
package docspell.analysis.nlp
|
package docspell.analysis.nlp
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
import java.util.{Properties => JProps}
|
import java.util.{Properties => JProps}
|
||||||
|
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.analysis.nlp.Properties.Implicits._
|
import docspell.analysis.nlp.Properties.Implicits._
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.common.syntax.FileSyntax._
|
import docspell.common.syntax.FileSyntax._
|
||||||
|
@ -6,11 +6,10 @@
|
|||||||
|
|
||||||
package docspell.analysis.nlp
|
package docspell.analysis.nlp
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import scala.jdk.CollectionConverters._
|
import scala.jdk.CollectionConverters._
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
package docspell.analysis.nlp
|
package docspell.analysis.nlp
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.analysis.NlpSettings
|
import docspell.analysis.NlpSettings
|
||||||
import docspell.common.Language.NLPLanguage
|
import docspell.common.Language.NLPLanguage
|
||||||
|
@ -24,7 +24,7 @@ class StanfordTextClassifierSuite extends FunSuite {
|
|||||||
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||||
|
|
||||||
test("learn from data") {
|
test("learn from data") {
|
||||||
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
|
val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map()))
|
||||||
|
|
||||||
val data =
|
val data =
|
||||||
Stream
|
Stream
|
||||||
@ -52,8 +52,8 @@ class StanfordTextClassifierSuite extends FunSuite {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test("run classifier") {
|
test("run classifier") {
|
||||||
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
|
val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map()))
|
||||||
val things = File.withTempDir[IO](Paths.get("target"), "testcls")
|
val things = File.withTempDir[IO](File.path(Paths.get("target")), "testcls")
|
||||||
|
|
||||||
things
|
things
|
||||||
.use { dir =>
|
.use { dir =>
|
||||||
|
@ -13,7 +13,6 @@ import cats.effect.unsafe.implicits.global
|
|||||||
|
|
||||||
import docspell.analysis.Env
|
import docspell.analysis.Env
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.common.syntax.FileSyntax._
|
|
||||||
import docspell.files.TestFiles
|
import docspell.files.TestFiles
|
||||||
|
|
||||||
import edu.stanford.nlp.pipeline.StanfordCoreNLP
|
import edu.stanford.nlp.pipeline.StanfordCoreNLP
|
||||||
@ -100,7 +99,7 @@ class StanfordNerAnnotatorSuite extends FunSuite {
|
|||||||
|""".stripMargin
|
|""".stripMargin
|
||||||
|
|
||||||
File
|
File
|
||||||
.withTempDir[IO](Paths.get("target"), "test-regex-ner")
|
.withTempDir[IO](File.path(Paths.get("target")), "test-regex-ner")
|
||||||
.use { dir =>
|
.use { dir =>
|
||||||
for {
|
for {
|
||||||
out <- File.writeString[IO](dir / "regex.txt", regexNerContent)
|
out <- File.writeString[IO](dir / "regex.txt", regexNerContent)
|
||||||
|
@ -29,7 +29,7 @@ object Binary {
|
|||||||
Binary[F](
|
Binary[F](
|
||||||
name,
|
name,
|
||||||
MimeType.octetStream,
|
MimeType.octetStream,
|
||||||
Stream.emit(content).through(fs2.text.utf8Encode)
|
Stream.emit(content).through(fs2.text.utf8.encode)
|
||||||
)
|
)
|
||||||
|
|
||||||
def text[F[_]](name: String, content: String): Binary[F] =
|
def text[F[_]](name: String, content: String): Binary[F] =
|
||||||
@ -46,7 +46,7 @@ object Binary {
|
|||||||
|
|
||||||
def decode[F[_]](cs: Charset): Pipe[F, Byte, String] =
|
def decode[F[_]](cs: Charset): Pipe[F, Byte, String] =
|
||||||
if (cs == StandardCharsets.UTF_8)
|
if (cs == StandardCharsets.UTF_8)
|
||||||
fs2.text.utf8Decode
|
fs2.text.utf8.decode
|
||||||
else
|
else
|
||||||
util.decode[F](cs)
|
util.decode[F](cs)
|
||||||
|
|
||||||
|
@ -6,14 +6,14 @@
|
|||||||
|
|
||||||
package docspell.common
|
package docspell.common
|
||||||
|
|
||||||
import java.nio.file.Path
|
import java.nio.file.{Path => JPath}
|
||||||
|
|
||||||
import cats.FlatMap
|
import cats.FlatMap
|
||||||
import cats.Monad
|
import cats.Monad
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
import fs2.io.file.Files
|
import fs2.io.file.{Files, Flags, Path}
|
||||||
|
|
||||||
import docspell.common.syntax.all._
|
import docspell.common.syntax.all._
|
||||||
|
|
||||||
@ -21,7 +21,9 @@ import io.circe.Decoder
|
|||||||
|
|
||||||
object File {
|
object File {
|
||||||
|
|
||||||
def mkDir[F[_]: Files](dir: Path): F[Path] =
|
def path(jp: JPath): Path = Path.fromNioPath(jp)
|
||||||
|
|
||||||
|
def mkDir[F[_]: Files](dir: Path): F[Unit] =
|
||||||
Files[F].createDirectories(dir)
|
Files[F].createDirectories(dir)
|
||||||
|
|
||||||
def exists[F[_]: Files](file: Path): F[Boolean] =
|
def exists[F[_]: Files](file: Path): F[Boolean] =
|
||||||
@ -37,31 +39,36 @@ object File {
|
|||||||
for {
|
for {
|
||||||
isDir <- Files[F].isDirectory(path)
|
isDir <- Files[F].isDirectory(path)
|
||||||
_ <-
|
_ <-
|
||||||
if (isDir) Files[F].deleteDirectoryRecursively(path)
|
if (isDir) Files[F].deleteRecursively(path)
|
||||||
else Files[F].deleteIfExists(path)
|
else Files[F].deleteIfExists(path)
|
||||||
} yield ()
|
} yield ()
|
||||||
|
|
||||||
def withTempDir[F[_]: Files](parent: Path, prefix: String): Resource[F, Path] =
|
def withTempDir[F[_]: Files](parent: Path, prefix: String): Resource[F, Path] =
|
||||||
Resource
|
Resource
|
||||||
.eval(mkDir[F](parent))
|
.eval(mkDir[F](parent))
|
||||||
.flatMap(_ => Files[F].tempDirectory(parent.some, prefix))
|
.flatMap(_ => Files[F].tempDirectory(parent.some, prefix, None))
|
||||||
|
|
||||||
def listFiles[F[_]: Files](pred: Path => Boolean, dir: Path): Stream[F, Path] =
|
def listFiles[F[_]: Files](pred: Path => Boolean, dir: Path): Stream[F, Path] =
|
||||||
Files[F].directoryStream(dir, pred)
|
Files[F].list(dir).filter(pred)
|
||||||
|
|
||||||
def readAll[F[_]: Files](
|
def readAll[F[_]: Files](
|
||||||
file: Path,
|
file: Path,
|
||||||
chunkSize: Int
|
chunkSize: Int
|
||||||
): Stream[F, Byte] =
|
): Stream[F, Byte] =
|
||||||
Files[F].readAll(file, chunkSize)
|
Files[F].readAll(file, chunkSize, Flags.Read)
|
||||||
|
|
||||||
|
def readAll[F[_]: Files](
|
||||||
|
file: Path
|
||||||
|
): Stream[F, Byte] =
|
||||||
|
Files[F].readAll(file)
|
||||||
|
|
||||||
def readText[F[_]: Files: Concurrent](file: Path): F[String] =
|
def readText[F[_]: Files: Concurrent](file: Path): F[String] =
|
||||||
readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
|
readAll[F](file, 8192).through(fs2.text.utf8.decode).compile.foldMonoid
|
||||||
|
|
||||||
def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] =
|
def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] =
|
||||||
Stream
|
Stream
|
||||||
.emit(content)
|
.emit(content)
|
||||||
.through(fs2.text.utf8Encode)
|
.through(fs2.text.utf8.encode)
|
||||||
.through(Files[F].writeAll(file))
|
.through(Files[F].writeAll(file))
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
|
@ -82,7 +82,7 @@ case class LenientUri(
|
|||||||
)
|
)
|
||||||
|
|
||||||
def readText[F[_]: Sync](chunkSize: Int): F[String] =
|
def readText[F[_]: Sync](chunkSize: Int): F[String] =
|
||||||
readURL[F](chunkSize).through(fs2.text.utf8Decode).compile.foldMonoid
|
readURL[F](chunkSize).through(fs2.text.utf8.decode).compile.foldMonoid
|
||||||
|
|
||||||
def host: Option[String] =
|
def host: Option[String] =
|
||||||
authority.map(a =>
|
authority.map(a =>
|
||||||
|
@ -8,13 +8,13 @@ package docspell.common
|
|||||||
|
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
import java.lang.ProcessBuilder.Redirect
|
import java.lang.ProcessBuilder.Redirect
|
||||||
import java.nio.file.Path
|
|
||||||
import java.util.concurrent.TimeUnit
|
import java.util.concurrent.TimeUnit
|
||||||
|
|
||||||
import scala.jdk.CollectionConverters._
|
import scala.jdk.CollectionConverters._
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Path
|
||||||
import fs2.{Stream, io, text}
|
import fs2.{Stream, io, text}
|
||||||
|
|
||||||
object SystemCommand {
|
object SystemCommand {
|
||||||
@ -102,7 +102,7 @@ object SystemCommand {
|
|||||||
.redirectError(Redirect.PIPE)
|
.redirectError(Redirect.PIPE)
|
||||||
.redirectOutput(Redirect.PIPE)
|
.redirectOutput(Redirect.PIPE)
|
||||||
|
|
||||||
wd.map(_.toFile).foreach(pb.directory)
|
wd.map(_.toNioPath.toFile).foreach(pb.directory)
|
||||||
pb.start()
|
pb.start()
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@ -115,7 +115,7 @@ object SystemCommand {
|
|||||||
|
|
||||||
private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
|
private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
|
||||||
io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
|
io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
|
||||||
.through(text.utf8Decode)
|
.through(text.utf8.decode)
|
||||||
.chunks
|
.chunks
|
||||||
.map(_.toVector.mkString)
|
.map(_.toVector.mkString)
|
||||||
.fold1(_ + _)
|
.fold1(_ + _)
|
||||||
|
@ -6,8 +6,12 @@
|
|||||||
|
|
||||||
package docspell.common.config
|
package docspell.common.config
|
||||||
|
|
||||||
|
import java.nio.file.{Path => JPath}
|
||||||
|
|
||||||
import scala.reflect.ClassTag
|
import scala.reflect.ClassTag
|
||||||
|
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
import com.github.eikek.calev.CalEvent
|
import com.github.eikek.calev.CalEvent
|
||||||
@ -16,6 +20,10 @@ import pureconfig.error.{CannotConvert, FailureReason}
|
|||||||
import scodec.bits.ByteVector
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
object Implicits {
|
object Implicits {
|
||||||
|
|
||||||
|
implicit val pathReader: ConfigReader[Path] =
|
||||||
|
ConfigReader[JPath].map(Path.fromNioPath)
|
||||||
|
|
||||||
implicit val lenientUriReader: ConfigReader[LenientUri] =
|
implicit val lenientUriReader: ConfigReader[LenientUri] =
|
||||||
ConfigReader[String].emap(reason(LenientUri.parse))
|
ConfigReader[String].emap(reason(LenientUri.parse))
|
||||||
|
|
||||||
|
@ -6,20 +6,17 @@
|
|||||||
|
|
||||||
package docspell.common.syntax
|
package docspell.common.syntax
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
trait FileSyntax {
|
trait FileSyntax {
|
||||||
|
|
||||||
implicit final class PathOps(p: Path) {
|
implicit final class PathOps(p: Path) {
|
||||||
|
|
||||||
def absolutePath: Path =
|
def absolutePath: Path =
|
||||||
p.normalize().toAbsolutePath
|
p.absolute
|
||||||
|
|
||||||
def absolutePathAsString: String =
|
def absolutePathAsString: String =
|
||||||
absolutePath.toString
|
absolutePath.toString
|
||||||
|
|
||||||
def /(next: String): Path =
|
|
||||||
p.resolve(next)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,11 +6,9 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.io.file.Files
|
import fs2.io.file.{Files, Path}
|
||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -30,8 +28,8 @@ private[extern] object ExternConv {
|
|||||||
Stream
|
Stream
|
||||||
.resource(File.withTempDir[F](wd, s"docspell-$name"))
|
.resource(File.withTempDir[F](wd, s"docspell-$name"))
|
||||||
.flatMap { dir =>
|
.flatMap { dir =>
|
||||||
val inFile = dir.resolve("infile").toAbsolutePath.normalize
|
val inFile = dir.resolve("infile").absolute.normalize
|
||||||
val out = dir.resolve("out.pdf").toAbsolutePath.normalize
|
val out = dir.resolve("out.pdf").absolute.normalize
|
||||||
val sysCfg =
|
val sysCfg =
|
||||||
cmdCfg.replace(
|
cmdCfg.replace(
|
||||||
Map(
|
Map(
|
||||||
@ -77,7 +75,7 @@ private[extern] object ExternConv {
|
|||||||
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
|
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
|
||||||
File.existsNonEmpty[F](out).flatMap {
|
File.existsNonEmpty[F](out).flatMap {
|
||||||
case true if result.rc == 0 =>
|
case true if result.rc == 0 =>
|
||||||
val outTxt = out.resolveSibling(out.getFileName.toString + ".txt")
|
val outTxt = out.resolveSibling(out.fileName.toString + ".txt")
|
||||||
File.existsNonEmpty[F](outTxt).flatMap {
|
File.existsNonEmpty[F](outTxt).flatMap {
|
||||||
case true =>
|
case true =>
|
||||||
successPdfTxt(
|
successPdfTxt(
|
||||||
|
@ -6,10 +6,9 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.convert.ConversionResult
|
import docspell.convert.ConversionResult
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
|
@ -6,10 +6,9 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.convert.ConversionResult
|
import docspell.convert.ConversionResult
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
|
@ -6,10 +6,9 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.convert.ConversionResult
|
import docspell.convert.ConversionResult
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
|
@ -7,10 +7,10 @@
|
|||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.charset.Charset
|
import java.nio.charset.Charset
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Path
|
||||||
import fs2.{Chunk, Stream}
|
import fs2.{Chunk, Stream}
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ import munit._
|
|||||||
class ConversionTest extends FunSuite with FileChecks {
|
class ConversionTest extends FunSuite with FileChecks {
|
||||||
|
|
||||||
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||||
val target = Paths.get("target")
|
val target = File.path(Paths.get("target"))
|
||||||
|
|
||||||
val convertConfig = ConvertConfig(
|
val convertConfig = ConvertConfig(
|
||||||
8192,
|
8192,
|
||||||
|
@ -7,14 +7,15 @@
|
|||||||
package docspell.convert
|
package docspell.convert
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets
|
import java.nio.charset.StandardCharsets
|
||||||
import java.nio.file.{Files, Path}
|
import java.nio.file.Files
|
||||||
|
|
||||||
import cats.data.Kleisli
|
import cats.data.Kleisli
|
||||||
import cats.effect.IO
|
import cats.effect.IO
|
||||||
import cats.effect.unsafe.implicits.global
|
import cats.effect.unsafe.implicits.global
|
||||||
|
import fs2.io.file.Path
|
||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
|
|
||||||
import docspell.common.MimeType
|
import docspell.common._
|
||||||
import docspell.convert.ConversionResult.Handler
|
import docspell.convert.ConversionResult.Handler
|
||||||
import docspell.files.TikaMimetype
|
import docspell.files.TikaMimetype
|
||||||
|
|
||||||
@ -23,7 +24,7 @@ trait FileChecks {
|
|||||||
implicit class FileCheckOps(p: Path) {
|
implicit class FileCheckOps(p: Path) {
|
||||||
|
|
||||||
def isNonEmpty: Boolean =
|
def isNonEmpty: Boolean =
|
||||||
Files.exists(p) && Files.size(p) > 0
|
Files.exists(p.toNioPath) && Files.size(p.toNioPath) > 0
|
||||||
|
|
||||||
def isType(mime: MimeType): Boolean =
|
def isType(mime: MimeType): Boolean =
|
||||||
TikaMimetype.detect[IO](p).map(_ == mime).unsafeRunSync()
|
TikaMimetype.detect[IO](p).map(_ == mime).unsafeRunSync()
|
||||||
@ -36,7 +37,12 @@ trait FileChecks {
|
|||||||
}
|
}
|
||||||
|
|
||||||
def storeFile(file: Path): Pipe[IO, Byte, Path] =
|
def storeFile(file: Path): Pipe[IO, Byte, Path] =
|
||||||
in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes))))
|
in =>
|
||||||
|
Stream
|
||||||
|
.eval(
|
||||||
|
in.compile.to(Array).flatMap(bytes => IO(Files.write(file.toNioPath, bytes)))
|
||||||
|
)
|
||||||
|
.map(p => File.path(p))
|
||||||
|
|
||||||
def storePdfHandler(file: Path): Handler[IO, Path] =
|
def storePdfHandler(file: Path): Handler[IO, Path] =
|
||||||
storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1)
|
storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1)
|
||||||
@ -47,8 +53,8 @@ trait FileChecks {
|
|||||||
for {
|
for {
|
||||||
pout <- pdf.through(storeFile(filePdf)).compile.lastOrError
|
pout <- pdf.through(storeFile(filePdf)).compile.lastOrError
|
||||||
str <- txt
|
str <- txt
|
||||||
tout <- IO(Files.write(fileTxt, str.getBytes(StandardCharsets.UTF_8)))
|
tout <- IO(Files.write(fileTxt.toNioPath, str.getBytes(StandardCharsets.UTF_8)))
|
||||||
} yield (pout, tout)
|
} yield (pout, File.path(tout))
|
||||||
|
|
||||||
case ConversionResult.SuccessPdf(pdf) =>
|
case ConversionResult.SuccessPdf(pdf) =>
|
||||||
pdf.through(storeFile(filePdf)).compile.lastOrError.map(p => (p, fileTxt))
|
pdf.through(storeFile(filePdf)).compile.lastOrError.map(p => (p, fileTxt))
|
||||||
|
@ -7,10 +7,11 @@
|
|||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets
|
import java.nio.charset.StandardCharsets
|
||||||
import java.nio.file.{Path, Paths}
|
import java.nio.file.Paths
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.unsafe.implicits.global
|
import cats.effect.unsafe.implicits.global
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.convert._
|
import docspell.convert._
|
||||||
@ -21,7 +22,7 @@ import munit._
|
|||||||
class ExternConvTest extends FunSuite with FileChecks {
|
class ExternConvTest extends FunSuite with FileChecks {
|
||||||
val utf8 = StandardCharsets.UTF_8
|
val utf8 = StandardCharsets.UTF_8
|
||||||
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||||
val target = Paths.get("target")
|
val target = File.path(Paths.get("target"))
|
||||||
|
|
||||||
test("convert html to pdf") {
|
test("convert html to pdf") {
|
||||||
val cfg = SystemCommand.Config(
|
val cfg = SystemCommand.Config(
|
||||||
|
@ -6,10 +6,9 @@
|
|||||||
|
|
||||||
package docspell.extract.ocr
|
package docspell.extract.ocr
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
@ -100,7 +99,7 @@ object Ocr {
|
|||||||
): Stream[F, Path] = {
|
): Stream[F, Path] = {
|
||||||
val cmd = ghostscript.replace(
|
val cmd = ghostscript.replace(
|
||||||
Map(
|
Map(
|
||||||
"{{infile}}" -> pdf.toAbsolutePath.toString,
|
"{{infile}}" -> pdf.absolute.toString,
|
||||||
"{{outfile}}" -> "%d.tif"
|
"{{outfile}}" -> "%d.tif"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -110,7 +109,7 @@ object Ocr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private def pathEndsWith(ext: String): Path => Boolean =
|
private def pathEndsWith(ext: String): Path => Boolean =
|
||||||
p => p.getFileName.toString.endsWith(ext)
|
p => p.fileName.toString.endsWith(ext)
|
||||||
|
|
||||||
/** Run unpaper to optimize the image for ocr. The
|
/** Run unpaper to optimize the image for ocr. The
|
||||||
* files are stored to a temporary location on disk and returned.
|
* files are stored to a temporary location on disk and returned.
|
||||||
@ -118,18 +117,18 @@ object Ocr {
|
|||||||
private[extract] def runUnpaperFile[F[_]: Async](
|
private[extract] def runUnpaperFile[F[_]: Async](
|
||||||
img: Path,
|
img: Path,
|
||||||
unpaper: SystemCommand.Config,
|
unpaper: SystemCommand.Config,
|
||||||
wd: Path,
|
wd: Option[Path],
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
): Stream[F, Path] = {
|
): Stream[F, Path] = {
|
||||||
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
|
val targetFile = img.resolveSibling("u-" + img.fileName.toString).absolute
|
||||||
val cmd = unpaper.replace(
|
val cmd = unpaper.replace(
|
||||||
Map(
|
Map(
|
||||||
"{{infile}}" -> img.toAbsolutePath.toString,
|
"{{infile}}" -> img.absolute.toString,
|
||||||
"{{outfile}}" -> targetFile.toString
|
"{{outfile}}" -> targetFile.toString
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
SystemCommand
|
SystemCommand
|
||||||
.execSuccess[F](cmd, logger, wd = Some(wd))
|
.execSuccess[F](cmd, logger, wd = wd)
|
||||||
.map(_ => targetFile)
|
.map(_ => targetFile)
|
||||||
.handleErrorWith { th =>
|
.handleErrorWith { th =>
|
||||||
logger
|
logger
|
||||||
@ -151,13 +150,13 @@ object Ocr {
|
|||||||
): Stream[F, String] =
|
): Stream[F, String] =
|
||||||
// tesseract cannot cope with absolute filenames
|
// tesseract cannot cope with absolute filenames
|
||||||
// so use the parent as working dir
|
// so use the parent as working dir
|
||||||
runUnpaperFile(img, config.unpaper.command, img.getParent, logger).flatMap { uimg =>
|
runUnpaperFile(img, config.unpaper.command, img.parent, logger).flatMap { uimg =>
|
||||||
val cmd = config.tesseract.command
|
val cmd = config.tesseract.command
|
||||||
.replace(
|
.replace(
|
||||||
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
|
Map("{{file}}" -> uimg.fileName.toString, "{{lang}}" -> fixLanguage(lang))
|
||||||
)
|
)
|
||||||
SystemCommand
|
SystemCommand
|
||||||
.execSuccess[F](cmd, logger, wd = Some(uimg.getParent))
|
.execSuccess[F](cmd, logger, wd = uimg.parent)
|
||||||
.map(_.stdout)
|
.map(_.stdout)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,9 @@
|
|||||||
|
|
||||||
package docspell.extract.ocr
|
package docspell.extract.ocr
|
||||||
|
|
||||||
import java.nio.file.{Path, Paths}
|
import java.nio.file.Paths
|
||||||
|
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
@ -44,7 +46,9 @@ object OcrConfig {
|
|||||||
),
|
),
|
||||||
Duration.seconds(30)
|
Duration.seconds(30)
|
||||||
),
|
),
|
||||||
Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
|
File.path(
|
||||||
|
Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
|
||||||
|
)
|
||||||
),
|
),
|
||||||
unpaper = Unpaper(
|
unpaper = Unpaper(
|
||||||
SystemCommand
|
SystemCommand
|
||||||
|
@ -7,13 +7,13 @@
|
|||||||
package docspell.extract.pdfbox
|
package docspell.extract.pdfbox
|
||||||
|
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import scala.util.{Try, Using}
|
import scala.util.{Try, Using}
|
||||||
|
|
||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common.Timestamp
|
import docspell.common.Timestamp
|
||||||
import docspell.extract.internal.Text
|
import docspell.extract.internal.Text
|
||||||
@ -48,7 +48,7 @@ object PdfboxExtract {
|
|||||||
Using(PDDocument.load(is))(readText).toEither.flatten
|
Using(PDDocument.load(is))(readText).toEither.flatten
|
||||||
|
|
||||||
def getText(inFile: Path): Either[Throwable, Text] =
|
def getText(inFile: Path): Either[Throwable, Text] =
|
||||||
Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten
|
Using(PDDocument.load(inFile.toNioPath.toFile))(readText).toEither.flatten
|
||||||
|
|
||||||
private def readText(doc: PDDocument): Either[Throwable, Text] =
|
private def readText(doc: PDDocument): Either[Throwable, Text] =
|
||||||
Try {
|
Try {
|
||||||
@ -68,7 +68,7 @@ object PdfboxExtract {
|
|||||||
Using(PDDocument.load(is))(readMetaData).toEither.flatten
|
Using(PDDocument.load(is))(readMetaData).toEither.flatten
|
||||||
|
|
||||||
def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] =
|
def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] =
|
||||||
Using(PDDocument.load(inFile.toFile))(readMetaData).toEither.flatten
|
Using(PDDocument.load(inFile.toNioPath.toFile))(readMetaData).toEither.flatten
|
||||||
|
|
||||||
private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] =
|
private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] =
|
||||||
Try {
|
Try {
|
||||||
|
@ -6,12 +6,11 @@
|
|||||||
|
|
||||||
package docspell.extract.pdfbox
|
package docspell.extract.pdfbox
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.unsafe.implicits.global
|
import cats.effect.unsafe.implicits.global
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
import fs2.io.file.Files
|
import fs2.io.file.Files
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.files.ExampleFiles
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
package docspell.files
|
package docspell.files
|
||||||
|
|
||||||
import java.io.{ByteArrayInputStream, InputStream}
|
import java.io.{ByteArrayInputStream, InputStream}
|
||||||
import java.nio.file.Path
|
|
||||||
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
|
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
|
||||||
import javax.imageio.{ImageIO, ImageReader}
|
import javax.imageio.{ImageIO, ImageReader}
|
||||||
|
|
||||||
@ -17,6 +16,7 @@ import scala.util.{Try, Using}
|
|||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
object ImageSize {
|
object ImageSize {
|
||||||
|
|
||||||
@ -24,7 +24,7 @@ object ImageSize {
|
|||||||
* the whole image into memory.
|
* the whole image into memory.
|
||||||
*/
|
*/
|
||||||
def get(file: Path): Option[Dimension] =
|
def get(file: Path): Option[Dimension] =
|
||||||
Using(new FileImageInputStream(file.toFile))(getDimension).toOption.flatten
|
Using(new FileImageInputStream(file.toNioPath.toFile))(getDimension).toOption.flatten
|
||||||
|
|
||||||
/** Return the image size from its header without reading
|
/** Return the image size from its header without reading
|
||||||
* the whole image into memory.
|
* the whole image into memory.
|
||||||
|
@ -8,7 +8,7 @@ package docspell.files
|
|||||||
|
|
||||||
import java.io.BufferedInputStream
|
import java.io.BufferedInputStream
|
||||||
import java.nio.charset.Charset
|
import java.nio.charset.Charset
|
||||||
import java.nio.file.{Files, Path}
|
import java.nio.file.Files
|
||||||
|
|
||||||
import scala.jdk.CollectionConverters._
|
import scala.jdk.CollectionConverters._
|
||||||
import scala.util.Using
|
import scala.util.Using
|
||||||
@ -16,6 +16,7 @@ import scala.util.Using
|
|||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
@ -100,8 +101,8 @@ object TikaMimetype {
|
|||||||
|
|
||||||
def detect[F[_]: Sync](file: Path): F[MimeType] =
|
def detect[F[_]: Sync](file: Path): F[MimeType] =
|
||||||
Sync[F].delay {
|
Sync[F].delay {
|
||||||
val hint = MimeTypeHint.filename(file.getFileName.toString)
|
val hint = MimeTypeHint.filename(file.fileName.toString)
|
||||||
Using(new BufferedInputStream(Files.newInputStream(file), 64)) { in =>
|
Using(new BufferedInputStream(Files.newInputStream(file.toNioPath), 64)) { in =>
|
||||||
convert(tika.detect(in, makeMetadata(hint)))
|
convert(tika.detect(in, makeMetadata(hint)))
|
||||||
}.toEither
|
}.toEither
|
||||||
}.rethrow
|
}.rethrow
|
||||||
|
@ -6,9 +6,8 @@
|
|||||||
|
|
||||||
package docspell.joex
|
package docspell.joex
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.analysis.TextAnalysisConfig
|
import docspell.analysis.TextAnalysisConfig
|
||||||
import docspell.analysis.classifier.TextClassifierConfig
|
import docspell.analysis.classifier.TextClassifierConfig
|
||||||
|
@ -6,10 +6,9 @@
|
|||||||
|
|
||||||
package docspell.joex.analysis
|
package docspell.joex.analysis
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.analysis.split.TextSplitter
|
import docspell.analysis.split.TextSplitter
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
@ -6,11 +6,10 @@
|
|||||||
|
|
||||||
package docspell.joex.analysis
|
package docspell.joex.analysis
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.std.Semaphore
|
import cats.effect.std.Semaphore
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.common.syntax.all._
|
import docspell.common.syntax.all._
|
||||||
@ -112,8 +111,11 @@ object RegexNerFile {
|
|||||||
writer.permit.use(_ =>
|
writer.permit.use(_ =>
|
||||||
for {
|
for {
|
||||||
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
|
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
|
||||||
_ <- File.mkDir(file.getParent)
|
_ <- file.parent match {
|
||||||
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
|
case Some(p) => File.mkDir(p)
|
||||||
|
case None => ().pure[F]
|
||||||
|
}
|
||||||
|
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
|
||||||
} yield ()
|
} yield ()
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -129,7 +131,10 @@ object RegexNerFile {
|
|||||||
_ <- logger.fdebug(
|
_ <- logger.fdebug(
|
||||||
s"Writing custom NER file for collective '${collective.id}'"
|
s"Writing custom NER file for collective '${collective.id}'"
|
||||||
)
|
)
|
||||||
_ <- File.mkDir(jsonFile.getParent)
|
_ <- jsonFile.parent match {
|
||||||
|
case Some(p) => File.mkDir(p)
|
||||||
|
case None => ().pure[F]
|
||||||
|
}
|
||||||
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
|
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
|
||||||
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
|
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
|
||||||
} yield ()
|
} yield ()
|
||||||
|
@ -6,12 +6,11 @@
|
|||||||
|
|
||||||
package docspell.joex.learn
|
package docspell.joex.learn
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.io.file.Files
|
import fs2.io.file.Files
|
||||||
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
|
import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
@ -42,7 +42,7 @@ object StoreClassifierModel {
|
|||||||
RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
|
RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
|
||||||
)
|
)
|
||||||
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
|
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
|
||||||
fileData = Files[F].readAll(trainedModel.model, 4096)
|
fileData = Files[F].readAll(trainedModel.model)
|
||||||
newFile <-
|
newFile <-
|
||||||
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
|
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
|
||||||
_ <- store.transact(
|
_ <- store.transact(
|
||||||
|
@ -309,7 +309,7 @@ trait Conversions {
|
|||||||
): F[UploadData[F]] = {
|
): F[UploadData[F]] = {
|
||||||
def parseMeta(body: Stream[F, Byte]): F[ItemUploadMeta] =
|
def parseMeta(body: Stream[F, Byte]): F[ItemUploadMeta] =
|
||||||
body
|
body
|
||||||
.through(fs2.text.utf8Decode)
|
.through(fs2.text.utf8.decode)
|
||||||
.parseJsonAs[ItemUploadMeta]
|
.parseJsonAs[ItemUploadMeta]
|
||||||
.map(
|
.map(
|
||||||
_.fold(
|
_.fold(
|
||||||
|
@ -9,7 +9,7 @@ package docspell.restserver.http4s
|
|||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
import fs2.text.utf8Encode
|
import fs2.text.utf8
|
||||||
import fs2.{Pure, Stream}
|
import fs2.{Pure, Stream}
|
||||||
|
|
||||||
import org.http4s._
|
import org.http4s._
|
||||||
@ -20,14 +20,14 @@ object Responses {
|
|||||||
private[this] val pureForbidden: Response[Pure] =
|
private[this] val pureForbidden: Response[Pure] =
|
||||||
Response(
|
Response(
|
||||||
Status.Forbidden,
|
Status.Forbidden,
|
||||||
body = Stream("Forbidden").through(utf8Encode),
|
body = Stream("Forbidden").through(utf8.encode),
|
||||||
headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil)
|
headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
private[this] val pureUnauthorized: Response[Pure] =
|
private[this] val pureUnauthorized: Response[Pure] =
|
||||||
Response(
|
Response(
|
||||||
Status.Unauthorized,
|
Status.Unauthorized,
|
||||||
body = Stream("Unauthorized").through(utf8Encode),
|
body = Stream("Unauthorized").through(utf8.encode),
|
||||||
headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil)
|
headers = Headers(`Content-Type`(MediaType.text.plain, Charset.`UTF-8`) :: Nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ object TemplateRoutes {
|
|||||||
Stream
|
Stream
|
||||||
.bracket(Sync[F].delay(url.openStream))(in => Sync[F].delay(in.close()))
|
.bracket(Sync[F].delay(url.openStream))(in => Sync[F].delay(in.close()))
|
||||||
.flatMap(in => fs2.io.readInputStream(in.pure[F], 64 * 1024, false))
|
.flatMap(in => fs2.io.readInputStream(in.pure[F], 64 * 1024, false))
|
||||||
.through(text.utf8Decode)
|
.through(text.utf8.decode)
|
||||||
.compile
|
.compile
|
||||||
.fold("")(_ + _)
|
.fold("")(_ + _)
|
||||||
|
|
||||||
|
@ -16,10 +16,10 @@ object Dependencies {
|
|||||||
val EmilVersion = "0.10.0-M2"
|
val EmilVersion = "0.10.0-M2"
|
||||||
val FlexmarkVersion = "0.62.2"
|
val FlexmarkVersion = "0.62.2"
|
||||||
val FlywayVersion = "7.12.1"
|
val FlywayVersion = "7.12.1"
|
||||||
val Fs2Version = "3.0.6"
|
val Fs2Version = "3.1.0"
|
||||||
val Fs2CronVersion = "0.7.1"
|
val Fs2CronVersion = "0.7.1"
|
||||||
val H2Version = "1.4.200"
|
val H2Version = "1.4.200"
|
||||||
val Http4sVersion = "0.23.0"
|
val Http4sVersion = "0.23.1"
|
||||||
val Icu4jVersion = "69.1"
|
val Icu4jVersion = "69.1"
|
||||||
val JsoupVersion = "1.14.1"
|
val JsoupVersion = "1.14.1"
|
||||||
val KindProjectorVersion = "0.10.3"
|
val KindProjectorVersion = "0.10.3"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user