Mirror of https://github.com/TheAnachronism/docspell.git (synced 2025-03-28 17:55:06 +00:00)

Upgrade code base to CE3

This commit is contained in:
parent 903ec26e54
commit bd791b4593
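The hunks below follow one recurring pattern of the Cats Effect 2 to Cats Effect 3 migration: the CE2 constraints Concurrent, Timer, ContextShift, Effect and ConcurrentEffect collapse into a single Async constraint, the Blocker parameter disappears (blocking sections are marked with Sync[F].blocking instead), and streaming file IO goes through the fs2.io.file.Files capability. A minimal sketch of that pattern, using API forms that appear in the diff; the Example object and its method names are illustrative, not part of the code base:

    import java.nio.file.Path

    import cats.effect._
    import fs2.io.file.Files

    // CE2 (before): def create[F[_]: Concurrent: Timer: ContextShift](blocker: Blocker): ...
    // CE3 (after): one Async constraint, no Blocker.
    object Example {
      // Blocking JDK calls are wrapped with Sync[F].blocking instead of being
      // shifted onto a Blocker-backed execution context.
      def fileSize[F[_]: Sync](file: Path): F[Long] =
        Sync[F].blocking(java.nio.file.Files.size(file))

      // Streaming file IO uses the Files[F] capability; the blocker argument is gone.
      def copy[F[_]: Async](in: Path, out: Path): F[Unit] =
        Files[F].readAll(in, 4096).through(Files[F].writeAll(out)).compile.drain
    }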
@@ -1,7 +1,3 @@
 updates.ignore = [
 { groupId = "org.apache.poi" },
 ]
-
-updates.pin = [
-{ groupId = "co.fs2", version = "2." }
-]

@@ -32,10 +32,7 @@ object TextAnalyser {
 labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString))
 }
 
-def create[F[_]: Concurrent: Timer: ContextShift](
-cfg: TextAnalysisConfig,
-blocker: Blocker
-): Resource[F, TextAnalyser[F]] =
+def create[F[_]: Async](cfg: TextAnalysisConfig): Resource[F, TextAnalyser[F]] =
 Resource
 .eval(Nlp(cfg.nlpConfig))
 .map(stanfordNer =>
@@ -56,7 +53,7 @@ object TextAnalyser {
 } yield Result(spans ++ list, dates)
 
 def classifier: TextClassifier[F] =
-new StanfordTextClassifier[F](cfg.classifier, blocker)
+new StanfordTextClassifier[F](cfg.classifier)
 
 private def textLimit(logger: Logger[F], text: String): F[String] =
 if (cfg.maxLength <= 0)
@@ -82,7 +79,7 @@ object TextAnalyser {
 
 /** Provides the nlp pipeline based on the configuration. */
 private object Nlp {
-def apply[F[_]: Concurrent: Timer](
+def apply[F[_]: Async](
 cfg: TextAnalysisConfig.NlpConfig
 ): F[Input[F] => F[Vector[NerLabel]]] =
 cfg.mode match {
@@ -104,7 +101,7 @@ object TextAnalyser {
 text: String
 )
 
-def annotate[F[_]: BracketThrow](
+def annotate[F[_]: Async](
 cache: PipelineCache[F]
 )(input: Input[F]): F[Vector[NerLabel]] =
 cache
@@ -2,10 +2,11 @@ package docspell.analysis.classifier
 
 import java.nio.file.Path
 
+import cats.effect.Ref
 import cats.effect._
-import cats.effect.concurrent.Ref
 import cats.implicits._
 import fs2.Stream
+import fs2.io.file.Files
 
 import docspell.analysis.classifier
 import docspell.analysis.classifier.TextClassifier._
@@ -15,10 +16,8 @@ import docspell.common.syntax.FileSyntax._
 
 import edu.stanford.nlp.classify.ColumnDataClassifier
 
-final class StanfordTextClassifier[F[_]: Sync: ContextShift](
-cfg: TextClassifierConfig,
-blocker: Blocker
-) extends TextClassifier[F] {
+final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
+extends TextClassifier[F] {
 
 def trainClassifier[A](
 logger: Logger[F],
@@ -28,7 +27,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
 .withTempDir(cfg.workingDir, "trainclassifier")
 .use { dir =>
 for {
-rawData <- writeDataFile(blocker, dir, data)
+rawData <- writeDataFile(dir, data)
 _ <- logger.debug(s"Learning from ${rawData.count} items.")
 trainData <- splitData(logger, rawData)
 scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
@@ -81,8 +80,8 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
 TrainData(in.file.resolveSibling("train.txt"), in.file.resolveSibling("test.txt"))
 
 val fileLines =
-fs2.io.file
-.readAll(in.file, blocker, 4096)
+File
+.readAll[F](in.file, 4096)
 .through(fs2.text.utf8Decode)
 .through(fs2.text.lines)
 
@@ -95,7 +94,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
 .take(nTest)
 .intersperse("\n")
 .through(fs2.text.utf8Encode)
-.through(fs2.io.file.writeAll(td.test, blocker))
+.through(Files[F].writeAll(td.test))
 .compile
 .drain
 _ <-
@@ -103,13 +102,13 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
 .drop(nTest)
 .intersperse("\n")
 .through(fs2.text.utf8Encode)
-.through(fs2.io.file.writeAll(td.train, blocker))
+.through(Files[F].writeAll(td.train))
 .compile
 .drain
 } yield td
 }
 
-def writeDataFile(blocker: Blocker, dir: Path, data: Stream[F, Data]): F[RawData] = {
+def writeDataFile(dir: Path, data: Stream[F, Data]): F[RawData] = {
 val target = dir.resolve("rawdata")
 for {
 counter <- Ref.of[F, Long](0L)
@@ -120,7 +119,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
 .evalTap(_ => counter.update(_ + 1))
 .intersperse("\r\n")
 .through(fs2.text.utf8Encode)
-.through(fs2.io.file.writeAll(target, blocker))
+.through(Files[F].writeAll(target))
 .compile
 .drain
 lines <- counter.get
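These hunks show the fs2 side of the migration: the free-standing fs2.io.file.readAll/writeAll functions, which needed a Blocker, are replaced by the fs2.io.file.Files capability, and Ref now lives directly in cats.effect. A minimal sketch of the new write path, mirroring the writeDataFile pattern above; the WriteSketch object and the target path are made up:

    import java.nio.file.Paths

    import cats.effect.{Async, Ref}
    import cats.syntax.all._
    import fs2.Stream
    import fs2.io.file.Files

    object WriteSketch {
      // Writes the given lines to a file and counts them on the way.
      def writeLines[F[_]: Async](lines: Stream[F, String]): F[Long] =
        for {
          counter <- Ref.of[F, Long](0L)
          _ <- lines
            .evalTap(_ => counter.update(_ + 1))
            .intersperse("\n")
            .through(fs2.text.utf8Encode)
            .through(Files[F].writeAll(Paths.get("target", "lines.txt")))
            .compile
            .drain
          n <- counter.get
        } yield n
    }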
@@ -19,7 +19,7 @@ object DateFind {
 .splitToken(text, " \t.,\n\r/".toSet)
 .filter(w => lang != Language.Latvian || w.value != "gada")
 .sliding(3)
-.filter(_.length == 3)
+.filter(_.size == 3)
 .flatMap(q =>
 Stream.emits(
 SimpleDate
@@ -28,9 +28,9 @@ object DateFind {
 NerDateLabel(
 sd.toLocalDate,
 NerLabel(
-text.substring(q.head.begin, q(2).end),
+text.substring(q.head.get.begin, q(2).end),
 NerTag.Date,
-q.head.begin,
+q.head.get.begin,
 q(2).end
 )
 )
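The .size and .head.get adjustments come from Stream.sliding in fs2 3 emitting fs2.Chunk windows instead of the immutable Queue used in fs2 2, so head now returns an Option and apply(i) indexes into the chunk. A small sketch of working with such a window; the values are made up:

    import fs2.Stream

    object SlidingSketch {
      // In fs2 3, sliding(3) yields Chunk[Int] windows; head is an Option and q(2) indexes.
      val windows: List[(Int, Int)] =
        Stream
          .emits(List(1, 2, 3, 4, 5))
          .sliding(3)
          .filter(_.size == 3)
          .map(q => (q.head.get, q(2))) // first and last element of each window
          .toList
    }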
@@ -2,9 +2,8 @@ package docspell.analysis.nlp
 
 import scala.concurrent.duration.{Duration => _, _}
 
-import cats.Applicative
+import cats.effect.Ref
 import cats.effect._
-import cats.effect.concurrent.Ref
 import cats.implicits._
 
 import docspell.analysis.NlpSettings
@@ -28,7 +27,7 @@ trait PipelineCache[F[_]] {
 object PipelineCache {
 private[this] val logger = getLogger
 
-def apply[F[_]: Concurrent: Timer](clearInterval: Duration)(
+def apply[F[_]: Async](clearInterval: Duration)(
 creator: NlpSettings => Annotator[F],
 release: F[Unit]
 ): F[PipelineCache[F]] =
@@ -38,7 +37,7 @@ object PipelineCache {
 _ <- Logger.log4s(logger).info("Creating nlp pipeline cache")
 } yield new Impl[F](data, creator, cacheClear)
 
-final private class Impl[F[_]: Sync](
+final private class Impl[F[_]: Async](
 data: Ref[F, Map[String, Entry[Annotator[F]]]],
 creator: NlpSettings => Annotator[F],
 cacheClear: CacheClearing[F]
@@ -97,20 +96,20 @@ object PipelineCache {
 }
 
 object CacheClearing {
-def none[F[_]: Applicative]: CacheClearing[F] =
+def none[F[_]]: CacheClearing[F] =
 new CacheClearing[F] {
 def withCache: Resource[F, Unit] =
 Resource.pure[F, Unit](())
 }
 
-def create[F[_]: Concurrent: Timer, A](
+def create[F[_]: Async, A](
 data: Ref[F, Map[String, Entry[A]]],
 interval: Duration,
 release: F[Unit]
 ): F[CacheClearing[F]] =
 for {
 counter <- Ref.of(0L)
-cleaning <- Ref.of(None: Option[Fiber[F, Unit]])
+cleaning <- Ref.of(None: Option[Fiber[F, Throwable, Unit]])
 log = Logger.log4s(logger)
 result <-
 if (interval.millis <= 0)
@@ -135,10 +134,10 @@ object PipelineCache {
 final private class CacheClearingImpl[F[_], A](
 data: Ref[F, Map[String, Entry[A]]],
 counter: Ref[F, Long],
-cleaningFiber: Ref[F, Option[Fiber[F, Unit]]],
+cleaningFiber: Ref[F, Option[Fiber[F, Throwable, Unit]]],
 clearInterval: FiniteDuration,
 release: F[Unit]
-)(implicit T: Timer[F], F: Concurrent[F])
+)(implicit F: Async[F])
 extends CacheClearing[F] {
 private[this] val log = Logger.log4s[F](logger)
 
@@ -157,8 +156,8 @@ object PipelineCache {
 case None => ().pure[F]
 }
 
-private def clearAllLater: F[Fiber[F, Unit]] =
-F.start(T.sleep(clearInterval) *> clearAll)
+private def clearAllLater: F[Fiber[F, Throwable, Unit]] =
+F.start(F.sleep(clearInterval) *> clearAll)
 
 private def logDontClear: F[Unit] =
 log.info("Cancel stanford cache clearing, as it has been used in between.")
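Two CE3 changes surface in the cache code: Fiber gained an error type parameter (Fiber[F, Throwable, A]), and Timer[F].sleep is gone because sleeping is part of the Async/Temporal hierarchy itself. A small sketch of starting and later cancelling a delayed task under a single Async constraint; the names are illustrative:

    import scala.concurrent.duration._

    import cats.effect.{Async, Fiber}
    import cats.syntax.all._

    object DelayedTaskSketch {
      // Start `task` after `delay`; the caller keeps the fiber and may cancel it.
      def schedule[F[_]](task: F[Unit], delay: FiniteDuration)(implicit
          F: Async[F]
      ): F[Fiber[F, Throwable, Unit]] =
        F.start(F.sleep(delay) *> task)
    }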
@@ -2,12 +2,12 @@ package docspell.analysis.classifier
 
 import java.nio.file.Paths
 
-import scala.concurrent.ExecutionContext
-
 import cats.data.Kleisli
 import cats.data.NonEmptyList
 import cats.effect._
+import cats.effect.unsafe.implicits.global
 import fs2.Stream
+import fs2.io.file.Files
 
 import docspell.analysis.classifier.TextClassifier.Data
 import docspell.common._
@@ -17,8 +17,6 @@ import munit._
 class StanfordTextClassifierSuite extends FunSuite {
 val logger = Logger.log4s[IO](org.log4s.getLogger)
 
-implicit val CS = IO.contextShift(ExecutionContext.global)
-
 test("learn from data") {
 val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
 
@@ -38,34 +36,30 @@ class StanfordTextClassifierSuite extends FunSuite {
 })
 .covary[IO]
 
-val modelExists =
-Blocker[IO].use { blocker =>
-val classifier = new StanfordTextClassifier[IO](cfg, blocker)
-classifier.trainClassifier[Boolean](logger, data)(
-Kleisli(result => File.existsNonEmpty[IO](result.model))
-)
-}
+val modelExists = {
+val classifier = new StanfordTextClassifier[IO](cfg)
+classifier.trainClassifier[Boolean](logger, data)(
+Kleisli(result => File.existsNonEmpty[IO](result.model))
+)
+}
 
 assertEquals(modelExists.unsafeRunSync(), true)
 }
 
 test("run classifier") {
 val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
-val things = for {
-dir <- File.withTempDir[IO](Paths.get("target"), "testcls")
-blocker <- Blocker[IO]
-} yield (dir, blocker)
+val things = File.withTempDir[IO](Paths.get("target"), "testcls")
 
 things
-.use { case (dir, blocker) =>
-val classifier = new StanfordTextClassifier[IO](cfg, blocker)
+.use { dir =>
+val classifier = new StanfordTextClassifier[IO](cfg)
 
 val modelFile = dir.resolve("test.ser.gz")
 for {
 _ <-
 LenientUri
 .fromJava(getClass.getResource("/test.ser.gz"))
-.readURL[IO](4096, blocker)
-.through(fs2.io.file.writeAll(modelFile, blocker))
+.readURL[IO](4096)
+.through(Files[IO].writeAll(modelFile))
 .compile
 .drain
 model = ClassifierModel(modelFile)
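In the tests, the hand-made ContextShift disappears: with CE3, unsafeRunSync() needs an IORuntime, and importing cats.effect.unsafe.implicits.global supplies the default one. A minimal MUnit-style sketch of the same pattern; the suite name and the computed value are made up:

    import cats.effect.IO
    import cats.effect.unsafe.implicits.global // provides the default IORuntime
    import munit.FunSuite

    class RuntimeSketchSuite extends FunSuite {
      test("run an IO value") {
        val program: IO[Int] = IO.pure(21).map(_ * 2)
        // unsafeRunSync() resolves against the implicit IORuntime imported above
        assertEquals(program.unsafeRunSync(), 42)
      }
    }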
@@ -3,6 +3,7 @@ package docspell.analysis.nlp
 import java.nio.file.Paths
 
 import cats.effect.IO
+import cats.effect.unsafe.implicits.global
 
 import docspell.analysis.Env
 import docspell.common._
@@ -14,8 +14,8 @@ import docspell.store.queue.JobQueue
 import docspell.store.usertask.UserTaskStore
 
 import emil.javamail.{JavaMailEmil, Settings}
+import org.http4s.blaze.client.BlazeClientBuilder
 import org.http4s.client.Client
-import org.http4s.client.blaze.BlazeClientBuilder
 
 trait BackendApp[F[_]] {
 
@@ -43,12 +43,11 @@ trait BackendApp[F[_]] {
 
 object BackendApp {
 
-def create[F[_]: ConcurrentEffect: ContextShift](
+def create[F[_]: Async](
 cfg: Config,
 store: Store[F],
 httpClient: Client[F],
-ftsClient: FtsClient[F],
-blocker: Blocker
+ftsClient: FtsClient[F]
 ): Resource[F, BackendApp[F]] =
 for {
 utStore <- UserTaskStore(store)
@@ -68,7 +67,7 @@ object BackendApp {
 itemSearchImpl <- OItemSearch(store)
 fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
 javaEmil =
-JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
+JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
 mailImpl <- OMail(store, javaEmil)
 userTaskImpl <- OUserTask(utStore, queue, joexImpl)
 folderImpl <- OFolder(store)
@@ -98,16 +97,15 @@ object BackendApp {
 val clientSettings = clientSettingsImpl
 }
 
-def apply[F[_]: ConcurrentEffect: ContextShift](
+def apply[F[_]: Async](
 cfg: Config,
 connectEC: ExecutionContext,
-httpClientEc: ExecutionContext,
-blocker: Blocker
+httpClientEc: ExecutionContext
 )(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] =
 for {
-store <- Store.create(cfg.jdbc, connectEC, blocker)
+store <- Store.create(cfg.jdbc, connectEC)
 httpClient <- BlazeClientBuilder[F](httpClientEc).resource
 ftsClient <- ftsFactory(httpClient)
-backend <- create(cfg, store, httpClient, ftsClient, blocker)
+backend <- create(cfg, store, httpClient, ftsClient)
 } yield backend
 }
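Wiring code like BackendApp mostly changes at the edges: the effect constraint becomes Async, the Blocker argument is dropped, and with the http4s version used alongside CE3 the blaze client builder moves to the org.http4s.blaze.client package. A minimal sketch of building a client resource under those assumptions:

    import scala.concurrent.ExecutionContext

    import cats.effect.{Async, Resource}
    import org.http4s.blaze.client.BlazeClientBuilder
    import org.http4s.client.Client

    object HttpClientSketch {
      // One Async constraint instead of ConcurrentEffect + ContextShift, and no Blocker.
      def clientResource[F[_]: Async](ec: ExecutionContext): Resource[F, Client[F]] =
        BlazeClientBuilder[F](ec).resource
    }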
@@ -69,7 +69,7 @@ object Login {
 def invalidTime: Result = InvalidTime
 }
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, Login[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, Login[F]] =
 Resource.pure[F, Login[F]](new Login[F] {
 
 private val logF = Logger.log4s(logger)

@@ -1,7 +1,7 @@
 package docspell.backend.ops
 
 import cats.data.OptionT
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.common.AccountId
@@ -25,7+25,7 @@ trait OClientSettings[F[_]] {
 object OClientSettings {
 private[this] val logger = getLogger
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OClientSettings[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OClientSettings[F]] =
 Resource.pure[F, OClientSettings[F]](new OClientSettings[F] {
 
 private def getUserId(account: AccountId): OptionT[F, Ident] =
@@ -58,7 +58,7 @@ object OClientSettings {
 store.transact(RClientSettings.upsert(clientId, userId, data))
 )
 _ <- OptionT.liftF(
-if (n <= 0) Effect[F].raiseError(new Exception("No rows updated!"))
+if (n <= 0) Async[F].raiseError(new Exception("No rows updated!"))
 else ().pure[F]
 )
 } yield ()).getOrElse(())
@@ -1,6 +1,6 @@
 package docspell.backend.ops
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 import fs2.Stream
 
@@ -126,7 +126,7 @@ object OCollective {
 }
 }
 
-def apply[F[_]: Effect](
+def apply[F[_]: Async](
 store: Store[F],
 uts: UserTaskStore[F],
 queue: JobQueue[F],

@@ -87,7 +87,7 @@ object OCustomFields {
 collective: Ident
 )
 
-def apply[F[_]: Effect](
+def apply[F[_]: Async](
 store: Store[F]
 ): Resource[F, OCustomFields[F]] =
 Resource.pure[F, OCustomFields[F]](new OCustomFields[F] {

@@ -1,6 +1,6 @@
 package docspell.backend.ops
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.common.{AccountId, Ident}
@@ -22,7 +22,7 @@ trait OEquipment[F[_]] {
 
 object OEquipment {
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OEquipment[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OEquipment[F]] =
 Resource.pure[F, OEquipment[F]](new OEquipment[F] {
 def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[REquipment]] =
 store.transact(REquipment.findAll(account.collective, nameQuery, _.name))

@@ -55,7 +55,7 @@ object OFolder {
 type FolderDetail = QFolder.FolderDetail
 val FolderDetail = QFolder.FolderDetail
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OFolder[F]] =
+def apply[F[_]](store: Store[F]): Resource[F, OFolder[F]] =
 Resource.pure[F, OFolder[F]](new OFolder[F] {
 def findAll(
 account: AccountId,

@@ -77,7 +77,7 @@ object OFulltext {
 case class FtsItem(item: ListItem, ftsData: FtsData)
 case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)
 
-def apply[F[_]: Effect](
+def apply[F[_]: Async](
 itemSearch: OItemSearch[F],
 fts: FtsClient[F],
 store: Store[F],
@@ -1,7 +1,7 @@
 package docspell.backend.ops
 
 import cats.data.{NonEmptyList, OptionT}
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.backend.JobFactory
@@ -191,7 +191,7 @@ trait OItem[F[_]] {
 
 object OItem {
 
-def apply[F[_]: Effect](
+def apply[F[_]: Async](
 store: Store[F],
 fts: FtsClient[F],
 queue: JobQueue[F],

@@ -1,7 +1,7 @@
 package docspell.backend.ops
 
 import cats.data.OptionT
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 import fs2.Stream
 
@@ -118,7 +118,7 @@ object OItemSearch {
 val fileId = rs.fileId
 }
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OItemSearch[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OItemSearch[F]] =
 Resource.pure[F, OItemSearch[F]](new OItemSearch[F] {
 
 def findItem(id: Ident, collective: Ident): F[Option[ItemData]] =

@@ -36,7 +36,7 @@ object OJoex {
 } yield cancel.success).getOrElse(false)
 })
 
-def create[F[_]: ConcurrentEffect](
+def create[F[_]: Async](
 ec: ExecutionContext,
 store: Store[F]
 ): Resource[F, OJoex[F]] =

@@ -141,7 +141,7 @@ object OMail {
 )
 }
 
-def apply[F[_]: Effect](store: Store[F], emil: Emil[F]): Resource[F, OMail[F]] =
+def apply[F[_]: Async](store: Store[F], emil: Emil[F]): Resource[F, OMail[F]] =
 Resource.pure[F, OMail[F]](new OMail[F] {
 def getSmtpSettings(
 accId: AccountId,
@@ -1,6 +1,6 @@
 package docspell.backend.ops
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.common.syntax.all._
@@ -20,7 +20,7 @@ trait ONode[F[_]] {
 object ONode {
 private[this] val logger = getLogger
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, ONode[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, ONode[F]] =
 Resource.pure[F, ONode[F]](new ONode[F] {
 
 def register(appId: Ident, nodeType: NodeType, uri: LenientUri): F[Unit] =

@@ -1,6 +1,6 @@
 package docspell.backend.ops
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.backend.ops.OOrganization._
@@ -49,7 +49,7 @@ object OOrganization {
 contacts: Seq[RContact]
 )
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OOrganization[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OOrganization[F]] =
 Resource.pure[F, OOrganization[F]](new OOrganization[F] {
 
 def findAllOrg(

@@ -1,6 +1,6 @@
 package docspell.backend.ops
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.common.{AccountId, Ident}
@@ -22,7 +22,7 @@ trait OSource[F[_]] {
 
 object OSource {
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OSource[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OSource[F]] =
 Resource.pure[F, OSource[F]](new OSource[F] {
 def findAll(account: AccountId): F[Vector[SourceData]] =
 store

@@ -1,6 +1,6 @@
 package docspell.backend.ops
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.common.{AccountId, Ident}
@@ -25,7 +25,7 @@ trait OTag[F[_]] {
 
 object OTag {
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OTag[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OTag[F]] =
 Resource.pure[F, OTag[F]](new OTag[F] {
 def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[RTag]] =
 store.transact(RTag.findAll(account.collective, nameQuery, _.name))

@@ -62,7 +62,7 @@ trait OUserTask[F[_]] {
 
 object OUserTask {
 
-def apply[F[_]: Effect](
+def apply[F[_]: Async](
 store: UserTaskStore[F],
 queue: JobQueue[F],
 joex: OJoex[F]
@@ -1,6 +1,6 @@
 package docspell.backend.signup
 
-import cats.effect.{Effect, Resource}
+import cats.effect.{Async, Resource}
 import cats.implicits._
 
 import docspell.backend.PasswordCrypt
@@ -23,7 +23,7 @@ trait OSignup[F[_]] {
 object OSignup {
 private[this] val logger = getLogger
 
-def apply[F[_]: Effect](store: Store[F]): Resource[F, OSignup[F]] =
+def apply[F[_]: Async](store: Store[F]): Resource[F, OSignup[F]] =
 Resource.pure[F, OSignup[F]](new OSignup[F] {
 
 def newInvite(cfg: Config)(password: Password): F[NewInviteResult] =
@@ -35,7 +35,7 @@ object OSignup {
 .transact(RInvitation.insertNew)
 .map(ri => NewInviteResult.success(ri.id))
 else
-Effect[F].pure(NewInviteResult.invitationClosed)
+Async[F].pure(NewInviteResult.invitationClosed)
 
 def register(cfg: Config)(data: RegisterData): F[SignupResult] =
 cfg.mode match {
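The Effect and ConcurrentEffect type classes no longer exist in CE3, so all the backend modules above switch their constraint to Async (or drop it entirely where, as in OFolder, no effect capability is needed). A small sketch of the recurring constructor shape; OThing is a hypothetical module standing in for the real ones:

    import cats.effect.{Async, Resource}

    // Hypothetical module following the pattern above: Effect[F] is replaced by Async[F].
    trait OThing[F[_]] {
      def greet(name: String): F[String]
    }

    object OThing {
      def apply[F[_]: Async]: Resource[F, OThing[F]] =
        Resource.pure[F, OThing[F]](new OThing[F] {
          def greet(name: String): F[String] =
            Async[F].pure(s"hello $name")
        })
    }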
@@ -1,47 +1,48 @@
 package docspell.common
 
 import java.io.IOException
-import java.nio.charset.StandardCharsets
-import java.nio.file._
 import java.nio.file.attribute.BasicFileAttributes
+import java.nio.file.{Files => JFiles, _}
 import java.util.concurrent.atomic.AtomicInteger
 
 import scala.jdk.CollectionConverters._
 
 import cats.effect._
 import cats.implicits._
-import fs2.Stream
+import fs2.io.file.Files
+import fs2.{Chunk, Stream}
 
 import docspell.common.syntax.all._
 
 import io.circe.Decoder
+import scodec.bits.ByteVector
 
+//TODO use io.fs2.files.Files api
 object File {
 
 def mkDir[F[_]: Sync](dir: Path): F[Path] =
-Sync[F].delay(Files.createDirectories(dir))
+Sync[F].blocking(JFiles.createDirectories(dir))
 
 def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
-mkDir(parent).map(p => Files.createTempDirectory(p, prefix))
+mkDir(parent).map(p => JFiles.createTempDirectory(p, prefix))
 
 def mkTempFile[F[_]: Sync](
 parent: Path,
 prefix: String,
 suffix: Option[String] = None
 ): F[Path] =
-mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull))
+mkDir(parent).map(p => JFiles.createTempFile(p, prefix, suffix.orNull))
 
 def deleteDirectory[F[_]: Sync](dir: Path): F[Int] =
 Sync[F].delay {
 val count = new AtomicInteger(0)
-Files.walkFileTree(
+JFiles.walkFileTree(
 dir,
 new SimpleFileVisitor[Path]() {
 override def visitFile(
 file: Path,
 attrs: BasicFileAttributes
 ): FileVisitResult = {
-Files.deleteIfExists(file)
+JFiles.deleteIfExists(file)
 count.incrementAndGet()
 FileVisitResult.CONTINUE
 }
@@ -49,7 +50,7 @@ object File {
 Option(e) match {
 case Some(ex) => throw ex
 case None =>
-Files.deleteIfExists(dir)
+JFiles.deleteIfExists(dir)
 FileVisitResult.CONTINUE
 }
 }
@@ -58,47 +59,57 @@ object File {
 }
 
 def exists[F[_]: Sync](file: Path): F[Boolean] =
-Sync[F].delay(Files.exists(file))
+Sync[F].delay(JFiles.exists(file))
 
 def size[F[_]: Sync](file: Path): F[Long] =
-Sync[F].delay(Files.size(file))
+Sync[F].delay(JFiles.size(file))
 
 def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] =
-Sync[F].delay(Files.exists(file) && Files.size(file) > minSize)
+Sync[F].delay(JFiles.exists(file) && JFiles.size(file) > minSize)
 
 def deleteFile[F[_]: Sync](file: Path): F[Unit] =
-Sync[F].delay(Files.deleteIfExists(file)).map(_ => ())
+Sync[F].delay(JFiles.deleteIfExists(file)).map(_ => ())
 
 def delete[F[_]: Sync](path: Path): F[Int] =
-if (Files.isDirectory(path)) deleteDirectory(path)
+if (JFiles.isDirectory(path)) deleteDirectory(path)
 else deleteFile(path).map(_ => 1)
 
 def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] =
 Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ()))
 
-def listFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] =
+def listJFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] =
 Sync[F].delay {
 val javaList =
-Files.list(dir).filter(p => pred(p)).collect(java.util.stream.Collectors.toList())
+JFiles
+.list(dir)
+.filter(p => pred(p))
+.collect(java.util.stream.Collectors.toList())
 javaList.asScala.toList.sortBy(_.getFileName.toString)
 }
 
-def readAll[F[_]: Sync: ContextShift](
+def readAll[F[_]: Files](
 file: Path,
-blocker: Blocker,
 chunkSize: Int
 ): Stream[F, Byte] =
-fs2.io.file.readAll(file, blocker, chunkSize)
+Files[F].readAll(file, chunkSize)
 
-def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] =
-readAll[F](file, blocker, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
+def readText[F[_]: Files: Concurrent](file: Path): F[String] =
+readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
 
-def writeString[F[_]: Sync](file: Path, content: String): F[Path] =
-Sync[F].delay(Files.write(file, content.getBytes(StandardCharsets.UTF_8)))
+def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] =
+ByteVector.encodeUtf8(content) match {
+case Right(bv) =>
+Stream
+.chunk(Chunk.byteVector(bv))
+.through(Files[F].writeAll(file))
+.compile
+.drain
+.map(_ => file)
+case Left(ex) =>
+Concurrent[F].raiseError(ex)
+}
 
-def readJson[F[_]: Sync: ContextShift, A](file: Path, blocker: Blocker)(implicit
-d: Decoder[A]
-): F[A] =
-readText[F](file, blocker).map(_.parseJsonAs[A]).rethrow
+def readJson[F[_]: Async, A](file: Path)(implicit d: Decoder[A]): F[A] =
+readText[F](file).map(_.parseJsonAs[A]).rethrow
 
 }
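Besides the renamed JDK import (aliased to JFiles so it no longer collides with fs2's Files), the notable CE3 idiom in File.scala is Sync[F].blocking: instead of shifting work onto a Blocker, a blocking side effect is marked at the call site and the runtime schedules it on its blocking pool. A tiny sketch of the two suspension flavours; the object name is made up:

    import java.nio.file.{Files => JFiles, Path}

    import cats.effect.Sync

    object BlockingSketch {
      // delay: a cheap side effect that does not block a thread for long
      def exists[F[_]: Sync](p: Path): F[Boolean] =
        Sync[F].delay(JFiles.exists(p))

      // blocking: tells the CE3 runtime to run this on the blocking thread pool
      def createDirs[F[_]: Sync](p: Path): F[Path] =
        Sync[F].blocking(JFiles.createDirectories(p))
    }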
@@ -6,7 +6,7 @@ import java.net.URLEncoder
 
 import cats.data.NonEmptyList
 import cats.effect.Resource
-import cats.effect.{Blocker, ContextShift, Sync}
+import cats.effect._
 import cats.implicits._
 import fs2.Stream
 
@@ -66,20 +66,17 @@ case class LenientUri(
 )
 }
 
-def readURL[F[_]: Sync: ContextShift](
-chunkSize: Int,
-blocker: Blocker
-): Stream[F, Byte] =
+def readURL[F[_]: Sync](chunkSize: Int): Stream[F, Byte] =
 Stream
 .emit(Either.catchNonFatal(new URL(asString)))
 .covary[F]
 .rethrow
 .flatMap(url =>
-fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
+fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, true)
 )
 
-def readText[F[_]: Sync: ContextShift](chunkSize: Int, blocker: Blocker): F[String] =
-readURL[F](chunkSize, blocker).through(fs2.text.utf8Decode).compile.foldMonoid
+def readText[F[_]: Sync](chunkSize: Int): F[String] =
+readURL[F](chunkSize).through(fs2.text.utf8Decode).compile.foldMonoid
 
 def host: Option[String] =
 authority.map(a =>

@@ -2,13 +2,10 @@ package docspell.common
 
 import scala.concurrent.ExecutionContext
 
-import cats.effect._
-
 /** Captures thread pools to use in an application.
 */
 case class Pools(
 connectEC: ExecutionContext,
 httpClientEC: ExecutionContext,
-blocker: Blocker,
 restEC: ExecutionContext
 )
@@ -7,7 +7,7 @@ import java.util.concurrent.TimeUnit
 
 import scala.jdk.CollectionConverters._
 
-import cats.effect.{Blocker, ContextShift, Sync}
+import cats.effect._
 import cats.implicits._
 import fs2.{Stream, io, text}
 
@@ -34,9 +34,8 @@ object SystemCommand {
 
 final case class Result(rc: Int, stdout: String, stderr: String)
 
-def exec[F[_]: Sync: ContextShift](
+def exec[F[_]: Sync](
 cmd: Config,
-blocker: Blocker,
 logger: Logger[F],
 wd: Option[Path] = None,
 stdin: Stream[F, Byte] = Stream.empty
@@ -44,8 +43,8 @@ object SystemCommand {
 startProcess(cmd, wd, logger, stdin) { proc =>
 Stream.eval {
 for {
-_ <- writeToProcess(stdin, proc, blocker)
-term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
+_ <- writeToProcess(stdin, proc)
+term <- Sync[F].blocking(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
 _ <-
 if (term)
 logger.debug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
@@ -55,23 +54,22 @@ object SystemCommand {
 )
 _ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
 out <-
-if (term) inputStreamToString(proc.getInputStream, blocker)
+if (term) inputStreamToString(proc.getInputStream)
 else Sync[F].pure("")
 err <-
-if (term) inputStreamToString(proc.getErrorStream, blocker)
+if (term) inputStreamToString(proc.getErrorStream)
 else Sync[F].pure("")
 } yield Result(proc.exitValue, out, err)
 }
 }
 
-def execSuccess[F[_]: Sync: ContextShift](
+def execSuccess[F[_]: Sync](
 cmd: Config,
-blocker: Blocker,
 logger: Logger[F],
 wd: Option[Path] = None,
 stdin: Stream[F, Byte] = Stream.empty
 ): Stream[F, Result] =
-exec(cmd, blocker, logger, wd, stdin).flatMap { r =>
+exec(cmd, logger, wd, stdin).flatMap { r =>
 if (r.rc != 0)
 Stream.raiseError[F](
 new Exception(
@@ -92,7 +90,7 @@ object SystemCommand {
 val log = logger.debug(s"Running external command: ${cmd.cmdString}")
 val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
 val proc = log *> hasStdin.flatMap(flag =>
-Sync[F].delay {
+Sync[F].blocking {
 val pb = new ProcessBuilder(cmd.toCmd.asJava)
 .redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
 .redirectError(Redirect.PIPE)
@@ -109,11 +107,8 @@ object SystemCommand {
 .flatMap(f)
 }
 
-private def inputStreamToString[F[_]: Sync: ContextShift](
-in: InputStream,
-blocker: Blocker
-): F[String] =
-io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false)
+private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
+io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
 .through(text.utf8Decode)
 .chunks
 .map(_.toVector.mkString)
@@ -122,18 +117,17 @@ object SystemCommand {
 .last
 .map(_.getOrElse(""))
 
-private def writeToProcess[F[_]: Sync: ContextShift](
+private def writeToProcess[F[_]: Sync](
 data: Stream[F, Byte],
-proc: Process,
-blocker: Blocker
+proc: Process
 ): F[Unit] =
 data
-.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker))
+.through(io.writeOutputStream(Sync[F].blocking(proc.getOutputStream)))
 .compile
 .drain
 
 private def timeoutError[F[_]: Sync](proc: Process, cmd: Config): F[Unit] =
-Sync[F].delay(proc.destroyForcibly()).attempt *> {
+Sync[F].blocking(proc.destroyForcibly()).attempt *> {
 Sync[F].raiseError(
 new Exception(
 s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})"
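The same pattern applies to the byte-stream helpers in fs2.io: readInputStream and writeOutputStream lost their blocker parameter in fs2 3, and genuinely blocking calls such as Process.waitFor are wrapped in Sync[F].blocking. A compact sketch of piping a process's output into a string under those assumptions; the object name is made up:

    import java.io.InputStream

    import cats.effect.Sync
    import fs2.{io, text}

    object ProcessOutputSketch {
      // fs2 3: no blocker argument; the effect wrapping the InputStream is enough.
      def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
        io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
          .through(text.utf8Decode)
          .compile
          .string
    }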
@@ -12,6 +12,8 @@ import docspell.convert.extern._
 import docspell.convert.flexmark.Markdown
 import docspell.files.{ImageSize, TikaMimetype}
 
+import scodec.bits.ByteVector
+
 trait Conversion[F[_]] {
 
 def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
@@ -22,10 +24,9 @@ trait Conversion[F[_]] {
 
 object Conversion {
 
-def create[F[_]: Sync: ContextShift](
+def create[F[_]: Async](
 cfg: ConvertConfig,
 sanitizeHtml: SanitizeHtml,
-blocker: Blocker,
 logger: Logger[F]
 ): Resource[F, Conversion[F]] =
 Resource.pure[F, Conversion[F]](new Conversion[F] {
@@ -36,12 +37,12 @@ object Conversion {
 TikaMimetype.resolve(dataType, in).flatMap {
 case MimeType.PdfMatch(_) =>
 OcrMyPdf
-.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, blocker, logger)(in, handler)
+.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(in, handler)
 
 case MimeType.HtmlMatch(mt) =>
 val cs = mt.charsetOrUtf8
 WkHtmlPdf
-.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, blocker, logger)(
+.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, logger)(
 in,
 handler
 )
@@ -50,14 +51,15 @@ object Conversion {
 val cs = mt.charsetOrUtf8
 Markdown.toHtml(in, cfg.markdown, cs).flatMap { html =>
 val bytes = Stream
-.chunk(Chunk.bytes(html.getBytes(StandardCharsets.UTF_8)))
+.chunk(
+Chunk.byteVector(ByteVector.view(html.getBytes(StandardCharsets.UTF_8)))
+)
 .covary[F]
 WkHtmlPdf.toPDF(
 cfg.wkhtmlpdf,
 cfg.chunkSize,
 StandardCharsets.UTF_8,
 sanitizeHtml,
-blocker,
 logger
 )(bytes, handler)
 }
@@ -77,7 +79,7 @@ object Conversion {
 )
 )
 else
-Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)(
+Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, logger)(
 in,
 handler
 )
@@ -86,14 +88,14 @@ object Conversion {
 logger.info(
 s"Cannot read image when determining size for ${mt.asString}. Converting anyways."
 ) *>
-Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)(
+Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, logger)(
 in,
 handler
 )
 }
 
 case Office(_) =>
-Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, blocker, logger)(in, handler)
+Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, logger)(in, handler)
 
 case mt =>
 handler.run(ConversionResult.unsupportedFormat(mt))
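Chunk.bytes is gone in fs2 3; the code above wraps the byte array in a scodec ByteVector and uses Chunk.byteVector instead (Chunk.array would be the other option). A minimal sketch of turning a String into a byte stream this way; the object and method names are made up:

    import java.nio.charset.StandardCharsets

    import fs2.{Chunk, Stream}
    import scodec.bits.ByteVector

    object ByteStreamSketch {
      // ByteVector.view wraps the array without copying; Chunk.byteVector lifts it into fs2.
      def bytesOf(html: String): Stream[fs2.Pure, Byte] =
        Stream.chunk(Chunk.byteVector(ByteVector.view(html.getBytes(StandardCharsets.UTF_8))))
    }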
|
@ -4,6 +4,7 @@ import java.nio.file.Path
|
|||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Files
|
||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -12,12 +13,11 @@ import docspell.convert.ConversionResult.{Handler, successPdf, successPdfTxt}
|
|||||||
|
|
||||||
private[extern] object ExternConv {
|
private[extern] object ExternConv {
|
||||||
|
|
||||||
def toPDF[F[_]: Sync: ContextShift, A](
|
def toPDF[F[_]: Async, A](
|
||||||
name: String,
|
name: String,
|
||||||
cmdCfg: SystemCommand.Config,
|
cmdCfg: SystemCommand.Config,
|
||||||
wd: Path,
|
wd: Path,
|
||||||
useStdin: Boolean,
|
useStdin: Boolean,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
|
reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
|
||||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
|
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
|
||||||
@ -37,13 +37,12 @@ private[extern] object ExternConv {
|
|||||||
|
|
||||||
val createInput: Pipe[F, Byte, Unit] =
|
val createInput: Pipe[F, Byte, Unit] =
|
||||||
if (useStdin) _ => Stream.emit(())
|
if (useStdin) _ => Stream.emit(())
|
||||||
else storeDataToFile(name, blocker, logger, inFile)
|
else storeDataToFile(name, logger, inFile)
|
||||||
|
|
||||||
in.through(createInput).flatMap { _ =>
|
in.through(createInput).flatMap { _ =>
|
||||||
SystemCommand
|
SystemCommand
|
||||||
.exec[F](
|
.exec[F](
|
||||||
sysCfg,
|
sysCfg,
|
||||||
blocker,
|
|
||||||
logger,
|
logger,
|
||||||
Some(dir),
|
Some(dir),
|
||||||
if (useStdin) in
|
if (useStdin) in
|
||||||
@ -66,8 +65,7 @@ private[extern] object ExternConv {
|
|||||||
handler.run(ConversionResult.failure(ex))
|
handler.run(ConversionResult.failure(ex))
|
||||||
}
|
}
|
||||||
|
|
||||||
def readResult[F[_]: Sync: ContextShift](
|
def readResult[F[_]: Async](
|
||||||
blocker: Blocker,
|
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
|
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
|
||||||
@ -77,15 +75,15 @@ private[extern] object ExternConv {
|
|||||||
File.existsNonEmpty[F](outTxt).flatMap {
|
File.existsNonEmpty[F](outTxt).flatMap {
|
||||||
case true =>
|
case true =>
|
||||||
successPdfTxt(
|
successPdfTxt(
|
||||||
File.readAll(out, blocker, chunkSize),
|
File.readAll(out, chunkSize),
|
||||||
File.readText(outTxt, blocker)
|
File.readText(outTxt)
|
||||||
).pure[F]
|
).pure[F]
|
||||||
case false =>
|
case false =>
|
||||||
successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
|
successPdf(File.readAll(out, chunkSize)).pure[F]
|
||||||
}
|
}
|
||||||
case true =>
|
case true =>
|
||||||
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
|
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
|
||||||
successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
|
successPdf(File.readAll(out, chunkSize)).pure[F]
|
||||||
|
|
||||||
case false =>
|
case false =>
|
||||||
ConversionResult
|
ConversionResult
|
||||||
@ -95,9 +93,8 @@ private[extern] object ExternConv {
|
|||||||
.pure[F]
|
.pure[F]
|
||||||
}
|
}
|
||||||
|
|
||||||
def readResultTesseract[F[_]: Sync: ContextShift](
|
def readResultTesseract[F[_]: Async](
|
||||||
outPrefix: String,
|
outPrefix: String,
|
||||||
blocker: Blocker,
|
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = {
|
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = {
|
||||||
@ -106,9 +103,9 @@ private[extern] object ExternConv {
|
|||||||
case true =>
|
case true =>
|
||||||
val outTxt = out.resolveSibling(s"$outPrefix.txt")
|
val outTxt = out.resolveSibling(s"$outPrefix.txt")
|
||||||
File.exists(outTxt).flatMap { txtExists =>
|
File.exists(outTxt).flatMap { txtExists =>
|
||||||
val pdfData = File.readAll(out, blocker, chunkSize)
|
val pdfData = File.readAll(out, chunkSize)
|
||||||
if (result.rc == 0)
|
if (result.rc == 0)
|
||||||
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt, blocker)).pure[F]
|
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt)).pure[F]
|
||||||
else successPdf(pdfData).pure[F]
|
else successPdf(pdfData).pure[F]
|
||||||
else
|
else
|
||||||
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
|
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
|
||||||
@ -124,9 +121,8 @@ private[extern] object ExternConv {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private def storeDataToFile[F[_]: Sync: ContextShift](
|
private def storeDataToFile[F[_]: Async](
|
||||||
name: String,
|
name: String,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
inFile: Path
|
inFile: Path
|
||||||
): Pipe[F, Byte, Unit] =
|
): Pipe[F, Byte, Unit] =
|
||||||
@ -134,7 +130,7 @@ private[extern] object ExternConv {
|
|||||||
Stream
|
Stream
|
||||||
.eval(logger.debug(s"Storing input to file ${inFile} for running $name"))
|
.eval(logger.debug(s"Storing input to file ${inFile} for running $name"))
|
||||||
.drain ++
|
.drain ++
|
||||||
Stream.eval(storeFile(in, inFile, blocker))
|
Stream.eval(storeFile(in, inFile))
|
||||||
|
|
||||||
private def logResult[F[_]: Sync](
|
private def logResult[F[_]: Sync](
|
||||||
name: String,
|
name: String,
|
||||||
@ -144,10 +140,9 @@ private[extern] object ExternConv {
|
|||||||
logger.debug(s"$name stdout: ${result.stdout}") *>
|
logger.debug(s"$name stdout: ${result.stdout}") *>
|
||||||
logger.debug(s"$name stderr: ${result.stderr}")
|
logger.debug(s"$name stderr: ${result.stderr}")
|
||||||
|
|
||||||
private def storeFile[F[_]: Sync: ContextShift](
|
private def storeFile[F[_]: Async](
|
||||||
in: Stream[F, Byte],
|
in: Stream[F, Byte],
|
||||||
target: Path,
|
target: Path
|
||||||
blocker: Blocker
|
|
||||||
): F[Unit] =
|
): F[Unit] =
|
||||||
in.through(fs2.io.file.writeAll(target, blocker)).compile.drain
|
in.through(Files[F].writeAll(target)).compile.drain
|
||||||
}
|
}
|
||||||
|
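
The hunks above drop the Blocker argument that the CE2 code threaded through ExternConv and switch file writes to fs2 3's Files API. A minimal sketch of that pattern, not taken from the commit (object and method names are made up), assuming fs2 3.0.x where Files[F] still accepts java.nio.file.Path:

import java.nio.file.Path

import cats.effect.{Async, IO}
import cats.effect.unsafe.implicits.global
import fs2.Stream
import fs2.io.file.Files

object WriteAllSketch {
  // CE3: only Async is required; no Blocker or ContextShift parameters
  def store[F[_]: Async](in: Stream[F, Byte], target: Path): F[Unit] =
    in.through(Files[F].writeAll(target)).compile.drain

  def main(args: Array[String]): Unit = {
    val tmp = java.nio.file.Files.createTempFile("sketch", ".txt")
    store[IO](Stream.emits("hello".getBytes.toList), tmp).unsafeRunSync()
  }
}
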
@ -11,23 +11,21 @@ import docspell.convert.ConversionResult.Handler
|
|||||||
|
|
||||||
object OcrMyPdf {
|
object OcrMyPdf {
|
||||||
|
|
||||||
def toPDF[F[_]: Sync: ContextShift, A](
|
def toPDF[F[_]: Async, A](
|
||||||
cfg: OcrMyPdfConfig,
|
cfg: OcrMyPdfConfig,
|
||||||
lang: Language,
|
lang: Language,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
|
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
|
||||||
if (cfg.enabled) {
|
if (cfg.enabled) {
|
||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
ExternConv.readResult[F](chunkSize, logger)
|
||||||
|
|
||||||
ExternConv.toPDF[F, A](
|
ExternConv.toPDF[F, A](
|
||||||
"ocrmypdf",
|
"ocrmypdf",
|
||||||
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
|
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
|
||||||
cfg.workingDir,
|
cfg.workingDir,
|
||||||
false,
|
false,
|
||||||
blocker,
|
|
||||||
logger,
|
logger,
|
||||||
reader
|
reader
|
||||||
)(in, handler)
|
)(in, handler)
|
||||||
|
@ -11,23 +11,21 @@ import docspell.convert.ConversionResult.Handler
|
|||||||
|
|
||||||
object Tesseract {
|
object Tesseract {
|
||||||
|
|
||||||
def toPDF[F[_]: Sync: ContextShift, A](
|
def toPDF[F[_]: Async, A](
|
||||||
cfg: TesseractConfig,
|
cfg: TesseractConfig,
|
||||||
lang: Language,
|
lang: Language,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
||||||
val outBase = cfg.command.args.tail.headOption.getOrElse("out")
|
val outBase = cfg.command.args.tail.headOption.getOrElse("out")
|
||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
|
ExternConv.readResultTesseract[F](outBase, chunkSize, logger)
|
||||||
|
|
||||||
ExternConv.toPDF[F, A](
|
ExternConv.toPDF[F, A](
|
||||||
"tesseract",
|
"tesseract",
|
||||||
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
|
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
|
||||||
cfg.workingDir,
|
cfg.workingDir,
|
||||||
false,
|
false,
|
||||||
blocker,
|
|
||||||
logger,
|
logger,
|
||||||
reader
|
reader
|
||||||
)(in, handler)
|
)(in, handler)
|
||||||
|
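
Each of the converter objects above repeats the same constraint change: Sync plus ContextShift plus a Blocker argument collapse into a single Async constraint. A hedged before/after sketch with illustrative names, not the project's code:

import cats.effect.Async

object AsyncConstraintSketch {
  // CE2 shape, kept as a comment for contrast:
  //   def readFile[F[_]: Sync: ContextShift](blocker: Blocker, p: Path): F[String] =
  //     blocker.delay(new String(java.nio.file.Files.readAllBytes(p)))

  // CE3 shape: one capability; blocking work is described with Async[F].blocking
  def readFile[F[_]: Async](p: java.nio.file.Path): F[String] =
    Async[F].blocking(new String(java.nio.file.Files.readAllBytes(p)))
}
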
@ -11,21 +11,19 @@ import docspell.convert.ConversionResult.Handler
|
|||||||
|
|
||||||
object Unoconv {
|
object Unoconv {
|
||||||
|
|
||||||
def toPDF[F[_]: Sync: ContextShift, A](
|
def toPDF[F[_]: Async, A](
|
||||||
cfg: UnoconvConfig,
|
cfg: UnoconvConfig,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
ExternConv.readResult[F](chunkSize, logger)
|
||||||
|
|
||||||
ExternConv.toPDF[F, A](
|
ExternConv.toPDF[F, A](
|
||||||
"unoconv",
|
"unoconv",
|
||||||
cfg.command,
|
cfg.command,
|
||||||
cfg.workingDir,
|
cfg.workingDir,
|
||||||
false,
|
false,
|
||||||
blocker,
|
|
||||||
logger,
|
logger,
|
||||||
reader
|
reader
|
||||||
)(
|
)(
|
||||||
|
@ -13,16 +13,15 @@ import docspell.convert.{ConversionResult, SanitizeHtml}
|
|||||||
|
|
||||||
object WkHtmlPdf {
|
object WkHtmlPdf {
|
||||||
|
|
||||||
def toPDF[F[_]: Sync: ContextShift, A](
|
def toPDF[F[_]: Async, A](
|
||||||
cfg: WkHtmlPdfConfig,
|
cfg: WkHtmlPdfConfig,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
charset: Charset,
|
charset: Charset,
|
||||||
sanitizeHtml: SanitizeHtml,
|
sanitizeHtml: SanitizeHtml,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
ExternConv.readResult[F](chunkSize, logger)
|
||||||
|
|
||||||
val cmdCfg = cfg.command.replace(Map("{{encoding}}" -> charset.name()))
|
val cmdCfg = cfg.command.replace(Map("{{encoding}}" -> charset.name()))
|
||||||
|
|
||||||
@ -40,7 +39,7 @@ object WkHtmlPdf {
|
|||||||
)
|
)
|
||||||
|
|
||||||
ExternConv
|
ExternConv
|
||||||
.toPDF[F, A]("wkhtmltopdf", cmdCfg, cfg.workingDir, true, blocker, logger, reader)(
|
.toPDF[F, A]("wkhtmltopdf", cmdCfg, cfg.workingDir, true, logger, reader)(
|
||||||
inSane,
|
inSane,
|
||||||
handler
|
handler
|
||||||
)
|
)
|
||||||
|
@ -4,6 +4,7 @@ import java.nio.file.Paths
|
|||||||
|
|
||||||
import cats.data.Kleisli
|
import cats.data.Kleisli
|
||||||
import cats.effect.IO
|
import cats.effect.IO
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
|
||||||
@ -12,13 +13,11 @@ import docspell.convert.ConversionResult.Handler
|
|||||||
import docspell.convert.extern.OcrMyPdfConfig
|
import docspell.convert.extern.OcrMyPdfConfig
|
||||||
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
||||||
import docspell.convert.flexmark.MarkdownConfig
|
import docspell.convert.flexmark.MarkdownConfig
|
||||||
import docspell.files.{ExampleFiles, TestFiles}
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class ConversionTest extends FunSuite with FileChecks {
|
class ConversionTest extends FunSuite with FileChecks {
|
||||||
val blocker = TestFiles.blocker
|
|
||||||
implicit val CS = TestFiles.CS
|
|
||||||
|
|
||||||
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||||
val target = Paths.get("target")
|
val target = Paths.get("target")
|
||||||
@ -73,7 +72,7 @@ class ConversionTest extends FunSuite with FileChecks {
|
|||||||
)
|
)
|
||||||
|
|
||||||
val conversion =
|
val conversion =
|
||||||
Conversion.create[IO](convertConfig, SanitizeHtml.none, blocker, logger)
|
Conversion.create[IO](convertConfig, SanitizeHtml.none, logger)
|
||||||
|
|
||||||
val bombs = List(
|
val bombs = List(
|
||||||
ExampleFiles.bombs_20K_gray_jpeg,
|
ExampleFiles.bombs_20K_gray_jpeg,
|
||||||
@ -167,7 +166,7 @@ class ConversionTest extends FunSuite with FileChecks {
|
|||||||
.covary[IO]
|
.covary[IO]
|
||||||
.zipWithIndex
|
.zipWithIndex
|
||||||
.evalMap({ case (uri, index) =>
|
.evalMap({ case (uri, index) =>
|
||||||
val load = uri.readURL[IO](8192, blocker)
|
val load = uri.readURL[IO](8192)
|
||||||
val dataType = DataType.filename(uri.path.segments.last)
|
val dataType = DataType.filename(uri.path.segments.last)
|
||||||
logger.info(s"Processing file ${uri.path.asString}") *>
|
logger.info(s"Processing file ${uri.path.asString}") *>
|
||||||
conv.toPDF(dataType, Language.German, handler(index))(load)
|
conv.toPDF(dataType, Language.German, handler(index))(load)
|
||||||
|
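
The new import cats.effect.unsafe.implicits.global in the test above is what keeps unsafeRunSync() compiling under CE3: it supplies an IORuntime in place of the CE2 ContextShift/Timer pair. A tiny self-contained sketch (names are illustrative):

import cats.effect.IO
import cats.effect.unsafe.implicits.global

object RunSyncSketch {
  def main(args: Array[String]): Unit = {
    val result = IO.pure(21).map(_ * 2).unsafeRunSync() // runs on the global IORuntime
    assert(result == 42)
  }
}
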
@ -5,6 +5,7 @@ import java.nio.file.{Files, Path}
|
|||||||
|
|
||||||
import cats.data.Kleisli
|
import cats.data.Kleisli
|
||||||
import cats.effect.IO
|
import cats.effect.IO
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
|
|
||||||
import docspell.common.MimeType
|
import docspell.common.MimeType
|
||||||
|
@ -4,19 +4,18 @@ import java.nio.charset.StandardCharsets
|
|||||||
import java.nio.file.{Path, Paths}
|
import java.nio.file.{Path, Paths}
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.convert._
|
import docspell.convert._
|
||||||
import docspell.files.{ExampleFiles, TestFiles}
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class ExternConvTest extends FunSuite with FileChecks {
|
class ExternConvTest extends FunSuite with FileChecks {
|
||||||
val blocker = TestFiles.blocker
|
val utf8 = StandardCharsets.UTF_8
|
||||||
implicit val CS = TestFiles.CS
|
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||||
val utf8 = StandardCharsets.UTF_8
|
val target = Paths.get("target")
|
||||||
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
|
||||||
val target = Paths.get("target")
|
|
||||||
|
|
||||||
test("convert html to pdf") {
|
test("convert html to pdf") {
|
||||||
val cfg = SystemCommand.Config(
|
val cfg = SystemCommand.Config(
|
||||||
@ -32,8 +31,8 @@ class ExternConvTest extends FunSuite with FileChecks {
|
|||||||
val wkCfg = WkHtmlPdfConfig(cfg, target)
|
val wkCfg = WkHtmlPdfConfig(cfg, target)
|
||||||
val p =
|
val p =
|
||||||
WkHtmlPdf
|
WkHtmlPdf
|
||||||
.toPDF[IO, Path](wkCfg, 8192, utf8, SanitizeHtml.none, blocker, logger)(
|
.toPDF[IO, Path](wkCfg, 8192, utf8, SanitizeHtml.none, logger)(
|
||||||
ExampleFiles.letter_de_html.readURL[IO](8192, blocker),
|
ExampleFiles.letter_de_html.readURL[IO](8192),
|
||||||
storePdfHandler(dir.resolve("test.pdf"))
|
storePdfHandler(dir.resolve("test.pdf"))
|
||||||
)
|
)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
@ -59,8 +58,8 @@ class ExternConvTest extends FunSuite with FileChecks {
|
|||||||
val ucCfg = UnoconvConfig(cfg, target)
|
val ucCfg = UnoconvConfig(cfg, target)
|
||||||
val p =
|
val p =
|
||||||
Unoconv
|
Unoconv
|
||||||
.toPDF[IO, Path](ucCfg, 8192, blocker, logger)(
|
.toPDF[IO, Path](ucCfg, 8192, logger)(
|
||||||
ExampleFiles.examples_sample_docx.readURL[IO](8192, blocker),
|
ExampleFiles.examples_sample_docx.readURL[IO](8192),
|
||||||
storePdfHandler(dir.resolve("test.pdf"))
|
storePdfHandler(dir.resolve("test.pdf"))
|
||||||
)
|
)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
@ -85,8 +84,8 @@ class ExternConvTest extends FunSuite with FileChecks {
|
|||||||
val tessCfg = TesseractConfig(cfg, target)
|
val tessCfg = TesseractConfig(cfg, target)
|
||||||
val (pdf, txt) =
|
val (pdf, txt) =
|
||||||
Tesseract
|
Tesseract
|
||||||
.toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, blocker, logger)(
|
.toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, logger)(
|
||||||
ExampleFiles.camera_letter_en_jpg.readURL[IO](8192, blocker),
|
ExampleFiles.camera_letter_en_jpg.readURL[IO](8192),
|
||||||
storePdfTxtHandler(dir.resolve("test.pdf"), dir.resolve("test.txt"))
|
storePdfTxtHandler(dir.resolve("test.pdf"), dir.resolve("test.txt"))
|
||||||
)
|
)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
|
@ -25,8 +25,7 @@ trait Extraction[F[_]] {
|
|||||||
|
|
||||||
object Extraction {
|
object Extraction {
|
||||||
|
|
||||||
def create[F[_]: Sync: ContextShift](
|
def create[F[_]: Async](
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
cfg: ExtractConfig
|
cfg: ExtractConfig
|
||||||
): Extraction[F] =
|
): Extraction[F] =
|
||||||
@ -39,7 +38,7 @@ object Extraction {
|
|||||||
TikaMimetype.resolve(dataType, data).flatMap {
|
TikaMimetype.resolve(dataType, data).flatMap {
|
||||||
case MimeType.PdfMatch(_) =>
|
case MimeType.PdfMatch(_) =>
|
||||||
PdfExtract
|
PdfExtract
|
||||||
.get(data, blocker, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
|
.get(data, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
|
||||||
.map(ExtractResult.fromEitherResult)
|
.map(ExtractResult.fromEitherResult)
|
||||||
|
|
||||||
case PoiType(mt) =>
|
case PoiType(mt) =>
|
||||||
@ -59,7 +58,7 @@ object Extraction {
|
|||||||
|
|
||||||
case OcrType(mt) =>
|
case OcrType(mt) =>
|
||||||
val doExtract = TextExtract
|
val doExtract = TextExtract
|
||||||
.extractOCR(data, blocker, logger, lang.iso3, cfg.ocr)
|
.extractOCR(data, logger, lang.iso3, cfg.ocr)
|
||||||
.compile
|
.compile
|
||||||
.lastOrError
|
.lastOrError
|
||||||
.map(_.value)
|
.map(_.value)
|
||||||
|
@ -17,9 +17,8 @@ object PdfExtract {
|
|||||||
Result(t._1, t._2)
|
Result(t._1, t._2)
|
||||||
}
|
}
|
||||||
|
|
||||||
def get[F[_]: Sync: ContextShift](
|
def get[F[_]: Async](
|
||||||
in: Stream[F, Byte],
|
in: Stream[F, Byte],
|
||||||
blocker: Blocker,
|
|
||||||
lang: Language,
|
lang: Language,
|
||||||
stripMinLen: Int,
|
stripMinLen: Int,
|
||||||
ocrCfg: OcrConfig,
|
ocrCfg: OcrConfig,
|
||||||
@ -27,7 +26,7 @@ object PdfExtract {
|
|||||||
): F[Either[Throwable, Result]] = {
|
): F[Either[Throwable, Result]] = {
|
||||||
|
|
||||||
val runOcr =
|
val runOcr =
|
||||||
TextExtract.extractOCR(in, blocker, logger, lang.iso3, ocrCfg).compile.lastOrError
|
TextExtract.extractOCR(in, logger, lang.iso3, ocrCfg).compile.lastOrError
|
||||||
|
|
||||||
def chooseResult(ocrStr: Text, strippedRes: (Text, Option[PdfMetaData])) =
|
def chooseResult(ocrStr: Text, strippedRes: (Text, Option[PdfMetaData])) =
|
||||||
if (ocrStr.length > strippedRes._1.length)
|
if (ocrStr.length > strippedRes._1.length)
|
||||||
|
@ -2,7 +2,7 @@ package docspell.extract.ocr
|
|||||||
|
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
|
|
||||||
import cats.effect.{Blocker, ContextShift, Sync}
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -11,16 +11,15 @@ object Ocr {
|
|||||||
|
|
||||||
/** Extract the text of all pages in the given pdf file.
|
/** Extract the text of all pages in the given pdf file.
|
||||||
*/
|
*/
|
||||||
def extractPdf[F[_]: Sync: ContextShift](
|
def extractPdf[F[_]: Async](
|
||||||
pdf: Stream[F, Byte],
|
pdf: Stream[F, Byte],
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): F[Option[String]] =
|
): F[Option[String]] =
|
||||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
|
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
|
||||||
runGhostscript(pdf, config, wd, blocker, logger)
|
runGhostscript(pdf, config, wd, logger)
|
||||||
.flatMap(tmpImg => runTesseractFile(tmpImg, blocker, logger, lang, config))
|
.flatMap(tmpImg => runTesseractFile(tmpImg, logger, lang, config))
|
||||||
.fold1(_ + "\n\n\n" + _)
|
.fold1(_ + "\n\n\n" + _)
|
||||||
.compile
|
.compile
|
||||||
.last
|
.last
|
||||||
@ -28,47 +27,43 @@ object Ocr {
|
|||||||
|
|
||||||
/** Extract the text from the given image file
|
/** Extract the text from the given image file
|
||||||
*/
|
*/
|
||||||
def extractImage[F[_]: Sync: ContextShift](
|
def extractImage[F[_]: Async](
|
||||||
img: Stream[F, Byte],
|
img: Stream[F, Byte],
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): Stream[F, String] =
|
): Stream[F, String] =
|
||||||
runTesseractStdin(img, blocker, logger, lang, config)
|
runTesseractStdin(img, logger, lang, config)
|
||||||
|
|
||||||
def extractPdFFile[F[_]: Sync: ContextShift](
|
def extractPdFFile[F[_]: Async](
|
||||||
pdf: Path,
|
pdf: Path,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): F[Option[String]] =
|
): F[Option[String]] =
|
||||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
|
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
|
||||||
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker, logger)
|
runGhostscriptFile(pdf, config.ghostscript.command, wd, logger)
|
||||||
.flatMap(tif => runTesseractFile(tif, blocker, logger, lang, config))
|
.flatMap(tif => runTesseractFile(tif, logger, lang, config))
|
||||||
.fold1(_ + "\n\n\n" + _)
|
.fold1(_ + "\n\n\n" + _)
|
||||||
.compile
|
.compile
|
||||||
.last
|
.last
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractImageFile[F[_]: Sync: ContextShift](
|
def extractImageFile[F[_]: Async](
|
||||||
img: Path,
|
img: Path,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): Stream[F, String] =
|
): Stream[F, String] =
|
||||||
runTesseractFile(img, blocker, logger, lang, config)
|
runTesseractFile(img, logger, lang, config)
|
||||||
|
|
||||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||||
* files are stored to a temporary location on disk and returned.
|
* files are stored to a temporary location on disk and returned.
|
||||||
*/
|
*/
|
||||||
private[extract] def runGhostscript[F[_]: Sync: ContextShift](
|
private[extract] def runGhostscript[F[_]: Async](
|
||||||
pdf: Stream[F, Byte],
|
pdf: Stream[F, Byte],
|
||||||
cfg: OcrConfig,
|
cfg: OcrConfig,
|
||||||
wd: Path,
|
wd: Path,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
): Stream[F, Path] = {
|
): Stream[F, Path] = {
|
||||||
val xargs =
|
val xargs =
|
||||||
@ -84,19 +79,18 @@ object Ocr {
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
SystemCommand
|
SystemCommand
|
||||||
.execSuccess(cmd, blocker, logger, wd = Some(wd), stdin = pdf)
|
.execSuccess(cmd, logger, wd = Some(wd), stdin = pdf)
|
||||||
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
|
.evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd))
|
||||||
.flatMap(fs => Stream.emits(fs))
|
.flatMap(fs => Stream.emits(fs))
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||||
* files are stored to a temporary location on disk and returned.
|
* files are stored to a temporary location on disk and returned.
|
||||||
*/
|
*/
|
||||||
private[extract] def runGhostscriptFile[F[_]: Sync: ContextShift](
|
private[extract] def runGhostscriptFile[F[_]: Async](
|
||||||
pdf: Path,
|
pdf: Path,
|
||||||
ghostscript: SystemCommand.Config,
|
ghostscript: SystemCommand.Config,
|
||||||
wd: Path,
|
wd: Path,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
): Stream[F, Path] = {
|
): Stream[F, Path] = {
|
||||||
val cmd = ghostscript.replace(
|
val cmd = ghostscript.replace(
|
||||||
@ -106,8 +100,8 @@ object Ocr {
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
SystemCommand
|
SystemCommand
|
||||||
.execSuccess[F](cmd, blocker, logger, wd = Some(wd))
|
.execSuccess[F](cmd, logger, wd = Some(wd))
|
||||||
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
|
.evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd))
|
||||||
.flatMap(fs => Stream.emits(fs))
|
.flatMap(fs => Stream.emits(fs))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,11 +111,10 @@ object Ocr {
|
|||||||
/** Run unpaper to optimize the image for ocr. The
|
/** Run unpaper to optimize the image for ocr. The
|
||||||
* files are stored to a temporary location on disk and returned.
|
* files are stored to a temporary location on disk and returned.
|
||||||
*/
|
*/
|
||||||
private[extract] def runUnpaperFile[F[_]: Sync: ContextShift](
|
private[extract] def runUnpaperFile[F[_]: Async](
|
||||||
img: Path,
|
img: Path,
|
||||||
unpaper: SystemCommand.Config,
|
unpaper: SystemCommand.Config,
|
||||||
wd: Path,
|
wd: Path,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
): Stream[F, Path] = {
|
): Stream[F, Path] = {
|
||||||
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
|
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
|
||||||
@ -132,7 +125,7 @@ object Ocr {
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
SystemCommand
|
SystemCommand
|
||||||
.execSuccess[F](cmd, blocker, logger, wd = Some(wd))
|
.execSuccess[F](cmd, logger, wd = Some(wd))
|
||||||
.map(_ => targetFile)
|
.map(_ => targetFile)
|
||||||
.handleErrorWith { th =>
|
.handleErrorWith { th =>
|
||||||
logger
|
logger
|
||||||
@ -146,39 +139,36 @@ object Ocr {
|
|||||||
/** Run tesseract on the given image file and return the extracted
|
/** Run tesseract on the given image file and return the extracted
|
||||||
* text.
|
* text.
|
||||||
*/
|
*/
|
||||||
private[extract] def runTesseractFile[F[_]: Sync: ContextShift](
|
private[extract] def runTesseractFile[F[_]: Async](
|
||||||
img: Path,
|
img: Path,
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): Stream[F, String] =
|
): Stream[F, String] =
|
||||||
// tesseract cannot cope with absolute filenames
|
// tesseract cannot cope with absolute filenames
|
||||||
// so use the parent as working dir
|
// so use the parent as working dir
|
||||||
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker, logger).flatMap {
|
runUnpaperFile(img, config.unpaper.command, img.getParent, logger).flatMap { uimg =>
|
||||||
uimg =>
|
val cmd = config.tesseract.command
|
||||||
val cmd = config.tesseract.command
|
.replace(
|
||||||
.replace(
|
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
|
||||||
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
|
)
|
||||||
)
|
SystemCommand
|
||||||
SystemCommand
|
.execSuccess[F](cmd, logger, wd = Some(uimg.getParent))
|
||||||
.execSuccess[F](cmd, blocker, logger, wd = Some(uimg.getParent))
|
.map(_.stdout)
|
||||||
.map(_.stdout)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Run tesseract on the given image file and return the extracted
|
/** Run tesseract on the given image file and return the extracted
|
||||||
* text.
|
* text.
|
||||||
*/
|
*/
|
||||||
private[extract] def runTesseractStdin[F[_]: Sync: ContextShift](
|
private[extract] def runTesseractStdin[F[_]: Async](
|
||||||
img: Stream[F, Byte],
|
img: Stream[F, Byte],
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): Stream[F, String] = {
|
): Stream[F, String] = {
|
||||||
val cmd = config.tesseract.command
|
val cmd = config.tesseract.command
|
||||||
.replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang)))
|
.replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang)))
|
||||||
SystemCommand.execSuccess(cmd, blocker, logger, stdin = img).map(_.stdout)
|
SystemCommand.execSuccess(cmd, logger, stdin = img).map(_.stdout)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def fixLanguage(lang: String): String =
|
private def fixLanguage(lang: String): String =
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
package docspell.extract.ocr
|
package docspell.extract.ocr
|
||||||
|
|
||||||
import cats.effect.{Blocker, ContextShift, Sync}
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -9,18 +9,16 @@ import docspell.files._
|
|||||||
|
|
||||||
object TextExtract {
|
object TextExtract {
|
||||||
|
|
||||||
def extract[F[_]: Sync: ContextShift](
|
def extract[F[_]: Async](
|
||||||
in: Stream[F, Byte],
|
in: Stream[F, Byte],
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
): Stream[F, Text] =
|
): Stream[F, Text] =
|
||||||
extractOCR(in, blocker, logger, lang, config)
|
extractOCR(in, logger, lang, config)
|
||||||
|
|
||||||
def extractOCR[F[_]: Sync: ContextShift](
|
def extractOCR[F[_]: Async](
|
||||||
in: Stream[F, Byte],
|
in: Stream[F, Byte],
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
lang: String,
|
lang: String,
|
||||||
config: OcrConfig
|
config: OcrConfig
|
||||||
@ -29,10 +27,10 @@ object TextExtract {
|
|||||||
.eval(TikaMimetype.detect(in, MimeTypeHint.none))
|
.eval(TikaMimetype.detect(in, MimeTypeHint.none))
|
||||||
.flatMap({
|
.flatMap({
|
||||||
case MimeType.pdf =>
|
case MimeType.pdf =>
|
||||||
Stream.eval(Ocr.extractPdf(in, blocker, logger, lang, config)).unNoneTerminate
|
Stream.eval(Ocr.extractPdf(in, logger, lang, config)).unNoneTerminate
|
||||||
|
|
||||||
case mt if mt.primary == "image" =>
|
case mt if mt.primary == "image" =>
|
||||||
Ocr.extractImage(in, blocker, logger, lang, config)
|
Ocr.extractImage(in, logger, lang, config)
|
||||||
|
|
||||||
case mt =>
|
case mt =>
|
||||||
raiseError(s"File `$mt` not supported")
|
raiseError(s"File `$mt` not supported")
|
||||||
|
@ -12,6 +12,7 @@ import fs2.Stream
|
|||||||
import org.apache.commons.io.output.ByteArrayOutputStream
|
import org.apache.commons.io.output.ByteArrayOutputStream
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument
|
import org.apache.pdfbox.pdmodel.PDDocument
|
||||||
import org.apache.pdfbox.rendering.PDFRenderer
|
import org.apache.pdfbox.rendering.PDFRenderer
|
||||||
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
trait PdfboxPreview[F[_]] {
|
trait PdfboxPreview[F[_]] {
|
||||||
|
|
||||||
@ -50,7 +51,7 @@ object PdfboxPreview {
|
|||||||
private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = {
|
private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = {
|
||||||
val out = new ByteArrayOutputStream()
|
val out = new ByteArrayOutputStream()
|
||||||
ImageIO.write(img, "PNG", out)
|
ImageIO.write(img, "PNG", out)
|
||||||
Stream.chunk(Chunk.bytes(out.toByteArray()))
|
Stream.chunk(Chunk.byteVector(ByteVector.view(out.toByteArray())))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
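
fs2 3 removed Chunk.bytes, which is why the preview code above wraps the byte array in a scodec-bits ByteVector (ByteVector.view avoids copying) before lifting it with Chunk.byteVector. A small sketch of the same idea with made-up names:

import fs2.{Chunk, Pure, Stream}
import scodec.bits.ByteVector

object ChunkSketch {
  def bytesToStream(bytes: Array[Byte]): Stream[Pure, Byte] =
    Stream.chunk(Chunk.byteVector(ByteVector.view(bytes)))

  def main(args: Array[String]): Unit =
    assert(bytesToStream("png".getBytes).toList == "png".getBytes.toList)
}
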
@ -1,6 +1,7 @@
|
|||||||
package docspell.extract.ocr
|
package docspell.extract.ocr
|
||||||
|
|
||||||
import cats.effect.IO
|
import cats.effect.IO
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
|
|
||||||
import docspell.common.Logger
|
import docspell.common.Logger
|
||||||
import docspell.files.TestFiles
|
import docspell.files.TestFiles
|
||||||
@ -14,7 +15,7 @@ class TextExtractionSuite extends FunSuite {
|
|||||||
|
|
||||||
test("extract english pdf".ignore) {
|
test("extract english pdf".ignore) {
|
||||||
val text = TextExtract
|
val text = TextExtract
|
||||||
.extract[IO](letterSourceEN, blocker, logger, "eng", OcrConfig.default)
|
.extract[IO](letterSourceEN, logger, "eng", OcrConfig.default)
|
||||||
.compile
|
.compile
|
||||||
.lastOrError
|
.lastOrError
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
@ -24,7 +25,7 @@ class TextExtractionSuite extends FunSuite {
|
|||||||
test("extract german pdf".ignore) {
|
test("extract german pdf".ignore) {
|
||||||
val expect = TestFiles.letterDEText
|
val expect = TestFiles.letterDEText
|
||||||
val extract = TextExtract
|
val extract = TextExtract
|
||||||
.extract[IO](letterSourceDE, blocker, logger, "deu", OcrConfig.default)
|
.extract[IO](letterSourceDE, logger, "deu", OcrConfig.default)
|
||||||
.compile
|
.compile
|
||||||
.lastOrError
|
.lastOrError
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
package docspell.extract.odf
|
package docspell.extract.odf
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
|
|
||||||
import docspell.files.{ExampleFiles, TestFiles}
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class OdfExtractTest extends FunSuite {
|
class OdfExtractTest extends FunSuite {
|
||||||
val blocker = TestFiles.blocker
|
|
||||||
implicit val CS = TestFiles.CS
|
|
||||||
|
|
||||||
val files = List(
|
val files = List(
|
||||||
ExampleFiles.examples_sample_odt -> 6372,
|
ExampleFiles.examples_sample_odt -> 6372,
|
||||||
@ -21,7 +20,7 @@ class OdfExtractTest extends FunSuite {
|
|||||||
val str1 = OdfExtract.get(is).fold(throw _, identity)
|
val str1 = OdfExtract.get(is).fold(throw _, identity)
|
||||||
assertEquals(str1.length, len)
|
assertEquals(str1.length, len)
|
||||||
|
|
||||||
val data = file.readURL[IO](8192, blocker)
|
val data = file.readURL[IO](8192)
|
||||||
val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity)
|
val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity)
|
||||||
assertEquals(str2, str1)
|
assertEquals(str2, str1)
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
package docspell.extract.pdfbox
|
package docspell.extract.pdfbox
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
|
|
||||||
import docspell.files.{ExampleFiles, TestFiles}
|
import docspell.files.{ExampleFiles, TestFiles}
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class PdfboxExtractTest extends FunSuite {
|
class PdfboxExtractTest extends FunSuite {
|
||||||
val blocker = TestFiles.blocker
|
|
||||||
implicit val CS = TestFiles.CS
|
|
||||||
|
|
||||||
val textPDFs = List(
|
val textPDFs = List(
|
||||||
ExampleFiles.letter_de_pdf -> TestFiles.letterDEText,
|
ExampleFiles.letter_de_pdf -> TestFiles.letterDEText,
|
||||||
@ -27,7 +26,7 @@ class PdfboxExtractTest extends FunSuite {
|
|||||||
|
|
||||||
test("extract text from text PDFs via Stream") {
|
test("extract text from text PDFs via Stream") {
|
||||||
textPDFs.foreach { case (file, txt) =>
|
textPDFs.foreach { case (file, txt) =>
|
||||||
val data = file.readURL[IO](8192, blocker)
|
val data = file.readURL[IO](8192)
|
||||||
val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity)
|
val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity)
|
||||||
val received = removeFormatting(str.value)
|
val received = removeFormatting(str.value)
|
||||||
val expect = removeFormatting(txt)
|
val expect = removeFormatting(txt)
|
||||||
|
@ -3,15 +3,15 @@ package docspell.extract.pdfbox
|
|||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import fs2.io.file.Files
|
||||||
|
|
||||||
import docspell.files.{ExampleFiles, TestFiles}
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class PdfboxPreviewTest extends FunSuite {
|
class PdfboxPreviewTest extends FunSuite {
|
||||||
val blocker = TestFiles.blocker
|
|
||||||
implicit val CS = TestFiles.CS
|
|
||||||
|
|
||||||
val testPDFs = List(
|
val testPDFs = List(
|
||||||
ExampleFiles.letter_de_pdf -> "7d98be75b239816d6c751b3f3c56118ebf1a4632c43baf35a68a662f9d595ab8",
|
ExampleFiles.letter_de_pdf -> "7d98be75b239816d6c751b3f3c56118ebf1a4632c43baf35a68a662f9d595ab8",
|
||||||
@ -21,7 +21,7 @@ class PdfboxPreviewTest extends FunSuite {
|
|||||||
|
|
||||||
test("extract first page image from PDFs".flaky) {
|
test("extract first page image from PDFs".flaky) {
|
||||||
testPDFs.foreach { case (file, checksum) =>
|
testPDFs.foreach { case (file, checksum) =>
|
||||||
val data = file.readURL[IO](8192, blocker)
|
val data = file.readURL[IO](8192)
|
||||||
val sha256out =
|
val sha256out =
|
||||||
Stream
|
Stream
|
||||||
.eval(PdfboxPreview[IO](PreviewConfig(48)))
|
.eval(PdfboxPreview[IO](PreviewConfig(48)))
|
||||||
@ -42,7 +42,7 @@ class PdfboxPreviewTest extends FunSuite {
|
|||||||
def writeToFile(data: Stream[IO, Byte], file: Path): IO[Unit] =
|
def writeToFile(data: Stream[IO, Byte], file: Path): IO[Unit] =
|
||||||
data
|
data
|
||||||
.through(
|
.through(
|
||||||
fs2.io.file.writeAll(file, blocker)
|
Files[IO].writeAll(file)
|
||||||
)
|
)
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
|
@ -1,15 +1,14 @@
|
|||||||
package docspell.extract.poi
|
package docspell.extract.poi
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
|
|
||||||
import docspell.common.MimeTypeHint
|
import docspell.common.MimeTypeHint
|
||||||
import docspell.files.{ExampleFiles, TestFiles}
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class PoiExtractTest extends FunSuite {
|
class PoiExtractTest extends FunSuite {
|
||||||
val blocker = TestFiles.blocker
|
|
||||||
implicit val CS = TestFiles.CS
|
|
||||||
|
|
||||||
val officeFiles = List(
|
val officeFiles = List(
|
||||||
ExampleFiles.examples_sample_doc -> 6241,
|
ExampleFiles.examples_sample_doc -> 6241,
|
||||||
@ -21,13 +20,13 @@ class PoiExtractTest extends FunSuite {
|
|||||||
test("extract text from ms office files") {
|
test("extract text from ms office files") {
|
||||||
officeFiles.foreach { case (file, len) =>
|
officeFiles.foreach { case (file, len) =>
|
||||||
val str1 = PoiExtract
|
val str1 = PoiExtract
|
||||||
.get[IO](file.readURL[IO](8192, blocker), MimeTypeHint.none)
|
.get[IO](file.readURL[IO](8192), MimeTypeHint.none)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
.fold(throw _, identity)
|
.fold(throw _, identity)
|
||||||
|
|
||||||
val str2 = PoiExtract
|
val str2 = PoiExtract
|
||||||
.get[IO](
|
.get[IO](
|
||||||
file.readURL[IO](8192, blocker),
|
file.readURL[IO](8192),
|
||||||
MimeTypeHint(Some(file.path.segments.last), None)
|
MimeTypeHint(Some(file.path.segments.last), None)
|
||||||
)
|
)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
|
@ -13,28 +13,19 @@ import docspell.common.Glob
|
|||||||
|
|
||||||
object Zip {
|
object Zip {
|
||||||
|
|
||||||
def unzipP[F[_]: ConcurrentEffect: ContextShift](
|
def unzipP[F[_]: Async](chunkSize: Int, glob: Glob): Pipe[F, Byte, Binary[F]] =
|
||||||
chunkSize: Int,
|
s => unzip[F](chunkSize, glob)(s)
|
||||||
blocker: Blocker,
|
|
||||||
glob: Glob
|
|
||||||
): Pipe[F, Byte, Binary[F]] =
|
|
||||||
s => unzip[F](chunkSize, blocker, glob)(s)
|
|
||||||
|
|
||||||
def unzip[F[_]: ConcurrentEffect: ContextShift](
|
def unzip[F[_]: Async](chunkSize: Int, glob: Glob)(
|
||||||
chunkSize: Int,
|
|
||||||
blocker: Blocker,
|
|
||||||
glob: Glob
|
|
||||||
)(
|
|
||||||
data: Stream[F, Byte]
|
data: Stream[F, Byte]
|
||||||
): Stream[F, Binary[F]] =
|
): Stream[F, Binary[F]] =
|
||||||
data
|
data
|
||||||
.through(fs2.io.toInputStream[F])
|
.through(fs2.io.toInputStream[F])
|
||||||
.flatMap(in => unzipJava(in, chunkSize, blocker, glob))
|
.flatMap(in => unzipJava(in, chunkSize, glob))
|
||||||
|
|
||||||
def unzipJava[F[_]: Sync: ContextShift](
|
def unzipJava[F[_]: Async](
|
||||||
in: InputStream,
|
in: InputStream,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker,
|
|
||||||
glob: Glob
|
glob: Glob
|
||||||
): Stream[F, Binary[F]] = {
|
): Stream[F, Binary[F]] = {
|
||||||
val zin = new ZipInputStream(in)
|
val zin = new ZipInputStream(in)
|
||||||
@ -52,7 +43,7 @@ object Zip {
|
|||||||
.map { ze =>
|
.map { ze =>
|
||||||
val name = Paths.get(ze.getName()).getFileName.toString
|
val name = Paths.get(ze.getName()).getFileName.toString
|
||||||
val data =
|
val data =
|
||||||
fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false)
|
fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, false)
|
||||||
Binary(name, data)
|
Binary(name, data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
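
In the Zip hunk above, fs2.io.readInputStream loses its Blocker parameter because fs2 3 shifts the chunked reads to the runtime's blocking pool on its own. A hedged sketch of that call; the ByteArrayInputStream and all names below are only for illustration:

import java.io.{ByteArrayInputStream, InputStream}

import cats.effect.{IO, Sync}
import cats.effect.unsafe.implicits.global
import cats.syntax.applicative._
import fs2.Stream

object ReadInputStreamSketch {
  def fromInputStream[F[_]: Sync](in: InputStream, chunkSize: Int): Stream[F, Byte] =
    fs2.io.readInputStream(in.pure[F], chunkSize, closeAfterUse = false)

  def main(args: Array[String]): Unit = {
    val bytes = fromInputStream[IO](new ByteArrayInputStream("entry".getBytes), 64)
      .compile.toList.unsafeRunSync()
    assert(bytes.nonEmpty)
  }
}
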
@ -1,16 +1,14 @@
|
|||||||
package docspell.files
|
package docspell.files
|
||||||
|
|
||||||
import scala.concurrent.ExecutionContext
|
|
||||||
import scala.util.Using
|
import scala.util.Using
|
||||||
|
|
||||||
import cats.effect.{Blocker, IO}
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
|
|
||||||
class ImageSizeTest extends FunSuite {
|
class ImageSizeTest extends FunSuite {
|
||||||
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
|
|
||||||
implicit val CS = IO.contextShift(ExecutionContext.global)
|
|
||||||
|
|
||||||
//tiff files are not supported on the jdk by default
|
//tiff files are not supported on the jdk by default
|
||||||
//requires an external library
|
//requires an external library
|
||||||
@ -37,7 +35,7 @@ class ImageSizeTest extends FunSuite {
|
|||||||
|
|
||||||
test("get sizes from stream") {
|
test("get sizes from stream") {
|
||||||
files.foreach { case (uri, expect) =>
|
files.foreach { case (uri, expect) =>
|
||||||
val stream = uri.readURL[IO](8192, blocker)
|
val stream = uri.readURL[IO](8192)
|
||||||
val dim = ImageSize.get(stream).unsafeRunSync()
|
val dim = ImageSize.get(stream).unsafeRunSync()
|
||||||
assertEquals(dim, expect.some)
|
assertEquals(dim, expect.some)
|
||||||
}
|
}
|
||||||
|
@ -1,19 +1,17 @@
|
|||||||
package docspell.files
|
package docspell.files
|
||||||
|
|
||||||
import scala.concurrent.ExecutionContext
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
|
|
||||||
import docspell.common.MimeTypeHint
|
import docspell.common.MimeTypeHint
|
||||||
|
|
||||||
object Playing extends IOApp {
|
object Playing extends IOApp {
|
||||||
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
|
|
||||||
|
|
||||||
def run(args: List[String]): IO[ExitCode] =
|
def run(args: List[String]): IO[ExitCode] =
|
||||||
IO {
|
IO {
|
||||||
//val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker)
|
//val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker)
|
||||||
//val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker)
|
//val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker)
|
||||||
val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192, blocker)
|
val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192)
|
||||||
|
|
||||||
val x = for {
|
val x = for {
|
||||||
odsm1 <-
|
odsm1 <-
|
||||||
|
@ -1,29 +1,26 @@
|
|||||||
package docspell.files
|
package docspell.files
|
||||||
|
|
||||||
import scala.concurrent.ExecutionContext
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
import cats.effect.{Blocker, IO}
|
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
|
||||||
object TestFiles {
|
object TestFiles {
|
||||||
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
|
|
||||||
implicit val CS = IO.contextShift(ExecutionContext.global)
|
|
||||||
|
|
||||||
val letterSourceDE: Stream[IO, Byte] =
|
val letterSourceDE: Stream[IO, Byte] =
|
||||||
ExampleFiles.letter_de_pdf
|
ExampleFiles.letter_de_pdf
|
||||||
.readURL[IO](8 * 1024, blocker)
|
.readURL[IO](8 * 1024)
|
||||||
|
|
||||||
val letterSourceEN: Stream[IO, Byte] =
|
val letterSourceEN: Stream[IO, Byte] =
|
||||||
ExampleFiles.letter_en_pdf
|
ExampleFiles.letter_en_pdf
|
||||||
.readURL[IO](8 * 1024, blocker)
|
.readURL[IO](8 * 1024)
|
||||||
|
|
||||||
lazy val letterDEText =
|
lazy val letterDEText =
|
||||||
ExampleFiles.letter_de_txt
|
ExampleFiles.letter_de_txt
|
||||||
.readText[IO](8 * 1024, blocker)
|
.readText[IO](8 * 1024)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
|
|
||||||
lazy val letterENText =
|
lazy val letterENText =
|
||||||
ExampleFiles.letter_en_txt
|
ExampleFiles.letter_en_txt
|
||||||
.readText[IO](8 * 1024, blocker)
|
.readText[IO](8 * 1024)
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
}
|
}
|
||||||
|
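
TestFiles above can delete its Blocker and ContextShift vals because the CE3 IO runtime carries its own blocking pool; one-off blocking calls are wrapped with IO.blocking instead of Blocker#delay. Sketch only, with hypothetical names:

import cats.effect.IO
import cats.effect.unsafe.implicits.global

object BlockingSketch {
  def fileSize(path: java.nio.file.Path): IO[Long] =
    IO.blocking(java.nio.file.Files.size(path)) // executed on the runtime's blocking pool

  def main(args: Array[String]): Unit = {
    val tmp = java.nio.file.Files.createTempFile("sketch", ".txt")
    assert(fileSize(tmp).unsafeRunSync() >= 0L)
  }
}
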
@ -1,8 +1,7 @@
|
|||||||
package docspell.files
|
package docspell.files
|
||||||
|
|
||||||
import scala.concurrent.ExecutionContext
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.effect.unsafe.implicits.global
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.common.Glob
|
import docspell.common.Glob
|
||||||
@ -11,12 +10,9 @@ import munit._
|
|||||||
|
|
||||||
class ZipTest extends FunSuite {
|
class ZipTest extends FunSuite {
|
||||||
|
|
||||||
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
|
|
||||||
implicit val CS = IO.contextShift(ExecutionContext.global)
|
|
||||||
|
|
||||||
test("unzip") {
|
test("unzip") {
|
||||||
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
|
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192)
|
||||||
val uncomp = zipFile.through(Zip.unzip(8192, blocker, Glob.all))
|
val uncomp = zipFile.through(Zip.unzip(8192, Glob.all))
|
||||||
|
|
||||||
uncomp
|
uncomp
|
||||||
.evalMap { entry =>
|
.evalMap { entry =>
|
||||||
|
@ -11,7 +11,7 @@ import org.http4s.client.Client
|
|||||||
import org.http4s.client.middleware.Logger
|
import org.http4s.client.middleware.Logger
|
||||||
import org.log4s.getLogger
|
import org.log4s.getLogger
|
||||||
|
|
||||||
final class SolrFtsClient[F[_]: Effect](
|
final class SolrFtsClient[F[_]: Async](
|
||||||
solrUpdate: SolrUpdate[F],
|
solrUpdate: SolrUpdate[F],
|
||||||
solrSetup: SolrSetup[F],
|
solrSetup: SolrSetup[F],
|
||||||
solrQuery: SolrQuery[F]
|
solrQuery: SolrQuery[F]
|
||||||
@ -77,7 +77,7 @@ final class SolrFtsClient[F[_]: Effect](
|
|||||||
object SolrFtsClient {
|
object SolrFtsClient {
|
||||||
private[this] val logger = getLogger
|
private[this] val logger = getLogger
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](
|
def apply[F[_]: Async](
|
||||||
cfg: SolrConfig,
|
cfg: SolrConfig,
|
||||||
httpClient: Client[F]
|
httpClient: Client[F]
|
||||||
): Resource[F, FtsClient[F]] = {
|
): Resource[F, FtsClient[F]] = {
|
||||||
@ -91,7 +91,7 @@ object SolrFtsClient {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def loggingMiddleware[F[_]: Concurrent](
|
private def loggingMiddleware[F[_]: Async](
|
||||||
cfg: SolrConfig,
|
cfg: SolrConfig,
|
||||||
client: Client[F]
|
client: Client[F]
|
||||||
): Client[F] =
|
): Client[F] =
|
||||||
|
@ -22,7 +22,7 @@ trait SolrQuery[F[_]] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
object SolrQuery {
|
object SolrQuery {
|
||||||
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
|
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
|
||||||
val dsl = new Http4sClientDsl[F] {}
|
val dsl = new Http4sClientDsl[F] {}
|
||||||
import dsl._
|
import dsl._
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ trait SolrSetup[F[_]] {
|
|||||||
object SolrSetup {
|
object SolrSetup {
|
||||||
private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
|
private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
|
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
|
||||||
val dsl = new Http4sClientDsl[F] {}
|
val dsl = new Http4sClientDsl[F] {}
|
||||||
import dsl._
|
import dsl._
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ trait SolrUpdate[F[_]] {
|
|||||||
|
|
||||||
object SolrUpdate {
|
object SolrUpdate {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = {
|
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = {
|
||||||
val dsl = new Http4sClientDsl[F] {}
|
val dsl = new Http4sClientDsl[F] {}
|
||||||
import dsl._
|
import dsl._
|
||||||
|
|
||||||
|
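
The Solr hunks above relax ConcurrentEffect/Effect to Async, which is all http4s 0.23 asks of a client. A rough sketch of a helper in that style; the ping path and the names are assumptions, not the project's API:

import cats.effect.Async
import org.http4s.Uri
import org.http4s.client.Client

object SolrPingSketch {
  // Async is enough for http4s 0.23 client calls; no ConcurrentEffect needed
  def ping[F[_]: Async](client: Client[F], solrUrl: Uri): F[String] =
    client.expect[String](solrUrl / "admin" / "ping")
}
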
@ -30,10 +30,10 @@ import docspell.store.queue._
|
|||||||
import docspell.store.records.RJobLog
|
import docspell.store.records.RJobLog
|
||||||
|
|
||||||
import emil.javamail._
|
import emil.javamail._
|
||||||
|
import org.http4s.blaze.client.BlazeClientBuilder
|
||||||
import org.http4s.client.Client
|
import org.http4s.client.Client
|
||||||
import org.http4s.client.blaze.BlazeClientBuilder
|
|
||||||
|
|
||||||
final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
|
final class JoexAppImpl[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
nodeOps: ONode[F],
|
nodeOps: ONode[F],
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
@ -49,8 +49,8 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
|
|||||||
val prun = periodicScheduler.start.compile.drain
|
val prun = periodicScheduler.start.compile.drain
|
||||||
for {
|
for {
|
||||||
_ <- scheduleBackgroundTasks
|
_ <- scheduleBackgroundTasks
|
||||||
_ <- ConcurrentEffect[F].start(run)
|
_ <- Async[F].start(run)
|
||||||
_ <- ConcurrentEffect[F].start(prun)
|
_ <- Async[F].start(prun)
|
||||||
_ <- scheduler.periodicAwake
|
_ <- scheduler.periodicAwake
|
||||||
_ <- periodicScheduler.periodicAwake
|
_ <- periodicScheduler.periodicAwake
|
||||||
_ <- nodeOps.register(cfg.appId, NodeType.Joex, cfg.baseUrl)
|
_ <- nodeOps.register(cfg.appId, NodeType.Joex, cfg.baseUrl)
|
||||||
@ -79,17 +79,16 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
|
|||||||
|
|
||||||
object JoexAppImpl {
|
object JoexAppImpl {
|
||||||
|
|
||||||
def create[F[_]: ConcurrentEffect: ContextShift: Timer](
|
def create[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
termSignal: SignallingRef[F, Boolean],
|
termSignal: SignallingRef[F, Boolean],
|
||||||
connectEC: ExecutionContext,
|
connectEC: ExecutionContext,
|
||||||
clientEC: ExecutionContext,
|
clientEC: ExecutionContext
|
||||||
blocker: Blocker
|
|
||||||
): Resource[F, JoexApp[F]] =
|
): Resource[F, JoexApp[F]] =
|
||||||
for {
|
for {
|
||||||
httpClient <- BlazeClientBuilder[F](clientEC).resource
|
httpClient <- BlazeClientBuilder[F](clientEC).resource
|
||||||
client = JoexClient(httpClient)
|
client = JoexClient(httpClient)
|
||||||
store <- Store.create(cfg.jdbc, connectEC, blocker)
|
store <- Store.create(cfg.jdbc, connectEC)
|
||||||
queue <- JobQueue(store)
|
queue <- JobQueue(store)
|
||||||
pstore <- PeriodicTaskStore.create(store)
|
pstore <- PeriodicTaskStore.create(store)
|
||||||
nodeOps <- ONode(store)
|
nodeOps <- ONode(store)
|
||||||
@ -97,11 +96,11 @@ object JoexAppImpl {
|
|||||||
upload <- OUpload(store, queue, cfg.files, joex)
|
upload <- OUpload(store, queue, cfg.files, joex)
|
||||||
fts <- createFtsClient(cfg)(httpClient)
|
fts <- createFtsClient(cfg)(httpClient)
|
||||||
itemOps <- OItem(store, fts, queue, joex)
|
itemOps <- OItem(store, fts, queue, joex)
|
||||||
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig, blocker)
|
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
|
||||||
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, blocker, store)
|
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
|
||||||
javaEmil =
|
javaEmil =
|
||||||
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
||||||
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
|
sch <- SchedulerBuilder(cfg.scheduler, store)
|
||||||
.withQueue(queue)
|
.withQueue(queue)
|
||||||
.withTask(
|
.withTask(
|
||||||
JobTask.json(
|
JobTask.json(
|
||||||
@ -207,14 +206,13 @@ object JoexAppImpl {
|
|||||||
sch,
|
sch,
|
||||||
queue,
|
queue,
|
||||||
pstore,
|
pstore,
|
||||||
client,
|
client
|
||||||
Timer[F]
|
|
||||||
)
|
)
|
||||||
app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch)
|
app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch)
|
||||||
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
|
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
|
||||||
} yield appR
|
} yield appR
|
||||||
|
|
||||||
private def createFtsClient[F[_]: ConcurrentEffect](
|
private def createFtsClient[F[_]: Async](
|
||||||
cfg: Config
|
cfg: Config
|
||||||
)(client: Client[F]): Resource[F, FtsClient[F]] =
|
)(client: Client[F]): Resource[F, FtsClient[F]] =
|
||||||
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
|
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
|
||||||
|
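
JoexAppImpl above now starts its background loops with Async[F].start instead of ConcurrentEffect[F].start, and the explicit Timer[F] argument disappears because Async already provides clock and scheduling. A minimal fire-and-forget sketch with illustrative names:

import cats.effect.{Async, IO}
import cats.effect.unsafe.implicits.global
import cats.syntax.functor._

object StartSketch {
  def runInBackground[F[_]: Async](task: F[Unit]): F[Unit] =
    Async[F].start(task).void // returns once the fiber is started, like the scheduler startup above

  def main(args: Array[String]): Unit =
    runInBackground[IO](IO.unit).unsafeRunSync()
}
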
@ -1,7 +1,7 @@
|
|||||||
package docspell.joex
|
package docspell.joex
|
||||||
|
|
||||||
|
import cats.effect.Ref
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.concurrent.Ref
|
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
import fs2.concurrent.SignallingRef
|
import fs2.concurrent.SignallingRef
|
||||||
|
|
||||||
@ -9,9 +9,9 @@ import docspell.common.Pools
|
|||||||
import docspell.joex.routes._
|
import docspell.joex.routes._
|
||||||
|
|
||||||
import org.http4s.HttpApp
|
import org.http4s.HttpApp
|
||||||
|
import org.http4s.blaze.server.BlazeServerBuilder
|
||||||
import org.http4s.implicits._
|
import org.http4s.implicits._
|
||||||
import org.http4s.server.Router
|
import org.http4s.server.Router
|
||||||
import org.http4s.server.blaze.BlazeServerBuilder
|
|
||||||
import org.http4s.server.middleware.Logger
|
import org.http4s.server.middleware.Logger
|
||||||
|
|
||||||
object JoexServer {
|
object JoexServer {
|
||||||
@ -22,17 +22,14 @@ object JoexServer {
|
|||||||
exitRef: Ref[F, ExitCode]
|
exitRef: Ref[F, ExitCode]
|
||||||
)
|
)
|
||||||
|
|
||||||
def stream[F[_]: ConcurrentEffect: ContextShift](
|
def stream[F[_]: Async](cfg: Config, pools: Pools): Stream[F, Nothing] = {
|
||||||
cfg: Config,
|
|
||||||
pools: Pools
|
|
||||||
)(implicit T: Timer[F]): Stream[F, Nothing] = {
|
|
||||||
|
|
||||||
val app = for {
|
val app = for {
|
||||||
signal <- Resource.eval(SignallingRef[F, Boolean](false))
|
signal <- Resource.eval(SignallingRef[F, Boolean](false))
|
||||||
exitCode <- Resource.eval(Ref[F].of(ExitCode.Success))
|
exitCode <- Resource.eval(Ref[F].of(ExitCode.Success))
|
||||||
joexApp <-
|
joexApp <-
|
||||||
JoexAppImpl
|
JoexAppImpl
|
||||||
.create[F](cfg, signal, pools.connectEC, pools.httpClientEC, pools.blocker)
|
.create[F](cfg, signal, pools.connectEC, pools.httpClientEC)
|
||||||
|
|
||||||
httpApp = Router(
|
httpApp = Router(
|
||||||
"/api/info" -> InfoRoutes(cfg),
|
"/api/info" -> InfoRoutes(cfg),
|
||||||
|
@ -57,9 +57,8 @@ object Main extends IOApp {
|
|||||||
val pools = for {
|
val pools = for {
|
||||||
cec <- connectEC
|
cec <- connectEC
|
||||||
bec <- blockingEC
|
bec <- blockingEC
|
||||||
blocker = Blocker.liftExecutorService(bec)
|
|
||||||
rec <- restserverEC
|
rec <- restserverEC
|
||||||
} yield Pools(cec, bec, blocker, rec)
|
} yield Pools(cec, bec, rec)
|
||||||
pools.use(p =>
|
pools.use(p =>
|
||||||
JoexServer
|
JoexServer
|
||||||
.stream[IO](cfg, p)
|
.stream[IO](cfg, p)
|
||||||
|
@ -33,16 +33,15 @@ object NerFile {
|
|||||||
private def jsonFilePath(directory: Path, collective: Ident): Path =
|
private def jsonFilePath(directory: Path, collective: Ident): Path =
|
||||||
directory.resolve(s"${collective.id}.json")
|
directory.resolve(s"${collective.id}.json")
|
||||||
|
|
||||||
def find[F[_]: Sync: ContextShift](
|
def find[F[_]: Async](
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
directory: Path,
|
directory: Path
|
||||||
blocker: Blocker
|
|
||||||
): F[Option[NerFile]] = {
|
): F[Option[NerFile]] = {
|
||||||
val file = jsonFilePath(directory, collective)
|
val file = jsonFilePath(directory, collective)
|
||||||
File.existsNonEmpty[F](file).flatMap {
|
File.existsNonEmpty[F](file).flatMap {
|
||||||
case true =>
|
case true =>
|
||||||
File
|
File
|
||||||
.readJson[F, NerFile](file, blocker)
|
.readJson[F, NerFile](file)
|
||||||
.map(_.some)
|
.map(_.some)
|
||||||
case false =>
|
case false =>
|
||||||
(None: Option[NerFile]).pure[F]
|
(None: Option[NerFile]).pure[F]
|
||||||
|
@ -3,7 +3,7 @@ package docspell.joex.analysis
|
|||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.concurrent.Semaphore
|
import cats.effect.std.Semaphore
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -31,19 +31,17 @@ object RegexNerFile {
|
|||||||
|
|
||||||
case class Config(maxEntries: Int, directory: Path, minTime: Duration)
|
case class Config(maxEntries: Int, directory: Path, minTime: Duration)
|
||||||
|
|
||||||
def apply[F[_]: Concurrent: ContextShift](
|
def apply[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
blocker: Blocker,
|
|
||||||
store: Store[F]
|
store: Store[F]
|
||||||
): Resource[F, RegexNerFile[F]] =
|
): Resource[F, RegexNerFile[F]] =
|
||||||
for {
|
for {
|
||||||
dir <- File.withTempDir[F](cfg.directory, "regexner-")
|
dir <- File.withTempDir[F](cfg.directory, "regexner-")
|
||||||
writer <- Resource.eval(Semaphore(1))
|
writer <- Resource.eval(Semaphore(1))
|
||||||
} yield new Impl[F](cfg.copy(directory = dir), blocker, store, writer)
|
} yield new Impl[F](cfg.copy(directory = dir), store, writer)
|
||||||
|
|
||||||
final private class Impl[F[_]: Concurrent: ContextShift](
|
final private class Impl[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
blocker: Blocker,
|
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
writer: Semaphore[F] //TODO allow parallelism per collective
|
writer: Semaphore[F] //TODO allow parallelism per collective
|
||||||
) extends RegexNerFile[F] {
|
) extends RegexNerFile[F] {
|
||||||
@ -55,7 +53,7 @@ object RegexNerFile {
|
|||||||
def doMakeFile(collective: Ident): F[Option[Path]] =
|
def doMakeFile(collective: Ident): F[Option[Path]] =
|
||||||
for {
|
for {
|
||||||
now <- Timestamp.current[F]
|
now <- Timestamp.current[F]
|
||||||
existing <- NerFile.find[F](collective, cfg.directory, blocker)
|
existing <- NerFile.find[F](collective, cfg.directory)
|
||||||
result <- existing match {
|
result <- existing match {
|
||||||
case Some(nf) =>
|
case Some(nf) =>
|
||||||
val dur = Duration.between(nf.creation, now)
|
val dur = Duration.between(nf.creation, now)
|
||||||
@ -105,11 +103,13 @@ object RegexNerFile {
|
|||||||
} yield result
|
} yield result
|
||||||
|
|
||||||
private def updateTimestamp(nf: NerFile, now: Timestamp): F[Unit] =
|
private def updateTimestamp(nf: NerFile, now: Timestamp): F[Unit] =
|
||||||
writer.withPermit(for {
|
writer.permit.use(_ =>
|
||||||
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
|
for {
|
||||||
_ <- File.mkDir(file.getParent)
|
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
|
||||||
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
|
_ <- File.mkDir(file.getParent)
|
||||||
} yield ())
|
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
|
||||||
|
} yield ()
|
||||||
|
)
|
||||||
|
|
||||||
private def createFile(
|
private def createFile(
|
||||||
lastUpdate: Timestamp,
|
lastUpdate: Timestamp,
|
||||||
@ -117,13 +117,17 @@ object RegexNerFile {
|
|||||||
now: Timestamp
|
now: Timestamp
|
||||||
): F[NerFile] = {
|
): F[NerFile] = {
|
||||||
def update(nf: NerFile, text: String): F[Unit] =
|
def update(nf: NerFile, text: String): F[Unit] =
|
||||||
writer.withPermit(for {
|
writer.permit.use(_ =>
|
||||||
jsonFile <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
|
for {
|
||||||
_ <- logger.fdebug(s"Writing custom NER file for collective '${collective.id}'")
|
jsonFile <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
|
||||||
_ <- File.mkDir(jsonFile.getParent)
|
_ <- logger.fdebug(
|
||||||
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
|
s"Writing custom NER file for collective '${collective.id}'"
|
||||||
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
|
)
|
||||||
} yield ())
|
_ <- File.mkDir(jsonFile.getParent)
|
||||||
|
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
|
||||||
|
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
|
||||||
|
} yield ()
|
||||||
|
)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
_ <- logger.finfo(s"Generating custom NER file for collective '${collective.id}'")
|
_ <- logger.finfo(s"Generating custom NER file for collective '${collective.id}'")
|
||||||
|
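
The RegexNerFile changes above follow Semaphore from cats.effect.concurrent to cats.effect.std, where withPermit is gone and the permit is a Resource used via permit.use. A hedged sketch of the guarded-write pattern (names are illustrative):

import cats.effect.IO
import cats.effect.std.Semaphore
import cats.effect.unsafe.implicits.global

object SemaphoreSketch {
  def guarded(sem: Semaphore[IO], action: IO[Unit]): IO[Unit] =
    sem.permit.use(_ => action) // CE2 equivalent: sem.withPermit(action)

  def main(args: Array[String]): Unit =
    Semaphore[IO](1).flatMap(sem => guarded(sem, IO.unit)).unsafeRunSync()
}
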
@ -28,7 +28,7 @@ object Migration {
|
|||||||
def from[F[_]: Applicative: FlatMap](fm: FtsMigration[F]): Migration[F] =
|
def from[F[_]: Applicative: FlatMap](fm: FtsMigration[F]): Migration[F] =
|
||||||
Migration(fm.version, fm.engine, fm.description, FtsWork.from(fm.task))
|
Migration(fm.version, fm.engine, fm.description, FtsWork.from(fm.task))
|
||||||
|
|
||||||
def apply[F[_]: Effect](
|
def apply[F[_]: Async](
|
||||||
cfg: Config.FullTextSearch,
|
cfg: Config.FullTextSearch,
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
@ -41,7 +41,7 @@ object Migration {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
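Throughout this commit the CE2 constraints (`Effect`, `ConcurrentEffect`, `Concurrent` + `Timer` + `ContextShift`) collapse into the single CE3 `Async` constraint, which already implies `Sync`, `Temporal` and `Clock`. A small hedged sketch (illustrative, not from this commit) of what `Async` alone gives access to:

```scala
import cats.effect._
import scala.concurrent.duration.FiniteDuration

// Hedged sketch: with only Async in scope, the Clock capability is available
// without any extra implicit parameters.
def timed[F[_]: Async, A](fa: F[A]): F[(FiniteDuration, A)] =
  Clock[F].timed(fa)
```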
def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] =
|
def applySingle[F[_]: Async](ctx: FtsContext[F])(m: Migration[F]): F[Unit] =
|
||||||
for {
|
for {
|
||||||
_ <- ctx.logger.info(s"Apply ${m.version}/${m.description}")
|
_ <- ctx.logger.info(s"Apply ${m.version}/${m.description}")
|
||||||
_ <- m.task.run(ctx)
|
_ <- m.task.run(ctx)
|
||||||
|
@ -12,7 +12,7 @@ import docspell.store.records.RJob
|
|||||||
object MigrationTask {
|
object MigrationTask {
|
||||||
val taskName = Ident.unsafe("full-text-index")
|
val taskName = Ident.unsafe("full-text-index")
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](
|
def apply[F[_]: Async](
|
||||||
cfg: Config.FullTextSearch,
|
cfg: Config.FullTextSearch,
|
||||||
fts: FtsClient[F]
|
fts: FtsClient[F]
|
||||||
): Task[F, Unit, Unit] =
|
): Task[F, Unit, Unit] =
|
||||||
@ -46,7 +46,7 @@ object MigrationTask {
|
|||||||
Some(DocspellSystem.migrationTaskTracker)
|
Some(DocspellSystem.migrationTaskTracker)
|
||||||
)
|
)
|
||||||
|
|
||||||
def migrationTasks[F[_]: Effect](fts: FtsClient[F]): F[List[Migration[F]]] =
|
def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] =
|
||||||
fts.initialize.map(_.map(fm => Migration.from(fm)))
|
fts.initialize.map(_.map(fm => Migration.from(fm)))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,7 @@ object ReIndexTask {
|
|||||||
val taskName = ReIndexTaskArgs.taskName
|
val taskName = ReIndexTaskArgs.taskName
|
||||||
val tracker = DocspellSystem.migrationTaskTracker
|
val tracker = DocspellSystem.migrationTaskTracker
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](
|
def apply[F[_]: Async](
|
||||||
cfg: Config.FullTextSearch,
|
cfg: Config.FullTextSearch,
|
||||||
fts: FtsClient[F]
|
fts: FtsClient[F]
|
||||||
): Task[F, Args, Unit] =
|
): Task[F, Args, Unit] =
|
||||||
@ -27,7 +27,7 @@ object ReIndexTask {
|
|||||||
def onCancel[F[_]]: Task[F, Args, Unit] =
|
def onCancel[F[_]]: Task[F, Args, Unit] =
|
||||||
Task.log[F, Args](_.warn("Cancelling full-text re-index task"))
|
Task.log[F, Args](_.warn("Cancelling full-text re-index task"))
|
||||||
|
|
||||||
private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] =
|
private def clearData[F[_]: Async](collective: Option[Ident]): FtsWork[F] =
|
||||||
FtsWork.log[F](_.info("Clearing index data")) ++
|
FtsWork.log[F](_.info("Clearing index data")) ++
|
||||||
(collective match {
|
(collective match {
|
||||||
case Some(_) =>
|
case Some(_) =>
|
||||||
|
@ -7,19 +7,20 @@ import docspell.common._
|
|||||||
import docspell.joex.scheduler.{Context, Task}
|
import docspell.joex.scheduler.{Context, Task}
|
||||||
import docspell.store.records._
|
import docspell.store.records._
|
||||||
|
|
||||||
|
import org.http4s.blaze.client.BlazeClientBuilder
|
||||||
import org.http4s.client.Client
|
import org.http4s.client.Client
|
||||||
import org.http4s.client.blaze.BlazeClientBuilder
|
|
||||||
|
|
||||||
object CheckNodesTask {
|
object CheckNodesTask {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](
|
def apply[F[_]: Async](
|
||||||
cfg: HouseKeepingConfig.CheckNodes
|
cfg: HouseKeepingConfig.CheckNodes
|
||||||
): Task[F, Unit, Unit] =
|
): Task[F, Unit, Unit] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
if (cfg.enabled)
|
if (cfg.enabled)
|
||||||
for {
|
for {
|
||||||
_ <- ctx.logger.info("Check nodes reachability")
|
_ <- ctx.logger.info("Check nodes reachability")
|
||||||
_ <- BlazeClientBuilder[F](ctx.blocker.blockingContext).resource.use { client =>
|
ec = scala.concurrent.ExecutionContext.global
|
||||||
|
_ <- BlazeClientBuilder[F](ec).resource.use { client =>
|
||||||
checkNodes(ctx, client)
|
checkNodes(ctx, client)
|
||||||
}
|
}
|
||||||
_ <- ctx.logger.info(
|
_ <- ctx.logger.info(
|
||||||
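The client construction above no longer threads a Blocker through; with the CE3 http4s blaze client the builder only needs an ExecutionContext. A hedged sketch (package taken from the new import above, the helper name is illustrative):

```scala
import cats.effect._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client
import scala.concurrent.ExecutionContext

// Hedged sketch: build a client Resource without a Blocker.
def httpClient[F[_]: Async]: Resource[F, Client[F]] =
  BlazeClientBuilder[F](ExecutionContext.global).resource
```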
@ -32,7 +33,7 @@ object CheckNodesTask {
|
|||||||
ctx.logger.info("CheckNodes task is disabled in the configuration")
|
ctx.logger.info("CheckNodes task is disabled in the configuration")
|
||||||
}
|
}
|
||||||
|
|
||||||
def checkNodes[F[_]: Sync](ctx: Context[F, _], client: Client[F]): F[Unit] =
|
def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] =
|
||||||
ctx.store
|
ctx.store
|
||||||
.transact(RNode.streamAll)
|
.transact(RNode.streamAll)
|
||||||
.evalMap(node =>
|
.evalMap(node =>
|
||||||
@ -45,7 +46,7 @@ object CheckNodesTask {
|
|||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
|
|
||||||
def checkNode[F[_]: Sync](logger: Logger[F], client: Client[F])(
|
def checkNode[F[_]: Async](logger: Logger[F], client: Client[F])(
|
||||||
url: LenientUri
|
url: LenientUri
|
||||||
): F[Boolean] = {
|
): F[Boolean] = {
|
||||||
val apiVersion = url / "api" / "info" / "version"
|
val apiVersion = url / "api" / "info" / "version"
|
||||||
|
@ -15,7 +15,7 @@ object HouseKeepingTask {
|
|||||||
|
|
||||||
val taskName: Ident = Ident.unsafe("housekeeping")
|
val taskName: Ident = Ident.unsafe("housekeeping")
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect](cfg: Config): Task[F, Unit, Unit] =
|
def apply[F[_]: Async](cfg: Config): Task[F, Unit, Unit] =
|
||||||
Task
|
Task
|
||||||
.log[F, Unit](_.info(s"Running house-keeping task now"))
|
.log[F, Unit](_.info(s"Running house-keeping task now"))
|
||||||
.flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites))
|
.flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites))
|
||||||
|
@ -5,6 +5,7 @@ import java.nio.file.Path
|
|||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Files
|
||||||
|
|
||||||
import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
|
import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -15,8 +16,7 @@ import bitpeace.RangeDef
|
|||||||
|
|
||||||
object Classify {
|
object Classify {
|
||||||
|
|
||||||
def apply[F[_]: Sync: ContextShift](
|
def apply[F[_]: Async](
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
workingDir: Path,
|
workingDir: Path,
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
@ -36,7 +36,7 @@ object Classify {
|
|||||||
cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir =>
|
cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir =>
|
||||||
val modelFile = dir.resolve("model.ser.gz")
|
val modelFile = dir.resolve("model.ser.gz")
|
||||||
modelData
|
modelData
|
||||||
.through(fs2.io.file.writeAll(modelFile, blocker))
|
.through(Files[F].writeAll(modelFile))
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
.flatMap(_ => classifier.classify(logger, ClassifierModel(modelFile), text))
|
.flatMap(_ => classifier.classify(logger, ClassifierModel(modelFile), text))
|
||||||
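This hunk (and the model-storing code further down) swaps the blocker-based `fs2.io.file` helpers for the fs2 3.x `Files` capability, which is derived from `Async`. A hedged sketch of the read/write pair with illustrative names:

```scala
import java.nio.file.Path
import cats.effect._
import fs2.io.file.Files

// Hedged sketch: Files[F] replaces the (path, blocker) variants of
// readAll/writeAll; no Blocker or ContextShift is needed.
def copyBytes[F[_]: Async](in: Path, out: Path): F[Unit] =
  Files[F].readAll(in, 4096).through(Files[F].writeAll(out)).compile.drain
```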
|
@ -20,7 +20,7 @@ object LearnClassifierTask {
|
|||||||
def onCancel[F[_]]: Task[F, Args, Unit] =
|
def onCancel[F[_]]: Task[F, Args, Unit] =
|
||||||
Task.log(_.warn("Cancelling learn-classifier task"))
|
Task.log(_.warn("Cancelling learn-classifier task"))
|
||||||
|
|
||||||
def apply[F[_]: Sync: ContextShift](
|
def apply[F[_]: Async](
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
analyser: TextAnalyser[F]
|
analyser: TextAnalyser[F]
|
||||||
): Task[F, Args, Unit] =
|
): Task[F, Args, Unit] =
|
||||||
@ -28,7 +28,7 @@ object LearnClassifierTask {
|
|||||||
.flatMap(_ => learnItemEntities(cfg, analyser))
|
.flatMap(_ => learnItemEntities(cfg, analyser))
|
||||||
.flatMap(_ => Task(_ => Sync[F].delay(System.gc())))
|
.flatMap(_ => Task(_ => Sync[F].delay(System.gc())))
|
||||||
|
|
||||||
private def learnItemEntities[F[_]: Sync: ContextShift](
|
private def learnItemEntities[F[_]: Async](
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
analyser: TextAnalyser[F]
|
analyser: TextAnalyser[F]
|
||||||
): Task[F, Args, Unit] =
|
): Task[F, Args, Unit] =
|
||||||
@ -45,7 +45,7 @@ object LearnClassifierTask {
|
|||||||
else ().pure[F]
|
else ().pure[F]
|
||||||
}
|
}
|
||||||
|
|
||||||
private def learnTags[F[_]: Sync: ContextShift](
|
private def learnTags[F[_]: Async](
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
analyser: TextAnalyser[F]
|
analyser: TextAnalyser[F]
|
||||||
): Task[F, Args, Unit] =
|
): Task[F, Args, Unit] =
|
||||||
|
@ -11,7 +11,7 @@ import docspell.common._
|
|||||||
import docspell.joex.scheduler._
|
import docspell.joex.scheduler._
|
||||||
|
|
||||||
object LearnItemEntities {
|
object LearnItemEntities {
|
||||||
def learnAll[F[_]: Sync: ContextShift, A](
|
def learnAll[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
@ -22,7 +22,7 @@ object LearnItemEntities {
|
|||||||
.flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen))
|
.flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen))
|
||||||
.flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen))
|
.flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen))
|
||||||
|
|
||||||
def learnCorrOrg[F[_]: Sync: ContextShift, A](
|
def learnCorrOrg[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
@ -33,7 +33,7 @@ object LearnItemEntities {
|
|||||||
ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen)
|
ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen)
|
||||||
)
|
)
|
||||||
|
|
||||||
def learnCorrPerson[F[_]: Sync: ContextShift, A](
|
def learnCorrPerson[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
@ -44,7 +44,7 @@ object LearnItemEntities {
|
|||||||
ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen)
|
ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen)
|
||||||
)
|
)
|
||||||
|
|
||||||
def learnConcPerson[F[_]: Sync: ContextShift, A](
|
def learnConcPerson[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
@ -55,7 +55,7 @@ object LearnItemEntities {
|
|||||||
ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen)
|
ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen)
|
||||||
)
|
)
|
||||||
|
|
||||||
def learnConcEquip[F[_]: Sync: ContextShift, A](
|
def learnConcEquip[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
@ -66,7 +66,7 @@ object LearnItemEntities {
|
|||||||
ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen)
|
ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen)
|
||||||
)
|
)
|
||||||
|
|
||||||
private def learn[F[_]: Sync: ContextShift, A](
|
private def learn[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident
|
collective: Ident
|
||||||
)(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =
|
)(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =
|
||||||
|
@ -11,7 +11,7 @@ import docspell.store.records.RClassifierSetting
|
|||||||
|
|
||||||
object LearnTags {
|
object LearnTags {
|
||||||
|
|
||||||
def learnTagCategory[F[_]: Sync: ContextShift, A](
|
def learnTagCategory[F[_]: Async, A](
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
@ -33,7 +33,7 @@ object LearnTags {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
def learnAllTagCategories[F[_]: Sync: ContextShift, A](analyser: TextAnalyser[F])(
|
def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])(
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
maxItems: Int,
|
maxItems: Int,
|
||||||
maxTextLen: Int
|
maxTextLen: Int
|
||||||
|
@ -2,6 +2,7 @@ package docspell.joex.learn
|
|||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
import fs2.io.file.Files
|
||||||
|
|
||||||
import docspell.analysis.classifier.ClassifierModel
|
import docspell.analysis.classifier.ClassifierModel
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -13,18 +14,17 @@ import bitpeace.MimetypeHint
|
|||||||
|
|
||||||
object StoreClassifierModel {
|
object StoreClassifierModel {
|
||||||
|
|
||||||
def handleModel[F[_]: Sync: ContextShift](
|
def handleModel[F[_]: Async](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
modelName: ClassifierName
|
modelName: ClassifierName
|
||||||
)(
|
)(
|
||||||
trainedModel: ClassifierModel
|
trainedModel: ClassifierModel
|
||||||
): F[Unit] =
|
): F[Unit] =
|
||||||
handleModel(ctx.store, ctx.blocker, ctx.logger)(collective, modelName, trainedModel)
|
handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel)
|
||||||
|
|
||||||
def handleModel[F[_]: Sync: ContextShift](
|
def handleModel[F[_]: Async](
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
blocker: Blocker,
|
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(
|
)(
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
@ -36,7 +36,7 @@ object StoreClassifierModel {
|
|||||||
RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
|
RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
|
||||||
)
|
)
|
||||||
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
|
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
|
||||||
fileData = fs2.io.file.readAll(trainedModel.model, blocker, 4096)
|
fileData = Files[F].readAll(trainedModel.model, 4096)
|
||||||
newFile <-
|
newFile <-
|
||||||
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
|
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
|
||||||
_ <- store.transact(
|
_ <- store.transact(
|
||||||
|
@ -15,7 +15,7 @@ import emil.{MimeType => _, _}
|
|||||||
|
|
||||||
object ReadMail {
|
object ReadMail {
|
||||||
|
|
||||||
def readBytesP[F[_]: ConcurrentEffect](
|
def readBytesP[F[_]: Async](
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
glob: Glob
|
glob: Glob
|
||||||
): Pipe[F, Byte, Binary[F]] =
|
): Pipe[F, Byte, Binary[F]] =
|
||||||
@ -26,7 +26,7 @@ object ReadMail {
|
|||||||
Stream.eval(logger.debug(s"Converting e-mail file...")) >>
|
Stream.eval(logger.debug(s"Converting e-mail file...")) >>
|
||||||
s.through(Mail.readBytes[F])
|
s.through(Mail.readBytes[F])
|
||||||
|
|
||||||
def mailToEntries[F[_]: ConcurrentEffect](
|
def mailToEntries[F[_]: Async](
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
glob: Glob
|
glob: Glob
|
||||||
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
||||||
|
@ -35,7 +35,7 @@ object PdfConvTask {
|
|||||||
|
|
||||||
val taskName = Ident.unsafe("pdf-files-migration")
|
val taskName = Ident.unsafe("pdf-files-migration")
|
||||||
|
|
||||||
def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, Args, Unit] =
|
def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
for {
|
for {
|
||||||
_ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
|
_ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
|
||||||
@ -62,7 +62,7 @@ object PdfConvTask {
|
|||||||
val existsPdf =
|
val existsPdf =
|
||||||
for {
|
for {
|
||||||
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
|
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
|
||||||
res = meta.filter(_.mimetype.matches(Mimetype.`application/pdf`))
|
res = meta.filter(_.mimetype.matches(Mimetype.applicationPdf))
|
||||||
_ <-
|
_ <-
|
||||||
if (res.isEmpty)
|
if (res.isEmpty)
|
||||||
ctx.logger.info(
|
ctx.logger.info(
|
||||||
@ -83,7 +83,7 @@ object PdfConvTask {
|
|||||||
else none.pure[F]
|
else none.pure[F]
|
||||||
}
|
}
|
||||||
|
|
||||||
def convert[F[_]: Sync: ContextShift](
|
def convert[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
ctx: Context[F, Args],
|
ctx: Context[F, Args],
|
||||||
in: FileMeta
|
in: FileMeta
|
||||||
@ -118,7 +118,6 @@ object PdfConvTask {
|
|||||||
cfg.convert.ocrmypdf,
|
cfg.convert.ocrmypdf,
|
||||||
lang,
|
lang,
|
||||||
in.chunksize,
|
in.chunksize,
|
||||||
ctx.blocker,
|
|
||||||
ctx.logger
|
ctx.logger
|
||||||
)(data, storeResult)
|
)(data, storeResult)
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ object AttachmentPageCount {
|
|||||||
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
|
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
|
||||||
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
.getOrElse(Mimetype.`application/octet-stream`)
|
.getOrElse(Mimetype.applicationOctetStream)
|
||||||
.map(_.toLocal)
|
.map(_.toLocal)
|
||||||
|
|
||||||
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
|
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
|
||||||
|
@ -98,7 +98,7 @@ object AttachmentPreview {
|
|||||||
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
|
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
|
||||||
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
.getOrElse(Mimetype.`application/octet-stream`)
|
.getOrElse(Mimetype.applicationOctetStream)
|
||||||
.map(_.toLocal)
|
.map(_.toLocal)
|
||||||
|
|
||||||
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
|
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
|
||||||
|
@ -33,7 +33,7 @@ import bitpeace.{Mimetype, MimetypeHint, RangeDef}
|
|||||||
*/
|
*/
|
||||||
object ConvertPdf {
|
object ConvertPdf {
|
||||||
|
|
||||||
def apply[F[_]: Sync: ContextShift](
|
def apply[F[_]: Async](
|
||||||
cfg: ConvertConfig,
|
cfg: ConvertConfig,
|
||||||
item: ItemData
|
item: ItemData
|
||||||
): Task[F, ProcessItemArgs, ItemData] =
|
): Task[F, ProcessItemArgs, ItemData] =
|
||||||
@ -69,15 +69,15 @@ object ConvertPdf {
|
|||||||
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
||||||
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
.getOrElse(Mimetype.`application/octet-stream`)
|
.getOrElse(Mimetype.applicationOctetStream)
|
||||||
|
|
||||||
def convertSafe[F[_]: Sync: ContextShift](
|
def convertSafe[F[_]: Async](
|
||||||
cfg: ConvertConfig,
|
cfg: ConvertConfig,
|
||||||
sanitizeHtml: SanitizeHtml,
|
sanitizeHtml: SanitizeHtml,
|
||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
item: ItemData
|
item: ItemData
|
||||||
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
|
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
|
||||||
Conversion.create[F](cfg, sanitizeHtml, ctx.blocker, ctx.logger).use { conv =>
|
Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv =>
|
||||||
mime.toLocal match {
|
mime.toLocal match {
|
||||||
case mt =>
|
case mt =>
|
||||||
val data = ctx.store.bitpeace
|
val data = ctx.store.bitpeace
|
||||||
|
@ -32,12 +32,12 @@ import emil.Mail
|
|||||||
*/
|
*/
|
||||||
object ExtractArchive {
|
object ExtractArchive {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: ContextShift](
|
def apply[F[_]: Async](
|
||||||
item: ItemData
|
item: ItemData
|
||||||
): Task[F, ProcessItemArgs, ItemData] =
|
): Task[F, ProcessItemArgs, ItemData] =
|
||||||
multiPass(item, None).map(_._2)
|
multiPass(item, None).map(_._2)
|
||||||
|
|
||||||
def multiPass[F[_]: ConcurrentEffect: ContextShift](
|
def multiPass[F[_]: Async](
|
||||||
item: ItemData,
|
item: ItemData,
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
|
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
|
||||||
@ -46,7 +46,7 @@ object ExtractArchive {
|
|||||||
else multiPass(t._2, t._1)
|
else multiPass(t._2, t._1)
|
||||||
}
|
}
|
||||||
|
|
||||||
def singlePass[F[_]: ConcurrentEffect: ContextShift](
|
def singlePass[F[_]: Async](
|
||||||
item: ItemData,
|
item: ItemData,
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
|
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
|
||||||
@ -85,9 +85,9 @@ object ExtractArchive {
|
|||||||
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
||||||
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
.getOrElse(Mimetype.`application/octet-stream`)
|
.getOrElse(Mimetype.applicationOctetStream)
|
||||||
|
|
||||||
def extractSafe[F[_]: ConcurrentEffect: ContextShift](
|
def extractSafe[F[_]: Async](
|
||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
|
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
|
||||||
@ -131,7 +131,7 @@ object ExtractArchive {
|
|||||||
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
|
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractZip[F[_]: ConcurrentEffect: ContextShift](
|
def extractZip[F[_]: Async](
|
||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
||||||
@ -142,7 +142,7 @@ object ExtractArchive {
|
|||||||
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
|
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
|
||||||
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
|
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
|
||||||
zipData
|
zipData
|
||||||
.through(Zip.unzipP[F](8192, ctx.blocker, glob))
|
.through(Zip.unzipP[F](8192, glob))
|
||||||
.zipWithIndex
|
.zipWithIndex
|
||||||
.flatMap(handleEntry(ctx, ra, pos, archive, None))
|
.flatMap(handleEntry(ctx, ra, pos, archive, None))
|
||||||
.foldMonoid
|
.foldMonoid
|
||||||
@ -150,7 +150,7 @@ object ExtractArchive {
|
|||||||
.lastOrError
|
.lastOrError
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractMail[F[_]: ConcurrentEffect](
|
def extractMail[F[_]: Async](
|
||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
||||||
|
@ -28,7 +28,7 @@ object ItemHandler {
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def newItem[F[_]: ConcurrentEffect: ContextShift](
|
def newItem[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
itemOps: OItem[F],
|
itemOps: OItem[F],
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
@ -62,7 +62,7 @@ object ItemHandler {
|
|||||||
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
|
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
|
||||||
Task(_.isLastRetry)
|
Task(_.isLastRetry)
|
||||||
|
|
||||||
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
|
def safeProcess[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
itemOps: OItem[F],
|
itemOps: OItem[F],
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
|
@ -12,7 +12,7 @@ import docspell.joex.scheduler.Task
|
|||||||
|
|
||||||
object ProcessItem {
|
object ProcessItem {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: ContextShift](
|
def apply[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
itemOps: OItem[F],
|
itemOps: OItem[F],
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
@ -27,7 +27,7 @@ object ProcessItem {
|
|||||||
.flatMap(Task.setProgress(99))
|
.flatMap(Task.setProgress(99))
|
||||||
.flatMap(RemoveEmptyItem(itemOps))
|
.flatMap(RemoveEmptyItem(itemOps))
|
||||||
|
|
||||||
def processAttachments[F[_]: ConcurrentEffect: ContextShift](
|
def processAttachments[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
@ -35,7 +35,7 @@ object ProcessItem {
|
|||||||
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||||
processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item)
|
processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item)
|
||||||
|
|
||||||
def analysisOnly[F[_]: Sync: ContextShift](
|
def analysisOnly[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
regexNer: RegexNerFile[F]
|
regexNer: RegexNerFile[F]
|
||||||
@ -46,7 +46,7 @@ object ProcessItem {
|
|||||||
.flatMap(CrossCheckProposals[F])
|
.flatMap(CrossCheckProposals[F])
|
||||||
.flatMap(SaveProposals[F])
|
.flatMap(SaveProposals[F])
|
||||||
|
|
||||||
private def processAttachments0[F[_]: ConcurrentEffect: ContextShift](
|
private def processAttachments0[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
|
@ -20,7 +20,7 @@ import docspell.store.records.RItem
|
|||||||
object ReProcessItem {
|
object ReProcessItem {
|
||||||
type Args = ReProcessItemArgs
|
type Args = ReProcessItemArgs
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: ContextShift](
|
def apply[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
itemOps: OItem[F],
|
itemOps: OItem[F],
|
||||||
@ -84,7 +84,7 @@ object ReProcessItem {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
def processFiles[F[_]: ConcurrentEffect: ContextShift](
|
def processFiles[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
itemOps: OItem[F],
|
itemOps: OItem[F],
|
||||||
@ -133,7 +133,7 @@ object ReProcessItem {
|
|||||||
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
|
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
|
||||||
Task(_.isLastRetry)
|
Task(_.isLastRetry)
|
||||||
|
|
||||||
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
|
def safeProcess[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
fts: FtsClient[F],
|
fts: FtsClient[F],
|
||||||
itemOps: OItem[F],
|
itemOps: OItem[F],
|
||||||
|
@ -19,7 +19,7 @@ import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
|
|||||||
object TextAnalysis {
|
object TextAnalysis {
|
||||||
type Args = ProcessItemArgs
|
type Args = ProcessItemArgs
|
||||||
|
|
||||||
def apply[F[_]: Sync: ContextShift](
|
def apply[F[_]: Async](
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
nerFile: RegexNerFile[F]
|
nerFile: RegexNerFile[F]
|
||||||
@ -78,7 +78,7 @@ object TextAnalysis {
|
|||||||
} yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates))
|
} yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates))
|
||||||
}
|
}
|
||||||
|
|
||||||
def predictTags[F[_]: Sync: ContextShift](
|
def predictTags[F[_]: Async](
|
||||||
ctx: Context[F, Args],
|
ctx: Context[F, Args],
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
metas: Vector[RAttachmentMeta],
|
metas: Vector[RAttachmentMeta],
|
||||||
@ -97,7 +97,7 @@ object TextAnalysis {
|
|||||||
} yield tags.flatten
|
} yield tags.flatten
|
||||||
}
|
}
|
||||||
|
|
||||||
def predictItemEntities[F[_]: Sync: ContextShift](
|
def predictItemEntities[F[_]: Async](
|
||||||
ctx: Context[F, Args],
|
ctx: Context[F, Args],
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
metas: Vector[RAttachmentMeta],
|
metas: Vector[RAttachmentMeta],
|
||||||
@ -128,13 +128,12 @@ object TextAnalysis {
|
|||||||
.map(MetaProposalList.apply)
|
.map(MetaProposalList.apply)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def makeClassify[F[_]: Sync: ContextShift](
|
private def makeClassify[F[_]: Async](
|
||||||
ctx: Context[F, Args],
|
ctx: Context[F, Args],
|
||||||
cfg: Config.TextAnalysis,
|
cfg: Config.TextAnalysis,
|
||||||
classifier: TextClassifier[F]
|
classifier: TextClassifier[F]
|
||||||
)(text: String): ClassifierName => F[Option[String]] =
|
)(text: String): ClassifierName => F[Option[String]] =
|
||||||
Classify[F](
|
Classify[F](
|
||||||
ctx.blocker,
|
|
||||||
ctx.logger,
|
ctx.logger,
|
||||||
cfg.workingDir,
|
cfg.workingDir,
|
||||||
ctx.store,
|
ctx.store,
|
||||||
|
@ -15,7 +15,7 @@ import bitpeace.{Mimetype, RangeDef}
|
|||||||
|
|
||||||
object TextExtraction {
|
object TextExtraction {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])(
|
def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])(
|
||||||
item: ItemData
|
item: ItemData
|
||||||
): Task[F, ProcessItemArgs, ItemData] =
|
): Task[F, ProcessItemArgs, ItemData] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
@ -60,7 +60,7 @@ object TextExtraction {
|
|||||||
|
|
||||||
case class Result(am: RAttachmentMeta, td: TextData, tags: List[String] = Nil)
|
case class Result(am: RAttachmentMeta, td: TextData, tags: List[String] = Nil)
|
||||||
|
|
||||||
def extractTextIfEmpty[F[_]: Sync: ContextShift](
|
def extractTextIfEmpty[F[_]: Async](
|
||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
cfg: ExtractConfig,
|
cfg: ExtractConfig,
|
||||||
lang: Language,
|
lang: Language,
|
||||||
@ -93,7 +93,7 @@ object TextExtraction {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractTextToMeta[F[_]: Sync: ContextShift](
|
def extractTextToMeta[F[_]: Async](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
cfg: ExtractConfig,
|
cfg: ExtractConfig,
|
||||||
lang: Language,
|
lang: Language,
|
||||||
@ -132,13 +132,13 @@ object TextExtraction {
|
|||||||
def findMime: F[Mimetype] =
|
def findMime: F[Mimetype] =
|
||||||
OptionT(ctx.store.transact(RFileMeta.findById(fileId)))
|
OptionT(ctx.store.transact(RFileMeta.findById(fileId)))
|
||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
.getOrElse(Mimetype.`application/octet-stream`)
|
.getOrElse(Mimetype.applicationOctetStream)
|
||||||
|
|
||||||
findMime
|
findMime
|
||||||
.flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang))
|
.flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang))
|
||||||
}
|
}
|
||||||
|
|
||||||
private def extractTextFallback[F[_]: Sync: ContextShift](
|
private def extractTextFallback[F[_]: Async](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
cfg: ExtractConfig,
|
cfg: ExtractConfig,
|
||||||
ra: RAttachment,
|
ra: RAttachment,
|
||||||
@ -149,7 +149,7 @@ object TextExtraction {
|
|||||||
ctx.logger.error(s"Cannot extract text").map(_ => None)
|
ctx.logger.error(s"Cannot extract text").map(_ => None)
|
||||||
|
|
||||||
case id :: rest =>
|
case id :: rest =>
|
||||||
val extr = Extraction.create[F](ctx.blocker, ctx.logger, cfg)
|
val extr = Extraction.create[F](ctx.logger, cfg)
|
||||||
|
|
||||||
extractText[F](ctx, extr, lang)(id)
|
extractText[F](ctx, extr, lang)(id)
|
||||||
.flatMap({
|
.flatMap({
|
||||||
|
@ -14,7 +14,7 @@ import org.http4s.dsl.Http4sDsl
|
|||||||
|
|
||||||
object JoexRoutes {
|
object JoexRoutes {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: Timer](app: JoexApp[F]): HttpRoutes[F] = {
|
def apply[F[_]: Async](app: JoexApp[F]): HttpRoutes[F] = {
|
||||||
val dsl = new Http4sDsl[F] {}
|
val dsl = new Http4sDsl[F] {}
|
||||||
import dsl._
|
import dsl._
|
||||||
HttpRoutes.of[F] {
|
HttpRoutes.of[F] {
|
||||||
@ -34,8 +34,8 @@ object JoexRoutes {
|
|||||||
|
|
||||||
case POST -> Root / "shutdownAndExit" =>
|
case POST -> Root / "shutdownAndExit" =>
|
||||||
for {
|
for {
|
||||||
_ <- ConcurrentEffect[F].start(
|
_ <- Async[F].start(
|
||||||
Timer[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown
|
Temporal[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown
|
||||||
)
|
)
|
||||||
resp <- Ok(BasicResult(true, "Shutdown initiated."))
|
resp <- Ok(BasicResult(true, "Shutdown initiated."))
|
||||||
} yield resp
|
} yield resp
|
||||||
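The shutdown route above replaces `Timer` with `Temporal` (part of `Async`) and starts the delayed shutdown via `Async[F].start`. A hedged, self-contained sketch of the same fire-and-forget pattern (names are illustrative):

```scala
import cats.effect._
import cats.syntax.all._
import scala.concurrent.duration._

// Hedged sketch: sleep comes from Temporal, the background fiber from Async.
def fireAndForgetAfter[F[_]: Async](delay: FiniteDuration, action: F[Unit]): F[Unit] =
  Async[F].start(Temporal[F].sleep(delay) *> action).void
```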
|
@ -31,45 +31,40 @@ trait Context[F[_], A] { self =>
|
|||||||
last = config.retries == current.getOrElse(0)
|
last = config.retries == current.getOrElse(0)
|
||||||
} yield last
|
} yield last
|
||||||
|
|
||||||
def blocker: Blocker
|
|
||||||
|
|
||||||
def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] =
|
def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] =
|
||||||
new Context.ContextImpl[F, C](f(args), logger, store, blocker, config, jobId)
|
new Context.ContextImpl[F, C](f(args), logger, store, config, jobId)
|
||||||
}
|
}
|
||||||
|
|
||||||
object Context {
|
object Context {
|
||||||
private[this] val log = getLogger
|
private[this] val log = getLogger
|
||||||
|
|
||||||
def create[F[_]: Functor, A](
|
def create[F[_]: Async, A](
|
||||||
jobId: Ident,
|
jobId: Ident,
|
||||||
arg: A,
|
arg: A,
|
||||||
config: SchedulerConfig,
|
config: SchedulerConfig,
|
||||||
log: Logger[F],
|
log: Logger[F],
|
||||||
store: Store[F],
|
store: Store[F]
|
||||||
blocker: Blocker
|
|
||||||
): Context[F, A] =
|
): Context[F, A] =
|
||||||
new ContextImpl(arg, log, store, blocker, config, jobId)
|
new ContextImpl(arg, log, store, config, jobId)
|
||||||
|
|
||||||
def apply[F[_]: Concurrent, A](
|
def apply[F[_]: Async, A](
|
||||||
job: RJob,
|
job: RJob,
|
||||||
arg: A,
|
arg: A,
|
||||||
config: SchedulerConfig,
|
config: SchedulerConfig,
|
||||||
logSink: LogSink[F],
|
logSink: LogSink[F],
|
||||||
blocker: Blocker,
|
|
||||||
store: Store[F]
|
store: Store[F]
|
||||||
): F[Context[F, A]] =
|
): F[Context[F, A]] =
|
||||||
for {
|
for {
|
||||||
_ <- log.ftrace("Creating logger for task run")
|
_ <- log.ftrace("Creating logger for task run")
|
||||||
logger <- QueueLogger(job.id, job.info, config.logBufferSize, logSink)
|
logger <- QueueLogger(job.id, job.info, config.logBufferSize, logSink)
|
||||||
_ <- log.ftrace("Logger created, instantiating context")
|
_ <- log.ftrace("Logger created, instantiating context")
|
||||||
ctx = create[F, A](job.id, arg, config, logger, store, blocker)
|
ctx = create[F, A](job.id, arg, config, logger, store)
|
||||||
} yield ctx
|
} yield ctx
|
||||||
|
|
||||||
final private class ContextImpl[F[_]: Functor, A](
|
final private class ContextImpl[F[_]: Functor, A](
|
||||||
val args: A,
|
val args: A,
|
||||||
val logger: Logger[F],
|
val logger: Logger[F],
|
||||||
val store: Store[F],
|
val store: Store[F],
|
||||||
val blocker: Blocker,
|
|
||||||
val config: SchedulerConfig,
|
val config: SchedulerConfig,
|
||||||
val jobId: Ident
|
val jobId: Ident
|
||||||
) extends Context[F, A] {
|
) extends Context[F, A] {
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
package docspell.joex.scheduler
|
package docspell.joex.scheduler
|
||||||
|
|
||||||
import cats.effect.{Concurrent, Sync}
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.{Pipe, Stream}
|
import fs2.Pipe
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.common.syntax.all._
|
import docspell.common.syntax.all._
|
||||||
@ -45,7 +45,7 @@ object LogSink {
|
|||||||
def printer[F[_]: Sync]: LogSink[F] =
|
def printer[F[_]: Sync]: LogSink[F] =
|
||||||
LogSink(_.evalMap(e => logInternal(e)))
|
LogSink(_.evalMap(e => logInternal(e)))
|
||||||
|
|
||||||
def db[F[_]: Sync](store: Store[F]): LogSink[F] =
|
def db[F[_]: Async](store: Store[F]): LogSink[F] =
|
||||||
LogSink(
|
LogSink(
|
||||||
_.evalMap(ev =>
|
_.evalMap(ev =>
|
||||||
for {
|
for {
|
||||||
@ -63,9 +63,6 @@ object LogSink {
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def dbAndLog[F[_]: Concurrent](store: Store[F]): LogSink[F] = {
|
def dbAndLog[F[_]: Async](store: Store[F]): LogSink[F] =
|
||||||
val s: Stream[F, Pipe[F, LogEvent, Unit]] =
|
LogSink(_.broadcastThrough(printer[F].receive, db[F](store).receive))
|
||||||
Stream.emits(Seq(printer[F].receive, db[F](store).receive))
|
|
||||||
LogSink(Pipe.join(s))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
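`dbAndLog` now relies on fs2 3.x's `broadcastThrough` instead of the old `Pipe.join` over a stream of pipes. A hedged sketch of fanning one stream into two sinks (illustrative names):

```scala
import cats.effect._
import fs2.{Pipe, Stream}

// Hedged sketch: every element is sent through both pipes concurrently.
def fanOut[F[_]: Async, A](p1: Pipe[F, A, Unit], p2: Pipe[F, A, Unit]): Pipe[F, A, Unit] =
  (in: Stream[F, A]) => in.broadcastThrough(p1, p2)
```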
|
@ -24,20 +24,19 @@ trait PeriodicScheduler[F[_]] {
|
|||||||
|
|
||||||
def shutdown: F[Unit]
|
def shutdown: F[Unit]
|
||||||
|
|
||||||
def periodicAwake: F[Fiber[F, Unit]]
|
def periodicAwake: F[Fiber[F, Throwable, Unit]]
|
||||||
|
|
||||||
def notifyChange: F[Unit]
|
def notifyChange: F[Unit]
|
||||||
}
|
}
|
||||||
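The `Fiber` signatures above pick up CE3's extra error-type parameter. A hedged sketch (not from this commit) showing how such a fiber is produced:

```scala
import cats.effect._

// Hedged sketch: CE3 fibers are Fiber[F, E, A]; start comes from the Spawn
// capability that Async provides, and fiber.cancel is a plain F[Unit].
def background[F[_]: Async](work: F[Unit]): F[Fiber[F, Throwable, Unit]] =
  Async[F].start(work)
```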
|
|
||||||
object PeriodicScheduler {
|
object PeriodicScheduler {
|
||||||
|
|
||||||
def create[F[_]: ConcurrentEffect](
|
def create[F[_]: Async](
|
||||||
cfg: PeriodicSchedulerConfig,
|
cfg: PeriodicSchedulerConfig,
|
||||||
sch: Scheduler[F],
|
sch: Scheduler[F],
|
||||||
queue: JobQueue[F],
|
queue: JobQueue[F],
|
||||||
store: PeriodicTaskStore[F],
|
store: PeriodicTaskStore[F],
|
||||||
client: JoexClient[F],
|
client: JoexClient[F]
|
||||||
timer: Timer[F]
|
|
||||||
): Resource[F, PeriodicScheduler[F]] =
|
): Resource[F, PeriodicScheduler[F]] =
|
||||||
for {
|
for {
|
||||||
waiter <- Resource.eval(SignallingRef(true))
|
waiter <- Resource.eval(SignallingRef(true))
|
||||||
@ -49,8 +48,7 @@ object PeriodicScheduler {
|
|||||||
store,
|
store,
|
||||||
client,
|
client,
|
||||||
waiter,
|
waiter,
|
||||||
state,
|
state
|
||||||
timer
|
|
||||||
)
|
)
|
||||||
_ <- Resource.eval(psch.init)
|
_ <- Resource.eval(psch.init)
|
||||||
} yield psch
|
} yield psch
|
||||||
|
@ -12,21 +12,19 @@ import docspell.joexapi.client.JoexClient
|
|||||||
import docspell.store.queue._
|
import docspell.store.queue._
|
||||||
import docspell.store.records.RPeriodicTask
|
import docspell.store.records.RPeriodicTask
|
||||||
|
|
||||||
import com.github.eikek.fs2calev._
|
import eu.timepit.fs2cron.calev.CalevScheduler
|
||||||
import org.log4s.getLogger
|
import org.log4s.getLogger
|
||||||
|
|
||||||
final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
|
final class PeriodicSchedulerImpl[F[_]: Async](
|
||||||
val config: PeriodicSchedulerConfig,
|
val config: PeriodicSchedulerConfig,
|
||||||
sch: Scheduler[F],
|
sch: Scheduler[F],
|
||||||
queue: JobQueue[F],
|
queue: JobQueue[F],
|
||||||
store: PeriodicTaskStore[F],
|
store: PeriodicTaskStore[F],
|
||||||
client: JoexClient[F],
|
client: JoexClient[F],
|
||||||
waiter: SignallingRef[F, Boolean],
|
waiter: SignallingRef[F, Boolean],
|
||||||
state: SignallingRef[F, State[F]],
|
state: SignallingRef[F, State[F]]
|
||||||
timer: Timer[F]
|
|
||||||
) extends PeriodicScheduler[F] {
|
) extends PeriodicScheduler[F] {
|
||||||
private[this] val logger = getLogger
|
private[this] val logger = getLogger
|
||||||
implicit private val _timer: Timer[F] = timer
|
|
||||||
|
|
||||||
def start: Stream[F, Nothing] =
|
def start: Stream[F, Nothing] =
|
||||||
logger.sinfo("Starting periodic scheduler") ++
|
logger.sinfo("Starting periodic scheduler") ++
|
||||||
@ -35,8 +33,8 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
|
|||||||
def shutdown: F[Unit] =
|
def shutdown: F[Unit] =
|
||||||
state.modify(_.requestShutdown)
|
state.modify(_.requestShutdown)
|
||||||
|
|
||||||
def periodicAwake: F[Fiber[F, Unit]] =
|
def periodicAwake: F[Fiber[F, Throwable, Unit]] =
|
||||||
ConcurrentEffect[F].start(
|
Async[F].start(
|
||||||
Stream
|
Stream
|
||||||
.awakeEvery[F](config.wakeupPeriod.toScala)
|
.awakeEvery[F](config.wakeupPeriod.toScala)
|
||||||
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
|
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
|
||||||
@ -127,10 +125,11 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
|
|||||||
s"Scheduling next notify for timer ${pj.timer.asString} -> ${pj.timer.nextElapse(now.toUtcDateTime)}"
|
s"Scheduling next notify for timer ${pj.timer.asString} -> ${pj.timer.nextElapse(now.toUtcDateTime)}"
|
||||||
)
|
)
|
||||||
) *>
|
) *>
|
||||||
ConcurrentEffect[F]
|
Async[F]
|
||||||
.start(
|
.start(
|
||||||
CalevFs2
|
CalevScheduler
|
||||||
.sleep[F](pj.timer)
|
.utc[F]
|
||||||
|
.sleep(pj.timer)
|
||||||
.evalMap(_ => notifyChange)
|
.evalMap(_ => notifyChange)
|
||||||
.compile
|
.compile
|
||||||
.drain
|
.drain
|
||||||
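The fs2-cron calev API replaces the old `CalevFs2` helpers here. Assuming the `eu.timepit.fs2cron.calev` module referenced in the new import above, sleeping until the next calendar event looks roughly like this hedged sketch:

```scala
import cats.effect._
import com.github.eikek.calev.CalEvent
import eu.timepit.fs2cron.calev.CalevScheduler

// Hedged sketch: a UTC-based scheduler built from the Temporal capability;
// sleep emits once the event's next occurrence is reached.
def sleepUntilNext[F[_]: Async](timer: CalEvent): fs2.Stream[F, Unit] =
  CalevScheduler.utc[F].sleep(timer)
```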
@ -168,15 +167,15 @@ object PeriodicSchedulerImpl {
|
|||||||
|
|
||||||
case class State[F[_]](
|
case class State[F[_]](
|
||||||
shutdownRequest: Boolean,
|
shutdownRequest: Boolean,
|
||||||
scheduledNotify: Option[Fiber[F, Unit]]
|
scheduledNotify: Option[Fiber[F, Throwable, Unit]]
|
||||||
) {
|
) {
|
||||||
def requestShutdown: (State[F], Unit) =
|
def requestShutdown: (State[F], Unit) =
|
||||||
(copy(shutdownRequest = true), ())
|
(copy(shutdownRequest = true), ())
|
||||||
|
|
||||||
def setNotify(fb: Fiber[F, Unit]): (State[F], Unit) =
|
def setNotify(fb: Fiber[F, Throwable, Unit]): (State[F], Unit) =
|
||||||
(copy(scheduledNotify = Some(fb)), ())
|
(copy(scheduledNotify = Some(fb)), ())
|
||||||
|
|
||||||
def clearNotify: (State[F], Option[Fiber[F, Unit]]) =
|
def clearNotify: (State[F], Option[Fiber[F, Throwable, Unit]]) =
|
||||||
(copy(scheduledNotify = None), scheduledNotify)
|
(copy(scheduledNotify = None), scheduledNotify)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
package docspell.joex.scheduler
|
package docspell.joex.scheduler
|
||||||
|
|
||||||
import cats.effect.{Concurrent, Sync}
|
import cats.effect._
|
||||||
|
import cats.effect.std.Queue
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.concurrent.Queue
|
import fs2.Stream
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
@ -15,28 +16,28 @@ object QueueLogger {
|
|||||||
): Logger[F] =
|
): Logger[F] =
|
||||||
new Logger[F] {
|
new Logger[F] {
|
||||||
def trace(msg: => String): F[Unit] =
|
def trace(msg: => String): F[Unit] =
|
||||||
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.enqueue1)
|
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.offer)
|
||||||
|
|
||||||
def debug(msg: => String): F[Unit] =
|
def debug(msg: => String): F[Unit] =
|
||||||
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.enqueue1)
|
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.offer)
|
||||||
|
|
||||||
def info(msg: => String): F[Unit] =
|
def info(msg: => String): F[Unit] =
|
||||||
LogEvent.create[F](jobId, jobInfo, LogLevel.Info, msg).flatMap(q.enqueue1)
|
LogEvent.create[F](jobId, jobInfo, LogLevel.Info, msg).flatMap(q.offer)
|
||||||
|
|
||||||
def warn(msg: => String): F[Unit] =
|
def warn(msg: => String): F[Unit] =
|
||||||
LogEvent.create[F](jobId, jobInfo, LogLevel.Warn, msg).flatMap(q.enqueue1)
|
LogEvent.create[F](jobId, jobInfo, LogLevel.Warn, msg).flatMap(q.offer)
|
||||||
|
|
||||||
def error(ex: Throwable)(msg: => String): F[Unit] =
|
def error(ex: Throwable)(msg: => String): F[Unit] =
|
||||||
LogEvent
|
LogEvent
|
||||||
.create[F](jobId, jobInfo, LogLevel.Error, msg)
|
.create[F](jobId, jobInfo, LogLevel.Error, msg)
|
||||||
.map(le => le.copy(ex = Some(ex)))
|
.map(le => le.copy(ex = Some(ex)))
|
||||||
.flatMap(q.enqueue1)
|
.flatMap(q.offer)
|
||||||
|
|
||||||
def error(msg: => String): F[Unit] =
|
def error(msg: => String): F[Unit] =
|
||||||
LogEvent.create[F](jobId, jobInfo, LogLevel.Error, msg).flatMap(q.enqueue1)
|
LogEvent.create[F](jobId, jobInfo, LogLevel.Error, msg).flatMap(q.offer)
|
||||||
}
|
}
|
||||||
|
|
||||||
def apply[F[_]: Concurrent](
|
def apply[F[_]: Async](
|
||||||
jobId: Ident,
|
jobId: Ident,
|
||||||
jobInfo: String,
|
jobInfo: String,
|
||||||
bufferSize: Int,
|
bufferSize: Int,
|
||||||
@ -45,7 +46,9 @@ object QueueLogger {
|
|||||||
for {
|
for {
|
||||||
q <- Queue.circularBuffer[F, LogEvent](bufferSize)
|
q <- Queue.circularBuffer[F, LogEvent](bufferSize)
|
||||||
log = create(jobId, jobInfo, q)
|
log = create(jobId, jobInfo, q)
|
||||||
_ <- Concurrent[F].start(q.dequeue.through(sink.receive).compile.drain)
|
_ <- Async[F].start(
|
||||||
|
Stream.fromQueueUnterminated(q).through(sink.receive).compile.drain
|
||||||
|
)
|
||||||
} yield log
|
} yield log
|
||||||
|
|
||||||
}
|
}
|
||||||
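`QueueLogger` now uses `cats.effect.std.Queue`: `enqueue1` becomes `offer`, and draining happens through `Stream.fromQueueUnterminated`. A hedged sketch of the producer/consumer wiring with illustrative names:

```scala
import cats.effect._
import cats.effect.std.Queue
import cats.syntax.all._
import fs2.Stream

// Hedged sketch: a bounded circular buffer drained by a background fiber;
// the returned function enqueues via offer.
def buffered[F[_]: Async, A](size: Int)(handle: A => F[Unit]): F[A => F[Unit]] =
  for {
    q <- Queue.circularBuffer[F, A](size)
    _ <- Async[F].start(Stream.fromQueueUnterminated(q).evalMap(handle).compile.drain)
  } yield (a: A) => q.offer(a)
```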
|
@ -1,6 +1,6 @@
|
|||||||
package docspell.joex.scheduler
|
package docspell.joex.scheduler
|
||||||
|
|
||||||
import cats.effect.{Fiber, Timer}
|
import cats.effect._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
|
||||||
import docspell.common.Ident
|
import docspell.common.Ident
|
||||||
@ -30,5 +30,5 @@ trait Scheduler[F[_]] {
|
|||||||
*/
|
*/
|
||||||
def shutdown(cancelAll: Boolean): F[Unit]
|
def shutdown(cancelAll: Boolean): F[Unit]
|
||||||
|
|
||||||
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]]
|
def periodicAwake: F[Fiber[F, Throwable, Unit]]
|
||||||
}
|
}
|
||||||
|
@ -1,18 +1,17 @@
|
|||||||
package docspell.joex.scheduler
|
package docspell.joex.scheduler
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.concurrent.Semaphore
|
import cats.effect.std.Semaphore
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.concurrent.SignallingRef
|
import fs2.concurrent.SignallingRef
|
||||||
|
|
||||||
import docspell.store.Store
|
import docspell.store.Store
|
||||||
import docspell.store.queue.JobQueue
|
import docspell.store.queue.JobQueue
|
||||||
|
|
||||||
case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
|
case class SchedulerBuilder[F[_]: Async](
|
||||||
config: SchedulerConfig,
|
config: SchedulerConfig,
|
||||||
tasks: JobTaskRegistry[F],
|
tasks: JobTaskRegistry[F],
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
blocker: Blocker,
|
|
||||||
queue: Resource[F, JobQueue[F]],
|
queue: Resource[F, JobQueue[F]],
|
||||||
logSink: LogSink[F]
|
logSink: LogSink[F]
|
||||||
) {
|
) {
|
||||||
@ -27,10 +26,7 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
withTaskRegistry(tasks.withTask(task))
|
withTaskRegistry(tasks.withTask(task))
|
||||||
|
|
||||||
def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] =
|
def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] =
|
||||||
SchedulerBuilder[F](config, tasks, store, blocker, queue, logSink)
|
SchedulerBuilder[F](config, tasks, store, queue, logSink)
|
||||||
|
|
||||||
def withBlocker(blocker: Blocker): SchedulerBuilder[F] =
|
|
||||||
copy(blocker = blocker)
|
|
||||||
|
|
||||||
def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] =
|
def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] =
|
||||||
copy(logSink = sink)
|
copy(logSink = sink)
|
||||||
@ -39,19 +35,16 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
copy(queue = Resource.pure[F, JobQueue[F]](queue))
|
copy(queue = Resource.pure[F, JobQueue[F]](queue))
|
||||||
|
|
||||||
def serve: Resource[F, Scheduler[F]] =
|
def serve: Resource[F, Scheduler[F]] =
|
||||||
resource.evalMap(sch =>
|
resource.evalMap(sch => Async[F].start(sch.start.compile.drain).map(_ => sch))
|
||||||
ConcurrentEffect[F].start(sch.start.compile.drain).map(_ => sch)
|
|
||||||
)
|
|
||||||
|
|
||||||
def resource: Resource[F, Scheduler[F]] = {
|
def resource: Resource[F, Scheduler[F]] = {
|
||||||
val scheduler = for {
|
val scheduler: Resource[F, SchedulerImpl[F]] = for {
|
||||||
jq <- queue
|
jq <- queue
|
||||||
waiter <- Resource.eval(SignallingRef(true))
|
waiter <- Resource.eval(SignallingRef(true))
|
||||||
state <- Resource.eval(SignallingRef(SchedulerImpl.emptyState[F]))
|
state <- Resource.eval(SignallingRef(SchedulerImpl.emptyState[F]))
|
||||||
perms <- Resource.eval(Semaphore(config.poolSize.toLong))
|
perms <- Resource.eval(Semaphore(config.poolSize.toLong))
|
||||||
} yield new SchedulerImpl[F](
|
} yield new SchedulerImpl[F](
|
||||||
config,
|
config,
|
||||||
blocker,
|
|
||||||
jq,
|
jq,
|
||||||
tasks,
|
tasks,
|
||||||
store,
|
store,
|
||||||
@ -68,16 +61,14 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
|
|
||||||
object SchedulerBuilder {
|
object SchedulerBuilder {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: ContextShift](
|
def apply[F[_]: Async](
|
||||||
config: SchedulerConfig,
|
config: SchedulerConfig,
|
||||||
blocker: Blocker,
|
|
||||||
store: Store[F]
|
store: Store[F]
|
||||||
): SchedulerBuilder[F] =
|
): SchedulerBuilder[F] =
|
||||||
new SchedulerBuilder[F](
|
new SchedulerBuilder[F](
|
||||||
config,
|
config,
|
||||||
JobTaskRegistry.empty[F],
|
JobTaskRegistry.empty[F],
|
||||||
store,
|
store,
|
||||||
blocker,
|
|
||||||
JobQueue(store),
|
JobQueue(store),
|
||||||
LogSink.db[F](store)
|
LogSink.db[F](store)
|
||||||
)
|
)
|
||||||
|
@ -2,7 +2,7 @@ package docspell.joex.scheduler
|
|||||||
|
|
||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.effect.concurrent.Semaphore
|
import cats.effect.std.Semaphore
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
import fs2.concurrent.SignallingRef
|
import fs2.concurrent.SignallingRef
|
||||||
@ -17,9 +17,8 @@ import docspell.store.records.RJob
|
|||||||
|
|
||||||
import org.log4s._
|
import org.log4s._
|
||||||
|
|
||||||
final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
final class SchedulerImpl[F[_]: Async](
|
||||||
val config: SchedulerConfig,
|
val config: SchedulerConfig,
|
||||||
blocker: Blocker,
|
|
||||||
queue: JobQueue[F],
|
queue: JobQueue[F],
|
||||||
tasks: JobTaskRegistry[F],
|
tasks: JobTaskRegistry[F],
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
@ -37,8 +36,8 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
def init: F[Unit] =
|
def init: F[Unit] =
|
||||||
QJob.runningToWaiting(config.name, store)
|
QJob.runningToWaiting(config.name, store)
|
||||||
|
|
||||||
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]] =
|
def periodicAwake: F[Fiber[F, Throwable, Unit]] =
|
||||||
ConcurrentEffect[F].start(
|
Async[F].start(
|
||||||
Stream
|
Stream
|
||||||
.awakeEvery[F](config.wakeupPeriod.toScala)
|
.awakeEvery[F](config.wakeupPeriod.toScala)
|
||||||
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
|
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
|
||||||
@ -153,7 +152,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
for {
|
for {
|
||||||
_ <-
|
_ <-
|
||||||
logger.fdebug(s"Creating context for job ${job.info} to run cancellation $t")
|
logger.fdebug(s"Creating context for job ${job.info} to run cancellation $t")
|
||||||
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store)
|
ctx <- Context[F, String](job, job.args, config, logSink, store)
|
||||||
_ <- t.onCancel.run(ctx)
|
_ <- t.onCancel.run(ctx)
|
||||||
_ <- state.modify(_.markCancelled(job))
|
_ <- state.modify(_.markCancelled(job))
|
||||||
_ <- onFinish(job, JobState.Cancelled)
|
_ <- onFinish(job, JobState.Cancelled)
|
||||||
@ -177,7 +176,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
case Right(t) =>
|
case Right(t) =>
|
||||||
for {
|
for {
|
||||||
_ <- logger.fdebug(s"Creating context for job ${job.info} to run $t")
|
_ <- logger.fdebug(s"Creating context for job ${job.info} to run $t")
|
||||||
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store)
|
ctx <- Context[F, String](job, job.args, config, logSink, store)
|
||||||
jot = wrapTask(job, t.task, ctx)
|
jot = wrapTask(job, t.task, ctx)
|
||||||
tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx)
|
tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx)
|
||||||
_ <- state.modify(_.addRunning(job, tok))
|
_ <- state.modify(_.addRunning(job, tok))
|
||||||
@ -208,9 +207,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
ctx: Context[F, String]
|
ctx: Context[F, String]
|
||||||
): Task[F, String, Unit] =
|
): Task[F, String, Unit] =
|
||||||
task
|
task
|
||||||
.mapF(fa =>
|
.mapF(fa => onStart(job) *> logger.fdebug("Starting task now") *> fa)
|
||||||
onStart(job) *> logger.fdebug("Starting task now") *> blocker.blockOn(fa)
|
|
||||||
)
|
|
||||||
.mapF(_.attempt.flatMap({
|
.mapF(_.attempt.flatMap({
|
||||||
case Right(()) =>
|
case Right(()) =>
|
||||||
logger.info(s"Job execution successful: ${job.info}")
|
logger.info(s"Job execution successful: ${job.info}")
|
||||||
@ -252,11 +249,10 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
code: F[Unit],
|
code: F[Unit],
|
||||||
onCancel: F[Unit],
|
onCancel: F[Unit],
|
||||||
ctx: Context[F, String]
|
ctx: Context[F, String]
|
||||||
): F[F[Unit]] = {
|
): F[F[Unit]] =
|
||||||
val bfa = blocker.blockOn(code)
|
|
||||||
logger.fdebug(s"Forking job ${job.info}") *>
|
logger.fdebug(s"Forking job ${job.info}") *>
|
||||||
ConcurrentEffect[F]
|
Async[F]
|
||||||
.start(bfa)
|
.start(code)
|
||||||
.map(fiber =>
|
.map(fiber =>
|
||||||
logger.fdebug(s"Cancelling job ${job.info}") *>
|
logger.fdebug(s"Cancelling job ${job.info}") *>
|
||||||
fiber.cancel *>
|
fiber.cancel *>
|
||||||
@ -271,11 +267,12 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
|
|||||||
ctx.logger.warn("Job has been cancelled.") *>
|
ctx.logger.warn("Job has been cancelled.") *>
|
||||||
logger.fdebug(s"Job ${job.info} has been cancelled.")
|
logger.fdebug(s"Job ${job.info} has been cancelled.")
|
||||||
)
|
)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
object SchedulerImpl {
|
object SchedulerImpl {
|
||||||
|
|
||||||
|
type CancelToken[F[_]] = F[Unit]
|
||||||
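The scheduler keeps the started fiber's cancel action as a plain `F[Unit]` (the new `CancelToken` alias above). A hedged sketch of forking a job and deriving that token, mirroring the `forkRun` change but with illustrative names:

```scala
import cats.effect._
import cats.syntax.all._

// Hedged sketch: starting the job yields a Fiber; its cancel action plus an
// onCancel cleanup form the token that is stored for later cancellation.
def forkJob[F[_]: Async](job: F[Unit], onCancel: F[Unit]): F[F[Unit]] =
  Async[F].start(job).map(fiber => fiber.cancel *> onCancel)
```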
|
|
||||||
def emptyState[F[_]]: State[F] =
|
def emptyState[F[_]]: State[F] =
|
||||||
State(Map.empty, Set.empty, Map.empty, false)
|
State(Map.empty, Set.empty, Map.empty, false)
|
||||||
|
|
||||||
@@ -9,9 +9,9 @@ import docspell.common.syntax.all._
 import docspell.common.{Ident, LenientUri}
 import docspell.joexapi.model.BasicResult

-import org.http4s.circe.CirceEntityDecoder._
+import org.http4s.blaze.client.BlazeClientBuilder
+import org.http4s.circe.CirceEntityDecoder
 import org.http4s.client.Client
-import org.http4s.client.blaze.BlazeClientBuilder
 import org.http4s.{Method, Request, Uri}
 import org.log4s.getLogger

@@ -29,8 +29,9 @@ object JoexClient {

   private[this] val logger = getLogger

-  def apply[F[_]: Sync](client: Client[F]): JoexClient[F] =
-    new JoexClient[F] {
+  def apply[F[_]: Async](client: Client[F]): JoexClient[F] =
+    new JoexClient[F] with CirceEntityDecoder {

       def notifyJoex(base: LenientUri): F[BasicResult] = {
         val notifyUrl = base / "api" / "v1" / "notify"
         val req = Request[F](Method.POST, uri(notifyUrl))
@@ -62,6 +63,6 @@ object JoexClient {
     Uri.unsafeFromString(u.asString)
   }

-  def resource[F[_]: ConcurrentEffect](ec: ExecutionContext): Resource[F, JoexClient[F]] =
+  def resource[F[_]: Async](ec: ExecutionContext): Resource[F, JoexClient[F]] =
     BlazeClientBuilder[F](ec).resource.map(apply[F])
 }
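
Two http4s 0.23 details surface in this client: BlazeClientBuilder now lives in org.http4s.blaze.client (the separate blaze-client module), and the circe entity decoders can be mixed in via the CirceEntityDecoder trait rather than wildcard-importing CirceEntityDecoder._. A rough sketch under those assumptions; HelloClient, Greeting and the circe-generic derivation are made up for illustration:

import cats.effect.{Async, Resource}
import io.circe.Decoder
import io.circe.generic.semiauto.deriveDecoder
import org.http4s.Uri
import org.http4s.blaze.client.BlazeClientBuilder // 0.23 package (was org.http4s.client.blaze)
import org.http4s.circe.CirceEntityDecoder // mixed in below instead of importing ._
import org.http4s.client.Client
import scala.concurrent.ExecutionContext

final case class Greeting(message: String)
object Greeting {
  implicit val decoder: Decoder[Greeting] = deriveDecoder[Greeting]
}

trait HelloClient[F[_]] {
  def hello(uri: Uri): F[Greeting]
}

object HelloClient {
  def apply[F[_]: Async](client: Client[F]): HelloClient[F] =
    new HelloClient[F] with CirceEntityDecoder {
      def hello(uri: Uri): F[Greeting] =
        client.expect[Greeting](uri) // decoder comes from the mixed-in trait
    }

  def resource[F[_]: Async](ec: ExecutionContext): Resource[F, HelloClient[F]] =
    BlazeClientBuilder[F](ec).resource.map(apply[F])
}
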
@@ -1,12 +1,12 @@
 package docspell.restserver

-import java.net.InetAddress
-
 import docspell.backend.auth.Login
 import docspell.backend.{Config => BackendConfig}
 import docspell.common._
 import docspell.ftssolr.SolrConfig

+import com.comcast.ip4s.IpAddress
+
 case class Config(
     appName: String,
     appId: Ident,
@@ -42,12 +42,14 @@ object Config {
   case class HttpHeader(enabled: Boolean, headerName: String, headerValue: String)
   case class AllowedIps(enabled: Boolean, ips: Set[String]) {

-    def containsAddress(inet: InetAddress): Boolean = {
-      val ip = inet.getHostAddress
+    def containsAddress(inet: IpAddress): Boolean = {
+      val ip = inet.fold(_.toUriString, _.toUriString) //.getHostAddress
       lazy val ipParts = ip.split('.')

       def checkSingle(pattern: String): Boolean =
-        pattern == ip || (inet.isLoopbackAddress && pattern == "127.0.0.1") || (pattern
+        pattern == ip || (ip.contains(
+          "localhost"
+        ) && pattern == "127.0.0.1") || (pattern
           .split('.')
           .zip(ipParts)
           .foldLeft(true) { case (r, (a, b)) =>
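
The AllowedIps hunk swaps java.net.InetAddress for ip4s' IpAddress, the address type http4s 0.23 exposes for connection info; folding over the IPv4/IPv6 cases takes the place of getHostAddress. A small stand-alone sketch of that ip4s API, independent of the config class above:

import com.comcast.ip4s.{IpAddress, Ipv4Address, Ipv6Address}

object IpAddressDemo extends App {
  // Parsing returns Option, so malformed input never throws.
  val v4: Option[IpAddress] = IpAddress.fromString("127.0.0.1")
  val v6: Option[IpAddress] = IpAddress.fromString("::1")

  // fold replaces InetAddress#getHostAddress: both address families are handled explicitly.
  def render(ip: IpAddress): String =
    ip.fold(
      (a: Ipv4Address) => s"IPv4 ${a.toUriString}",
      (b: Ipv6Address) => s"IPv6 ${b.toUriString}"
    )

  println(v4.map(render)) // Some(IPv4 127.0.0.1)
  println(v6.map(render)) // Some(IPv6 [::1]); toUriString brackets IPv6 addresses
}
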
@@ -52,9 +52,8 @@ object Main extends IOApp {
     val pools = for {
       cec <- connectEC
       bec <- blockingEC
-      blocker = Blocker.liftExecutorService(bec)
       rec <- restserverEC
-    } yield Pools(cec, bec, blocker, rec)
+    } yield Pools(cec, bec, rec)

     logger.info(s"\n${banner.render("***>")}")
     if (EnvMode.current.isDev) {
@@ -24,21 +24,20 @@ final class RestAppImpl[F[_]](val config: Config, val backend: BackendApp[F])

 object RestAppImpl {

-  def create[F[_]: ConcurrentEffect: ContextShift](
+  def create[F[_]: Async](
       cfg: Config,
       connectEC: ExecutionContext,
-      httpClientEc: ExecutionContext,
-      blocker: Blocker
+      httpClientEc: ExecutionContext
   ): Resource[F, RestApp[F]] =
     for {
-      backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker)(
+      backend <- BackendApp(cfg.backend, connectEC, httpClientEc)(
         createFtsClient[F](cfg)
       )
       app = new RestAppImpl[F](cfg, backend)
       appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
     } yield appR

-  private def createFtsClient[F[_]: ConcurrentEffect](
+  private def createFtsClient[F[_]: Async](
       cfg: Config
   )(client: Client[F]): Resource[F, FtsClient[F]] =
     if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
@@ -11,36 +11,33 @@ import docspell.restserver.routes._
 import docspell.restserver.webapp._

 import org.http4s._
+import org.http4s.blaze.server.BlazeServerBuilder
 import org.http4s.dsl.Http4sDsl
 import org.http4s.headers.Location
 import org.http4s.implicits._
 import org.http4s.server.Router
-import org.http4s.server.blaze.BlazeServerBuilder
 import org.http4s.server.middleware.Logger

 object RestServer {

-  def stream[F[_]: ConcurrentEffect](
-      cfg: Config,
-      pools: Pools
-  )(implicit T: Timer[F], CS: ContextShift[F]): Stream[F, Nothing] = {
+  def stream[F[_]: Async](cfg: Config, pools: Pools): Stream[F, Nothing] = {

-    val templates = TemplateRoutes[F](pools.blocker, cfg)
+    val templates = TemplateRoutes[F](cfg)
     val app = for {
       restApp <-
         RestAppImpl
-          .create[F](cfg, pools.connectEC, pools.httpClientEC, pools.blocker)
+          .create[F](cfg, pools.connectEC, pools.httpClientEC)
       httpApp = Router(
         "/api/info" -> routes.InfoRoutes(),
         "/api/v1/open/" -> openRoutes(cfg, restApp),
         "/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
-          securedRoutes(cfg, pools, restApp, token)
+          securedRoutes(cfg, restApp, token)
         },
         "/api/v1/admin" -> AdminRoutes(cfg.adminEndpoint) {
           adminRoutes(cfg, restApp)
         },
         "/api/doc" -> templates.doc,
-        "/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F](pools.blocker)),
+        "/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
         "/app" -> EnvMiddleware(templates.app),
         "/sw.js" -> EnvMiddleware(templates.serviceWorker),
         "/" -> redirectTo("/app")
@@ -61,9 +58,8 @@ object RestServer {
       )
   }.drain

-  def securedRoutes[F[_]: Effect: ContextShift](
+  def securedRoutes[F[_]: Async](
       cfg: Config,
-      pools: Pools,
       restApp: RestApp[F],
       token: AuthToken
   ): HttpRoutes[F] =
@@ -77,9 +73,9 @@ object RestServer {
       "user" -> UserRoutes(restApp.backend, token),
       "collective" -> CollectiveRoutes(restApp.backend, token),
       "queue" -> JobQueueRoutes(restApp.backend, token),
-      "item" -> ItemRoutes(cfg, pools.blocker, restApp.backend, token),
+      "item" -> ItemRoutes(cfg, restApp.backend, token),
       "items" -> ItemMultiRoutes(restApp.backend, token),
-      "attachment" -> AttachmentRoutes(pools.blocker, restApp.backend, token),
+      "attachment" -> AttachmentRoutes(restApp.backend, token),
       "attachments" -> AttachmentMultiRoutes(restApp.backend, token),
       "upload" -> UploadRoutes.secured(restApp.backend, cfg, token),
       "checkfile" -> CheckFileRoutes.secured(restApp.backend, token),
@@ -95,7 +91,7 @@ object RestServer {
       "clientSettings" -> ClientSettingsRoutes(restApp.backend, token)
     )

-  def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
+  def openRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
     Router(
       "auth" -> LoginRoutes.login(restApp.backend.login, cfg),
       "signup" -> RegisterRoutes(restApp.backend, cfg),
@@ -104,14 +100,14 @@ object RestServer {
       "integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg)
     )

-  def adminRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
+  def adminRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
     Router(
       "fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend),
       "user" -> UserRoutes.admin(restApp.backend),
       "info" -> InfoRoutes.admin(cfg)
     )

-  def redirectTo[F[_]: Effect](path: String): HttpRoutes[F] = {
+  def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = {
     val dsl = new Http4sDsl[F] {}
     import dsl._

@@ -119,7 +115,7 @@ object RestServer {
       Response[F](
         Status.SeeOther,
         body = Stream.empty,
-        headers = Headers.of(Location(Uri(path = path)))
+        headers = Headers(Location(Uri(path = Uri.Path.unsafeFromString(path))))
       ).pure[F]
     }
   }
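
The redirect hunk above exercises two http4s 0.23 API changes: Headers.of gives way to a Headers(...) constructor that accepts anything with a Header.ToRaw instance, and a Uri path is now a Uri.Path value rather than a String. A self-contained sketch of a redirect route written against those APIs; the route and the IO wiring are illustrative only:

import cats.effect.IO
import fs2.Stream
import org.http4s._
import org.http4s.dsl.io._
import org.http4s.headers.Location

object RedirectExample {
  // 303 See Other pointing at the given path. Uri.Path.unsafeFromString replaces the
  // raw String path, and Headers(...) replaces Headers.of(...).
  def redirectTo(path: String): HttpRoutes[IO] =
    HttpRoutes.of[IO] { case GET -> Root =>
      IO.pure(
        Response[IO](
          Status.SeeOther,
          body = Stream.empty,
          headers = Headers(Location(Uri(path = Uri.Path.unsafeFromString(path))))
        )
      )
    }
}
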
@@ -5,7 +5,7 @@ import docspell.common.AccountId
 import docspell.common.LenientUri

 import org.http4s._
-import org.http4s.util._
+import org.typelevel.ci.CIString

 case class CookieData(auth: AuthToken) {
   def accountId: AccountId = auth.account
@@ -37,7 +37,7 @@ object CookieData {

   def fromCookie[F[_]](req: Request[F]): Either[String, String] =
     for {
-      header <- headers.Cookie.from(req.headers).toRight("Cookie parsing error")
+      header <- req.headers.get[headers.Cookie].toRight("Cookie parsing error")
       cookie <-
         header.values.toList
           .find(_.name == cookieName)
@@ -46,8 +46,8 @@ object CookieData {

   def fromHeader[F[_]](req: Request[F]): Either[String, String] =
     req.headers
-      .get(CaseInsensitiveString(headerName))
-      .map(_.value)
+      .get(CIString(headerName))
+      .map(_.head.value)
       .toRight("Couldn't find an authenticator")

   def deleteCookie(baseUrl: LenientUri): ResponseCookie =
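
The cookie handling reflects the 0.23 header model: modeled headers are selected with headers.get[H] and come back as Option[H], while ad-hoc headers are looked up by CIString and return a non-empty list of raw headers, which is why the diff maps over _.head.value. A brief sketch of both lookups; the X-Example-Auth name is hypothetical:

import cats.effect.IO
import org.http4s._
import org.typelevel.ci.CIString

object HeaderLookupExample {
  // Typed lookup: Option[headers.Cookie], no manual parsing required.
  def cookieValue(req: Request[IO], name: String): Option[String] =
    req.headers
      .get[headers.Cookie]
      .flatMap(_.values.toList.find(_.name == name))
      .map(_.content)

  // Raw lookup by name: Option[NonEmptyList[Header.Raw]], so take the first value.
  def authToken(req: Request[IO]): Either[String, String] =
    req.headers
      .get(CIString("X-Example-Auth")) // hypothetical header name
      .map(_.head.value)
      .toRight("Couldn't find an authenticator")
}
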
@@ -33,7 +33,7 @@ object RememberCookieData {

   def fromCookie[F[_]](req: Request[F]): Option[String] =
     for {
-      header <- headers.Cookie.from(req.headers)
+      header <- req.headers.get[headers.Cookie]
       cookie <- header.values.toList.find(_.name == cookieName)
     } yield cookie.content

@@ -2,7 +2,7 @@ package docspell.restserver.conv

 import java.time.{LocalDate, ZoneId}

-import cats.effect.{Effect, Sync}
+import cats.effect.{Async, Sync}
 import cats.implicits._
 import fs2.Stream

@@ -294,7 +294,7 @@ trait Conversions {
     JobLogEvent(jl.created, jl.level, jl.message)

   // upload
-  def readMultipart[F[_]: Effect](
+  def readMultipart[F[_]: Async](
       mp: Multipart[F],
       sourceName: String,
       logger: Logger,
@@ -347,11 +347,11 @@ trait Conversions {
       .filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
       .map(p =>
         OUpload
-          .File(p.filename, p.headers.get(`Content-Type`).map(fromContentType), p.body)
+          .File(p.filename, p.headers.get[`Content-Type`].map(fromContentType), p.body)
       )
     for {
       metaData <- meta
-      _ <- Effect[F].delay(logger.debug(s"Parsed upload meta data: $metaData"))
+      _ <- Async[F].delay(logger.debug(s"Parsed upload meta data: $metaData"))
       tracker <- Ident.randomId[F]
     } yield UploadData(metaData._1, metaData._2, files, prio, Some(tracker))
   }
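
readMultipart shows the remaining mechanical substitutions: the Effect type class is gone in CE3, so both the constraint and the delay call go through Async, and multipart part headers use the typed get[`Content-Type`] selector. A stripped-down sketch of the same pattern, assuming http4s 0.23; the println stands in for the logger used in the real code:

import cats.effect.Async
import cats.syntax.all._
import org.http4s.headers.`Content-Type`
import org.http4s.multipart.Part

object MultipartExample {
  // CE2 Effect[F].delay becomes Async[F].delay; headers.get[`Content-Type`]
  // replaces the old untyped headers.get(`Content-Type`) lookup.
  def describePart[F[_]: Async](part: Part[F]): F[String] =
    for {
      _ <- Async[F].delay(println(s"inspecting part ${part.name}"))
      ct = part.headers
        .get[`Content-Type`]
        .map(c => s"${c.mediaType.mainType}/${c.mediaType.subType}")
    } yield s"${part.filename.getOrElse("<no file>")}: ${ct.getOrElse("unknown content type")}"
}
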
Some files were not shown because too many files have changed in this diff.