Upgrade code base to CE3

This commit is contained in:
eikek 2021-06-21 21:33:54 +02:00
parent 903ec26e54
commit bd791b4593
146 changed files with 638 additions and 758 deletions

View File

@ -1,7 +1,3 @@
updates.ignore = [ updates.ignore = [
{ groupId = "org.apache.poi" }, { groupId = "org.apache.poi" },
] ]
updates.pin = [
{ groupId = "co.fs2", version = "2." }
]

View File

@ -32,10 +32,7 @@ object TextAnalyser {
labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString)) labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString))
} }
def create[F[_]: Concurrent: Timer: ContextShift]( def create[F[_]: Async](cfg: TextAnalysisConfig): Resource[F, TextAnalyser[F]] =
cfg: TextAnalysisConfig,
blocker: Blocker
): Resource[F, TextAnalyser[F]] =
Resource Resource
.eval(Nlp(cfg.nlpConfig)) .eval(Nlp(cfg.nlpConfig))
.map(stanfordNer => .map(stanfordNer =>
@ -56,7 +53,7 @@ object TextAnalyser {
} yield Result(spans ++ list, dates) } yield Result(spans ++ list, dates)
def classifier: TextClassifier[F] = def classifier: TextClassifier[F] =
new StanfordTextClassifier[F](cfg.classifier, blocker) new StanfordTextClassifier[F](cfg.classifier)
private def textLimit(logger: Logger[F], text: String): F[String] = private def textLimit(logger: Logger[F], text: String): F[String] =
if (cfg.maxLength <= 0) if (cfg.maxLength <= 0)
@ -82,7 +79,7 @@ object TextAnalyser {
/** Provides the nlp pipeline based on the configuration. */ /** Provides the nlp pipeline based on the configuration. */
private object Nlp { private object Nlp {
def apply[F[_]: Concurrent: Timer]( def apply[F[_]: Async](
cfg: TextAnalysisConfig.NlpConfig cfg: TextAnalysisConfig.NlpConfig
): F[Input[F] => F[Vector[NerLabel]]] = ): F[Input[F] => F[Vector[NerLabel]]] =
cfg.mode match { cfg.mode match {
@ -104,7 +101,7 @@ object TextAnalyser {
text: String text: String
) )
def annotate[F[_]: BracketThrow]( def annotate[F[_]: Async](
cache: PipelineCache[F] cache: PipelineCache[F]
)(input: Input[F]): F[Vector[NerLabel]] = )(input: Input[F]): F[Vector[NerLabel]] =
cache cache

View File

@ -2,10 +2,11 @@ package docspell.analysis.classifier
import java.nio.file.Path import java.nio.file.Path
import cats.effect.Ref
import cats.effect._ import cats.effect._
import cats.effect.concurrent.Ref
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import fs2.io.file.Files
import docspell.analysis.classifier import docspell.analysis.classifier
import docspell.analysis.classifier.TextClassifier._ import docspell.analysis.classifier.TextClassifier._
@ -15,10 +16,8 @@ import docspell.common.syntax.FileSyntax._
import edu.stanford.nlp.classify.ColumnDataClassifier import edu.stanford.nlp.classify.ColumnDataClassifier
final class StanfordTextClassifier[F[_]: Sync: ContextShift]( final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
cfg: TextClassifierConfig, extends TextClassifier[F] {
blocker: Blocker
) extends TextClassifier[F] {
def trainClassifier[A]( def trainClassifier[A](
logger: Logger[F], logger: Logger[F],
@ -28,7 +27,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.withTempDir(cfg.workingDir, "trainclassifier") .withTempDir(cfg.workingDir, "trainclassifier")
.use { dir => .use { dir =>
for { for {
rawData <- writeDataFile(blocker, dir, data) rawData <- writeDataFile(dir, data)
_ <- logger.debug(s"Learning from ${rawData.count} items.") _ <- logger.debug(s"Learning from ${rawData.count} items.")
trainData <- splitData(logger, rawData) trainData <- splitData(logger, rawData)
scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m)) scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
@ -81,8 +80,8 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
TrainData(in.file.resolveSibling("train.txt"), in.file.resolveSibling("test.txt")) TrainData(in.file.resolveSibling("train.txt"), in.file.resolveSibling("test.txt"))
val fileLines = val fileLines =
fs2.io.file File
.readAll(in.file, blocker, 4096) .readAll[F](in.file, 4096)
.through(fs2.text.utf8Decode) .through(fs2.text.utf8Decode)
.through(fs2.text.lines) .through(fs2.text.lines)
@ -95,7 +94,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.take(nTest) .take(nTest)
.intersperse("\n") .intersperse("\n")
.through(fs2.text.utf8Encode) .through(fs2.text.utf8Encode)
.through(fs2.io.file.writeAll(td.test, blocker)) .through(Files[F].writeAll(td.test))
.compile .compile
.drain .drain
_ <- _ <-
@ -103,13 +102,13 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.drop(nTest) .drop(nTest)
.intersperse("\n") .intersperse("\n")
.through(fs2.text.utf8Encode) .through(fs2.text.utf8Encode)
.through(fs2.io.file.writeAll(td.train, blocker)) .through(Files[F].writeAll(td.train))
.compile .compile
.drain .drain
} yield td } yield td
} }
def writeDataFile(blocker: Blocker, dir: Path, data: Stream[F, Data]): F[RawData] = { def writeDataFile(dir: Path, data: Stream[F, Data]): F[RawData] = {
val target = dir.resolve("rawdata") val target = dir.resolve("rawdata")
for { for {
counter <- Ref.of[F, Long](0L) counter <- Ref.of[F, Long](0L)
@ -120,7 +119,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.evalTap(_ => counter.update(_ + 1)) .evalTap(_ => counter.update(_ + 1))
.intersperse("\r\n") .intersperse("\r\n")
.through(fs2.text.utf8Encode) .through(fs2.text.utf8Encode)
.through(fs2.io.file.writeAll(target, blocker)) .through(Files[F].writeAll(target))
.compile .compile
.drain .drain
lines <- counter.get lines <- counter.get

View File

@ -19,7 +19,7 @@ object DateFind {
.splitToken(text, " \t.,\n\r/".toSet) .splitToken(text, " \t.,\n\r/".toSet)
.filter(w => lang != Language.Latvian || w.value != "gada") .filter(w => lang != Language.Latvian || w.value != "gada")
.sliding(3) .sliding(3)
.filter(_.length == 3) .filter(_.size == 3)
.flatMap(q => .flatMap(q =>
Stream.emits( Stream.emits(
SimpleDate SimpleDate
@ -28,9 +28,9 @@ object DateFind {
NerDateLabel( NerDateLabel(
sd.toLocalDate, sd.toLocalDate,
NerLabel( NerLabel(
text.substring(q.head.begin, q(2).end), text.substring(q.head.get.begin, q(2).end),
NerTag.Date, NerTag.Date,
q.head.begin, q.head.get.begin,
q(2).end q(2).end
) )
) )

View File

@ -2,9 +2,8 @@ package docspell.analysis.nlp
import scala.concurrent.duration.{Duration => _, _} import scala.concurrent.duration.{Duration => _, _}
import cats.Applicative import cats.effect.Ref
import cats.effect._ import cats.effect._
import cats.effect.concurrent.Ref
import cats.implicits._ import cats.implicits._
import docspell.analysis.NlpSettings import docspell.analysis.NlpSettings
@ -28,7 +27,7 @@ trait PipelineCache[F[_]] {
object PipelineCache { object PipelineCache {
private[this] val logger = getLogger private[this] val logger = getLogger
def apply[F[_]: Concurrent: Timer](clearInterval: Duration)( def apply[F[_]: Async](clearInterval: Duration)(
creator: NlpSettings => Annotator[F], creator: NlpSettings => Annotator[F],
release: F[Unit] release: F[Unit]
): F[PipelineCache[F]] = ): F[PipelineCache[F]] =
@ -38,7 +37,7 @@ object PipelineCache {
_ <- Logger.log4s(logger).info("Creating nlp pipeline cache") _ <- Logger.log4s(logger).info("Creating nlp pipeline cache")
} yield new Impl[F](data, creator, cacheClear) } yield new Impl[F](data, creator, cacheClear)
final private class Impl[F[_]: Sync]( final private class Impl[F[_]: Async](
data: Ref[F, Map[String, Entry[Annotator[F]]]], data: Ref[F, Map[String, Entry[Annotator[F]]]],
creator: NlpSettings => Annotator[F], creator: NlpSettings => Annotator[F],
cacheClear: CacheClearing[F] cacheClear: CacheClearing[F]
@ -97,20 +96,20 @@ object PipelineCache {
} }
object CacheClearing { object CacheClearing {
def none[F[_]: Applicative]: CacheClearing[F] = def none[F[_]]: CacheClearing[F] =
new CacheClearing[F] { new CacheClearing[F] {
def withCache: Resource[F, Unit] = def withCache: Resource[F, Unit] =
Resource.pure[F, Unit](()) Resource.pure[F, Unit](())
} }
def create[F[_]: Concurrent: Timer, A]( def create[F[_]: Async, A](
data: Ref[F, Map[String, Entry[A]]], data: Ref[F, Map[String, Entry[A]]],
interval: Duration, interval: Duration,
release: F[Unit] release: F[Unit]
): F[CacheClearing[F]] = ): F[CacheClearing[F]] =
for { for {
counter <- Ref.of(0L) counter <- Ref.of(0L)
cleaning <- Ref.of(None: Option[Fiber[F, Unit]]) cleaning <- Ref.of(None: Option[Fiber[F, Throwable, Unit]])
log = Logger.log4s(logger) log = Logger.log4s(logger)
result <- result <-
if (interval.millis <= 0) if (interval.millis <= 0)
@ -135,10 +134,10 @@ object PipelineCache {
final private class CacheClearingImpl[F[_], A]( final private class CacheClearingImpl[F[_], A](
data: Ref[F, Map[String, Entry[A]]], data: Ref[F, Map[String, Entry[A]]],
counter: Ref[F, Long], counter: Ref[F, Long],
cleaningFiber: Ref[F, Option[Fiber[F, Unit]]], cleaningFiber: Ref[F, Option[Fiber[F, Throwable, Unit]]],
clearInterval: FiniteDuration, clearInterval: FiniteDuration,
release: F[Unit] release: F[Unit]
)(implicit T: Timer[F], F: Concurrent[F]) )(implicit F: Async[F])
extends CacheClearing[F] { extends CacheClearing[F] {
private[this] val log = Logger.log4s[F](logger) private[this] val log = Logger.log4s[F](logger)
@ -157,8 +156,8 @@ object PipelineCache {
case None => ().pure[F] case None => ().pure[F]
} }
private def clearAllLater: F[Fiber[F, Unit]] = private def clearAllLater: F[Fiber[F, Throwable, Unit]] =
F.start(T.sleep(clearInterval) *> clearAll) F.start(F.sleep(clearInterval) *> clearAll)
private def logDontClear: F[Unit] = private def logDontClear: F[Unit] =
log.info("Cancel stanford cache clearing, as it has been used in between.") log.info("Cancel stanford cache clearing, as it has been used in between.")

View File

@ -2,12 +2,12 @@ package docspell.analysis.classifier
import java.nio.file.Paths import java.nio.file.Paths
import scala.concurrent.ExecutionContext
import cats.data.Kleisli import cats.data.Kleisli
import cats.data.NonEmptyList import cats.data.NonEmptyList
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import fs2.Stream import fs2.Stream
import fs2.io.file.Files
import docspell.analysis.classifier.TextClassifier.Data import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._ import docspell.common._
@ -17,8 +17,6 @@ import munit._
class StanfordTextClassifierSuite extends FunSuite { class StanfordTextClassifierSuite extends FunSuite {
val logger = Logger.log4s[IO](org.log4s.getLogger) val logger = Logger.log4s[IO](org.log4s.getLogger)
implicit val CS = IO.contextShift(ExecutionContext.global)
test("learn from data") { test("learn from data") {
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map())) val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
@ -38,34 +36,30 @@ class StanfordTextClassifierSuite extends FunSuite {
}) })
.covary[IO] .covary[IO]
val modelExists = val modelExists = {
Blocker[IO].use { blocker => val classifier = new StanfordTextClassifier[IO](cfg)
val classifier = new StanfordTextClassifier[IO](cfg, blocker) classifier.trainClassifier[Boolean](logger, data)(
classifier.trainClassifier[Boolean](logger, data)( Kleisli(result => File.existsNonEmpty[IO](result.model))
Kleisli(result => File.existsNonEmpty[IO](result.model)) )
) }
}
assertEquals(modelExists.unsafeRunSync(), true) assertEquals(modelExists.unsafeRunSync(), true)
} }
test("run classifier") { test("run classifier") {
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map())) val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
val things = for { val things = File.withTempDir[IO](Paths.get("target"), "testcls")
dir <- File.withTempDir[IO](Paths.get("target"), "testcls")
blocker <- Blocker[IO]
} yield (dir, blocker)
things things
.use { case (dir, blocker) => .use { dir =>
val classifier = new StanfordTextClassifier[IO](cfg, blocker) val classifier = new StanfordTextClassifier[IO](cfg)
val modelFile = dir.resolve("test.ser.gz") val modelFile = dir.resolve("test.ser.gz")
for { for {
_ <- _ <-
LenientUri LenientUri
.fromJava(getClass.getResource("/test.ser.gz")) .fromJava(getClass.getResource("/test.ser.gz"))
.readURL[IO](4096, blocker) .readURL[IO](4096)
.through(fs2.io.file.writeAll(modelFile, blocker)) .through(Files[IO].writeAll(modelFile))
.compile .compile
.drain .drain
model = ClassifierModel(modelFile) model = ClassifierModel(modelFile)

View File

@ -3,6 +3,7 @@ package docspell.analysis.nlp
import java.nio.file.Paths import java.nio.file.Paths
import cats.effect.IO import cats.effect.IO
import cats.effect.unsafe.implicits.global
import docspell.analysis.Env import docspell.analysis.Env
import docspell.common._ import docspell.common._

View File

@ -14,8 +14,8 @@ import docspell.store.queue.JobQueue
import docspell.store.usertask.UserTaskStore import docspell.store.usertask.UserTaskStore
import emil.javamail.{JavaMailEmil, Settings} import emil.javamail.{JavaMailEmil, Settings}
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
trait BackendApp[F[_]] { trait BackendApp[F[_]] {
@ -43,12 +43,11 @@ trait BackendApp[F[_]] {
object BackendApp { object BackendApp {
def create[F[_]: ConcurrentEffect: ContextShift]( def create[F[_]: Async](
cfg: Config, cfg: Config,
store: Store[F], store: Store[F],
httpClient: Client[F], httpClient: Client[F],
ftsClient: FtsClient[F], ftsClient: FtsClient[F]
blocker: Blocker
): Resource[F, BackendApp[F]] = ): Resource[F, BackendApp[F]] =
for { for {
utStore <- UserTaskStore(store) utStore <- UserTaskStore(store)
@ -68,7 +67,7 @@ object BackendApp {
itemSearchImpl <- OItemSearch(store) itemSearchImpl <- OItemSearch(store)
fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl) fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
javaEmil = javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug)) JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
mailImpl <- OMail(store, javaEmil) mailImpl <- OMail(store, javaEmil)
userTaskImpl <- OUserTask(utStore, queue, joexImpl) userTaskImpl <- OUserTask(utStore, queue, joexImpl)
folderImpl <- OFolder(store) folderImpl <- OFolder(store)
@ -98,16 +97,15 @@ object BackendApp {
val clientSettings = clientSettingsImpl val clientSettings = clientSettingsImpl
} }
def apply[F[_]: ConcurrentEffect: ContextShift]( def apply[F[_]: Async](
cfg: Config, cfg: Config,
connectEC: ExecutionContext, connectEC: ExecutionContext,
httpClientEc: ExecutionContext, httpClientEc: ExecutionContext
blocker: Blocker
)(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] = )(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] =
for { for {
store <- Store.create(cfg.jdbc, connectEC, blocker) store <- Store.create(cfg.jdbc, connectEC)
httpClient <- BlazeClientBuilder[F](httpClientEc).resource httpClient <- BlazeClientBuilder[F](httpClientEc).resource
ftsClient <- ftsFactory(httpClient) ftsClient <- ftsFactory(httpClient)
backend <- create(cfg, store, httpClient, ftsClient, blocker) backend <- create(cfg, store, httpClient, ftsClient)
} yield backend } yield backend
} }

View File

@ -69,7 +69,7 @@ object Login {
def invalidTime: Result = InvalidTime def invalidTime: Result = InvalidTime
} }
def apply[F[_]: Effect](store: Store[F]): Resource[F, Login[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, Login[F]] =
Resource.pure[F, Login[F]](new Login[F] { Resource.pure[F, Login[F]](new Login[F] {
private val logF = Logger.log4s(logger) private val logF = Logger.log4s(logger)

View File

@ -1,7 +1,7 @@
package docspell.backend.ops package docspell.backend.ops
import cats.data.OptionT import cats.data.OptionT
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.common.AccountId import docspell.common.AccountId
@ -25,7 +25,7 @@ trait OClientSettings[F[_]] {
object OClientSettings { object OClientSettings {
private[this] val logger = getLogger private[this] val logger = getLogger
def apply[F[_]: Effect](store: Store[F]): Resource[F, OClientSettings[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OClientSettings[F]] =
Resource.pure[F, OClientSettings[F]](new OClientSettings[F] { Resource.pure[F, OClientSettings[F]](new OClientSettings[F] {
private def getUserId(account: AccountId): OptionT[F, Ident] = private def getUserId(account: AccountId): OptionT[F, Ident] =
@ -58,7 +58,7 @@ object OClientSettings {
store.transact(RClientSettings.upsert(clientId, userId, data)) store.transact(RClientSettings.upsert(clientId, userId, data))
) )
_ <- OptionT.liftF( _ <- OptionT.liftF(
if (n <= 0) Effect[F].raiseError(new Exception("No rows updated!")) if (n <= 0) Async[F].raiseError(new Exception("No rows updated!"))
else ().pure[F] else ().pure[F]
) )
} yield ()).getOrElse(()) } yield ()).getOrElse(())

View File

@ -1,6 +1,6 @@
package docspell.backend.ops package docspell.backend.ops
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
@ -126,7 +126,7 @@ object OCollective {
} }
} }
def apply[F[_]: Effect]( def apply[F[_]: Async](
store: Store[F], store: Store[F],
uts: UserTaskStore[F], uts: UserTaskStore[F],
queue: JobQueue[F], queue: JobQueue[F],

View File

@ -87,7 +87,7 @@ object OCustomFields {
collective: Ident collective: Ident
) )
def apply[F[_]: Effect]( def apply[F[_]: Async](
store: Store[F] store: Store[F]
): Resource[F, OCustomFields[F]] = ): Resource[F, OCustomFields[F]] =
Resource.pure[F, OCustomFields[F]](new OCustomFields[F] { Resource.pure[F, OCustomFields[F]](new OCustomFields[F] {

View File

@ -1,6 +1,6 @@
package docspell.backend.ops package docspell.backend.ops
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.common.{AccountId, Ident} import docspell.common.{AccountId, Ident}
@ -22,7 +22,7 @@ trait OEquipment[F[_]] {
object OEquipment { object OEquipment {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OEquipment[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OEquipment[F]] =
Resource.pure[F, OEquipment[F]](new OEquipment[F] { Resource.pure[F, OEquipment[F]](new OEquipment[F] {
def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[REquipment]] = def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[REquipment]] =
store.transact(REquipment.findAll(account.collective, nameQuery, _.name)) store.transact(REquipment.findAll(account.collective, nameQuery, _.name))

View File

@ -55,7 +55,7 @@ object OFolder {
type FolderDetail = QFolder.FolderDetail type FolderDetail = QFolder.FolderDetail
val FolderDetail = QFolder.FolderDetail val FolderDetail = QFolder.FolderDetail
def apply[F[_]: Effect](store: Store[F]): Resource[F, OFolder[F]] = def apply[F[_]](store: Store[F]): Resource[F, OFolder[F]] =
Resource.pure[F, OFolder[F]](new OFolder[F] { Resource.pure[F, OFolder[F]](new OFolder[F] {
def findAll( def findAll(
account: AccountId, account: AccountId,

View File

@ -77,7 +77,7 @@ object OFulltext {
case class FtsItem(item: ListItem, ftsData: FtsData) case class FtsItem(item: ListItem, ftsData: FtsData)
case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData) case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)
def apply[F[_]: Effect]( def apply[F[_]: Async](
itemSearch: OItemSearch[F], itemSearch: OItemSearch[F],
fts: FtsClient[F], fts: FtsClient[F],
store: Store[F], store: Store[F],

View File

@ -1,7 +1,7 @@
package docspell.backend.ops package docspell.backend.ops
import cats.data.{NonEmptyList, OptionT} import cats.data.{NonEmptyList, OptionT}
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.backend.JobFactory import docspell.backend.JobFactory
@ -191,7 +191,7 @@ trait OItem[F[_]] {
object OItem { object OItem {
def apply[F[_]: Effect]( def apply[F[_]: Async](
store: Store[F], store: Store[F],
fts: FtsClient[F], fts: FtsClient[F],
queue: JobQueue[F], queue: JobQueue[F],

View File

@ -1,7 +1,7 @@
package docspell.backend.ops package docspell.backend.ops
import cats.data.OptionT import cats.data.OptionT
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
@ -118,7 +118,7 @@ object OItemSearch {
val fileId = rs.fileId val fileId = rs.fileId
} }
def apply[F[_]: Effect](store: Store[F]): Resource[F, OItemSearch[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OItemSearch[F]] =
Resource.pure[F, OItemSearch[F]](new OItemSearch[F] { Resource.pure[F, OItemSearch[F]](new OItemSearch[F] {
def findItem(id: Ident, collective: Ident): F[Option[ItemData]] = def findItem(id: Ident, collective: Ident): F[Option[ItemData]] =

View File

@ -36,7 +36,7 @@ object OJoex {
} yield cancel.success).getOrElse(false) } yield cancel.success).getOrElse(false)
}) })
def create[F[_]: ConcurrentEffect]( def create[F[_]: Async](
ec: ExecutionContext, ec: ExecutionContext,
store: Store[F] store: Store[F]
): Resource[F, OJoex[F]] = ): Resource[F, OJoex[F]] =

View File

@ -141,7 +141,7 @@ object OMail {
) )
} }
def apply[F[_]: Effect](store: Store[F], emil: Emil[F]): Resource[F, OMail[F]] = def apply[F[_]: Async](store: Store[F], emil: Emil[F]): Resource[F, OMail[F]] =
Resource.pure[F, OMail[F]](new OMail[F] { Resource.pure[F, OMail[F]](new OMail[F] {
def getSmtpSettings( def getSmtpSettings(
accId: AccountId, accId: AccountId,

View File

@ -1,6 +1,6 @@
package docspell.backend.ops package docspell.backend.ops
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.common.syntax.all._ import docspell.common.syntax.all._
@ -20,7 +20,7 @@ trait ONode[F[_]] {
object ONode { object ONode {
private[this] val logger = getLogger private[this] val logger = getLogger
def apply[F[_]: Effect](store: Store[F]): Resource[F, ONode[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, ONode[F]] =
Resource.pure[F, ONode[F]](new ONode[F] { Resource.pure[F, ONode[F]](new ONode[F] {
def register(appId: Ident, nodeType: NodeType, uri: LenientUri): F[Unit] = def register(appId: Ident, nodeType: NodeType, uri: LenientUri): F[Unit] =

View File

@ -1,6 +1,6 @@
package docspell.backend.ops package docspell.backend.ops
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.backend.ops.OOrganization._ import docspell.backend.ops.OOrganization._
@ -49,7 +49,7 @@ object OOrganization {
contacts: Seq[RContact] contacts: Seq[RContact]
) )
def apply[F[_]: Effect](store: Store[F]): Resource[F, OOrganization[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OOrganization[F]] =
Resource.pure[F, OOrganization[F]](new OOrganization[F] { Resource.pure[F, OOrganization[F]](new OOrganization[F] {
def findAllOrg( def findAllOrg(

View File

@ -1,6 +1,6 @@
package docspell.backend.ops package docspell.backend.ops
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.common.{AccountId, Ident} import docspell.common.{AccountId, Ident}
@ -22,7 +22,7 @@ trait OSource[F[_]] {
object OSource { object OSource {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OSource[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OSource[F]] =
Resource.pure[F, OSource[F]](new OSource[F] { Resource.pure[F, OSource[F]](new OSource[F] {
def findAll(account: AccountId): F[Vector[SourceData]] = def findAll(account: AccountId): F[Vector[SourceData]] =
store store

View File

@ -1,6 +1,6 @@
package docspell.backend.ops package docspell.backend.ops
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.common.{AccountId, Ident} import docspell.common.{AccountId, Ident}
@ -25,7 +25,7 @@ trait OTag[F[_]] {
object OTag { object OTag {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OTag[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OTag[F]] =
Resource.pure[F, OTag[F]](new OTag[F] { Resource.pure[F, OTag[F]](new OTag[F] {
def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[RTag]] = def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[RTag]] =
store.transact(RTag.findAll(account.collective, nameQuery, _.name)) store.transact(RTag.findAll(account.collective, nameQuery, _.name))

View File

@ -62,7 +62,7 @@ trait OUserTask[F[_]] {
object OUserTask { object OUserTask {
def apply[F[_]: Effect]( def apply[F[_]: Async](
store: UserTaskStore[F], store: UserTaskStore[F],
queue: JobQueue[F], queue: JobQueue[F],
joex: OJoex[F] joex: OJoex[F]

View File

@ -1,6 +1,6 @@
package docspell.backend.signup package docspell.backend.signup
import cats.effect.{Effect, Resource} import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.backend.PasswordCrypt import docspell.backend.PasswordCrypt
@ -23,7 +23,7 @@ trait OSignup[F[_]] {
object OSignup { object OSignup {
private[this] val logger = getLogger private[this] val logger = getLogger
def apply[F[_]: Effect](store: Store[F]): Resource[F, OSignup[F]] = def apply[F[_]: Async](store: Store[F]): Resource[F, OSignup[F]] =
Resource.pure[F, OSignup[F]](new OSignup[F] { Resource.pure[F, OSignup[F]](new OSignup[F] {
def newInvite(cfg: Config)(password: Password): F[NewInviteResult] = def newInvite(cfg: Config)(password: Password): F[NewInviteResult] =
@ -35,7 +35,7 @@ object OSignup {
.transact(RInvitation.insertNew) .transact(RInvitation.insertNew)
.map(ri => NewInviteResult.success(ri.id)) .map(ri => NewInviteResult.success(ri.id))
else else
Effect[F].pure(NewInviteResult.invitationClosed) Async[F].pure(NewInviteResult.invitationClosed)
def register(cfg: Config)(data: RegisterData): F[SignupResult] = def register(cfg: Config)(data: RegisterData): F[SignupResult] =
cfg.mode match { cfg.mode match {

View File

@ -1,47 +1,48 @@
package docspell.common package docspell.common
import java.io.IOException import java.io.IOException
import java.nio.charset.StandardCharsets
import java.nio.file._
import java.nio.file.attribute.BasicFileAttributes import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{Files => JFiles, _}
import java.util.concurrent.atomic.AtomicInteger import java.util.concurrent.atomic.AtomicInteger
import scala.jdk.CollectionConverters._ import scala.jdk.CollectionConverters._
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.io.file.Files
import fs2.{Chunk, Stream}
import docspell.common.syntax.all._ import docspell.common.syntax.all._
import io.circe.Decoder import io.circe.Decoder
import scodec.bits.ByteVector
//TODO use io.fs2.files.Files api
object File { object File {
def mkDir[F[_]: Sync](dir: Path): F[Path] = def mkDir[F[_]: Sync](dir: Path): F[Path] =
Sync[F].delay(Files.createDirectories(dir)) Sync[F].blocking(JFiles.createDirectories(dir))
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] = def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
mkDir(parent).map(p => Files.createTempDirectory(p, prefix)) mkDir(parent).map(p => JFiles.createTempDirectory(p, prefix))
def mkTempFile[F[_]: Sync]( def mkTempFile[F[_]: Sync](
parent: Path, parent: Path,
prefix: String, prefix: String,
suffix: Option[String] = None suffix: Option[String] = None
): F[Path] = ): F[Path] =
mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull)) mkDir(parent).map(p => JFiles.createTempFile(p, prefix, suffix.orNull))
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = def deleteDirectory[F[_]: Sync](dir: Path): F[Int] =
Sync[F].delay { Sync[F].delay {
val count = new AtomicInteger(0) val count = new AtomicInteger(0)
Files.walkFileTree( JFiles.walkFileTree(
dir, dir,
new SimpleFileVisitor[Path]() { new SimpleFileVisitor[Path]() {
override def visitFile( override def visitFile(
file: Path, file: Path,
attrs: BasicFileAttributes attrs: BasicFileAttributes
): FileVisitResult = { ): FileVisitResult = {
Files.deleteIfExists(file) JFiles.deleteIfExists(file)
count.incrementAndGet() count.incrementAndGet()
FileVisitResult.CONTINUE FileVisitResult.CONTINUE
} }
@ -49,7 +50,7 @@ object File {
Option(e) match { Option(e) match {
case Some(ex) => throw ex case Some(ex) => throw ex
case None => case None =>
Files.deleteIfExists(dir) JFiles.deleteIfExists(dir)
FileVisitResult.CONTINUE FileVisitResult.CONTINUE
} }
} }
@ -58,47 +59,57 @@ object File {
} }
def exists[F[_]: Sync](file: Path): F[Boolean] = def exists[F[_]: Sync](file: Path): F[Boolean] =
Sync[F].delay(Files.exists(file)) Sync[F].delay(JFiles.exists(file))
def size[F[_]: Sync](file: Path): F[Long] = def size[F[_]: Sync](file: Path): F[Long] =
Sync[F].delay(Files.size(file)) Sync[F].delay(JFiles.size(file))
def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] = def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] =
Sync[F].delay(Files.exists(file) && Files.size(file) > minSize) Sync[F].delay(JFiles.exists(file) && JFiles.size(file) > minSize)
def deleteFile[F[_]: Sync](file: Path): F[Unit] = def deleteFile[F[_]: Sync](file: Path): F[Unit] =
Sync[F].delay(Files.deleteIfExists(file)).map(_ => ()) Sync[F].delay(JFiles.deleteIfExists(file)).map(_ => ())
def delete[F[_]: Sync](path: Path): F[Int] = def delete[F[_]: Sync](path: Path): F[Int] =
if (Files.isDirectory(path)) deleteDirectory(path) if (JFiles.isDirectory(path)) deleteDirectory(path)
else deleteFile(path).map(_ => 1) else deleteFile(path).map(_ => 1)
def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] = def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] =
Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ())) Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ()))
def listFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] = def listJFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] =
Sync[F].delay { Sync[F].delay {
val javaList = val javaList =
Files.list(dir).filter(p => pred(p)).collect(java.util.stream.Collectors.toList()) JFiles
.list(dir)
.filter(p => pred(p))
.collect(java.util.stream.Collectors.toList())
javaList.asScala.toList.sortBy(_.getFileName.toString) javaList.asScala.toList.sortBy(_.getFileName.toString)
} }
def readAll[F[_]: Sync: ContextShift]( def readAll[F[_]: Files](
file: Path, file: Path,
blocker: Blocker,
chunkSize: Int chunkSize: Int
): Stream[F, Byte] = ): Stream[F, Byte] =
fs2.io.file.readAll(file, blocker, chunkSize) Files[F].readAll(file, chunkSize)
def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] = def readText[F[_]: Files: Concurrent](file: Path): F[String] =
readAll[F](file, blocker, 8192).through(fs2.text.utf8Decode).compile.foldMonoid readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
def writeString[F[_]: Sync](file: Path, content: String): F[Path] = def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] =
Sync[F].delay(Files.write(file, content.getBytes(StandardCharsets.UTF_8))) ByteVector.encodeUtf8(content) match {
case Right(bv) =>
Stream
.chunk(Chunk.byteVector(bv))
.through(Files[F].writeAll(file))
.compile
.drain
.map(_ => file)
case Left(ex) =>
Concurrent[F].raiseError(ex)
}
def readJson[F[_]: Sync: ContextShift, A](file: Path, blocker: Blocker)(implicit def readJson[F[_]: Async, A](file: Path)(implicit d: Decoder[A]): F[A] =
d: Decoder[A] readText[F](file).map(_.parseJsonAs[A]).rethrow
): F[A] =
readText[F](file, blocker).map(_.parseJsonAs[A]).rethrow
} }

View File

@ -6,7 +6,7 @@ import java.net.URLEncoder
import cats.data.NonEmptyList import cats.data.NonEmptyList
import cats.effect.Resource import cats.effect.Resource
import cats.effect.{Blocker, ContextShift, Sync} import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
@ -66,20 +66,17 @@ case class LenientUri(
) )
} }
def readURL[F[_]: Sync: ContextShift]( def readURL[F[_]: Sync](chunkSize: Int): Stream[F, Byte] =
chunkSize: Int,
blocker: Blocker
): Stream[F, Byte] =
Stream Stream
.emit(Either.catchNonFatal(new URL(asString))) .emit(Either.catchNonFatal(new URL(asString)))
.covary[F] .covary[F]
.rethrow .rethrow
.flatMap(url => .flatMap(url =>
fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true) fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, true)
) )
def readText[F[_]: Sync: ContextShift](chunkSize: Int, blocker: Blocker): F[String] = def readText[F[_]: Sync](chunkSize: Int): F[String] =
readURL[F](chunkSize, blocker).through(fs2.text.utf8Decode).compile.foldMonoid readURL[F](chunkSize).through(fs2.text.utf8Decode).compile.foldMonoid
def host: Option[String] = def host: Option[String] =
authority.map(a => authority.map(a =>

View File

@ -2,13 +2,10 @@ package docspell.common
import scala.concurrent.ExecutionContext import scala.concurrent.ExecutionContext
import cats.effect._
/** Captures thread pools to use in an application. /** Captures thread pools to use in an application.
*/ */
case class Pools( case class Pools(
connectEC: ExecutionContext, connectEC: ExecutionContext,
httpClientEC: ExecutionContext, httpClientEC: ExecutionContext,
blocker: Blocker,
restEC: ExecutionContext restEC: ExecutionContext
) )

View File

@ -7,7 +7,7 @@ import java.util.concurrent.TimeUnit
import scala.jdk.CollectionConverters._ import scala.jdk.CollectionConverters._
import cats.effect.{Blocker, ContextShift, Sync} import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.{Stream, io, text} import fs2.{Stream, io, text}
@ -34,9 +34,8 @@ object SystemCommand {
final case class Result(rc: Int, stdout: String, stderr: String) final case class Result(rc: Int, stdout: String, stderr: String)
def exec[F[_]: Sync: ContextShift]( def exec[F[_]: Sync](
cmd: Config, cmd: Config,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
wd: Option[Path] = None, wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty stdin: Stream[F, Byte] = Stream.empty
@ -44,8 +43,8 @@ object SystemCommand {
startProcess(cmd, wd, logger, stdin) { proc => startProcess(cmd, wd, logger, stdin) { proc =>
Stream.eval { Stream.eval {
for { for {
_ <- writeToProcess(stdin, proc, blocker) _ <- writeToProcess(stdin, proc)
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS)) term <- Sync[F].blocking(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
_ <- _ <-
if (term) if (term)
logger.debug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}") logger.debug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
@ -55,23 +54,22 @@ object SystemCommand {
) )
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(()) _ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
out <- out <-
if (term) inputStreamToString(proc.getInputStream, blocker) if (term) inputStreamToString(proc.getInputStream)
else Sync[F].pure("") else Sync[F].pure("")
err <- err <-
if (term) inputStreamToString(proc.getErrorStream, blocker) if (term) inputStreamToString(proc.getErrorStream)
else Sync[F].pure("") else Sync[F].pure("")
} yield Result(proc.exitValue, out, err) } yield Result(proc.exitValue, out, err)
} }
} }
def execSuccess[F[_]: Sync: ContextShift]( def execSuccess[F[_]: Sync](
cmd: Config, cmd: Config,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
wd: Option[Path] = None, wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] = ): Stream[F, Result] =
exec(cmd, blocker, logger, wd, stdin).flatMap { r => exec(cmd, logger, wd, stdin).flatMap { r =>
if (r.rc != 0) if (r.rc != 0)
Stream.raiseError[F]( Stream.raiseError[F](
new Exception( new Exception(
@ -92,7 +90,7 @@ object SystemCommand {
val log = logger.debug(s"Running external command: ${cmd.cmdString}") val log = logger.debug(s"Running external command: ${cmd.cmdString}")
val hasStdin = stdin.take(1).compile.last.map(_.isDefined) val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
val proc = log *> hasStdin.flatMap(flag => val proc = log *> hasStdin.flatMap(flag =>
Sync[F].delay { Sync[F].blocking {
val pb = new ProcessBuilder(cmd.toCmd.asJava) val pb = new ProcessBuilder(cmd.toCmd.asJava)
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT) .redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
.redirectError(Redirect.PIPE) .redirectError(Redirect.PIPE)
@ -109,11 +107,8 @@ object SystemCommand {
.flatMap(f) .flatMap(f)
} }
private def inputStreamToString[F[_]: Sync: ContextShift]( private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
in: InputStream, io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
blocker: Blocker
): F[String] =
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false)
.through(text.utf8Decode) .through(text.utf8Decode)
.chunks .chunks
.map(_.toVector.mkString) .map(_.toVector.mkString)
@ -122,18 +117,17 @@ object SystemCommand {
.last .last
.map(_.getOrElse("")) .map(_.getOrElse(""))
private def writeToProcess[F[_]: Sync: ContextShift]( private def writeToProcess[F[_]: Sync](
data: Stream[F, Byte], data: Stream[F, Byte],
proc: Process, proc: Process
blocker: Blocker
): F[Unit] = ): F[Unit] =
data data
.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)) .through(io.writeOutputStream(Sync[F].blocking(proc.getOutputStream)))
.compile .compile
.drain .drain
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config): F[Unit] = private def timeoutError[F[_]: Sync](proc: Process, cmd: Config): F[Unit] =
Sync[F].delay(proc.destroyForcibly()).attempt *> { Sync[F].blocking(proc.destroyForcibly()).attempt *> {
Sync[F].raiseError( Sync[F].raiseError(
new Exception( new Exception(
s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})" s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})"

View File

@ -12,6 +12,8 @@ import docspell.convert.extern._
import docspell.convert.flexmark.Markdown import docspell.convert.flexmark.Markdown
import docspell.files.{ImageSize, TikaMimetype} import docspell.files.{ImageSize, TikaMimetype}
import scodec.bits.ByteVector
trait Conversion[F[_]] { trait Conversion[F[_]] {
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])( def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
@ -22,10 +24,9 @@ trait Conversion[F[_]] {
object Conversion { object Conversion {
def create[F[_]: Sync: ContextShift]( def create[F[_]: Async](
cfg: ConvertConfig, cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml, sanitizeHtml: SanitizeHtml,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
): Resource[F, Conversion[F]] = ): Resource[F, Conversion[F]] =
Resource.pure[F, Conversion[F]](new Conversion[F] { Resource.pure[F, Conversion[F]](new Conversion[F] {
@ -36,12 +37,12 @@ object Conversion {
TikaMimetype.resolve(dataType, in).flatMap { TikaMimetype.resolve(dataType, in).flatMap {
case MimeType.PdfMatch(_) => case MimeType.PdfMatch(_) =>
OcrMyPdf OcrMyPdf
.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, blocker, logger)(in, handler) .toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(in, handler)
case MimeType.HtmlMatch(mt) => case MimeType.HtmlMatch(mt) =>
val cs = mt.charsetOrUtf8 val cs = mt.charsetOrUtf8
WkHtmlPdf WkHtmlPdf
.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, blocker, logger)( .toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, logger)(
in, in,
handler handler
) )
@ -50,14 +51,15 @@ object Conversion {
val cs = mt.charsetOrUtf8 val cs = mt.charsetOrUtf8
Markdown.toHtml(in, cfg.markdown, cs).flatMap { html => Markdown.toHtml(in, cfg.markdown, cs).flatMap { html =>
val bytes = Stream val bytes = Stream
.chunk(Chunk.bytes(html.getBytes(StandardCharsets.UTF_8))) .chunk(
Chunk.byteVector(ByteVector.view(html.getBytes(StandardCharsets.UTF_8)))
)
.covary[F] .covary[F]
WkHtmlPdf.toPDF( WkHtmlPdf.toPDF(
cfg.wkhtmlpdf, cfg.wkhtmlpdf,
cfg.chunkSize, cfg.chunkSize,
StandardCharsets.UTF_8, StandardCharsets.UTF_8,
sanitizeHtml, sanitizeHtml,
blocker,
logger logger
)(bytes, handler) )(bytes, handler)
} }
@ -77,7 +79,7 @@ object Conversion {
) )
) )
else else
Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)( Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, logger)(
in, in,
handler handler
) )
@ -86,14 +88,14 @@ object Conversion {
logger.info( logger.info(
s"Cannot read image when determining size for ${mt.asString}. Converting anyways." s"Cannot read image when determining size for ${mt.asString}. Converting anyways."
) *> ) *>
Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)( Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, logger)(
in, in,
handler handler
) )
} }
case Office(_) => case Office(_) =>
Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, blocker, logger)(in, handler) Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, logger)(in, handler)
case mt => case mt =>
handler.run(ConversionResult.unsupportedFormat(mt)) handler.run(ConversionResult.unsupportedFormat(mt))

View File

@ -4,6 +4,7 @@ import java.nio.file.Path
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.io.file.Files
import fs2.{Pipe, Stream} import fs2.{Pipe, Stream}
import docspell.common._ import docspell.common._
@ -12,12 +13,11 @@ import docspell.convert.ConversionResult.{Handler, successPdf, successPdfTxt}
private[extern] object ExternConv { private[extern] object ExternConv {
def toPDF[F[_]: Sync: ContextShift, A]( def toPDF[F[_]: Async, A](
name: String, name: String,
cmdCfg: SystemCommand.Config, cmdCfg: SystemCommand.Config,
wd: Path, wd: Path,
useStdin: Boolean, useStdin: Boolean,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
@ -37,13 +37,12 @@ private[extern] object ExternConv {
val createInput: Pipe[F, Byte, Unit] = val createInput: Pipe[F, Byte, Unit] =
if (useStdin) _ => Stream.emit(()) if (useStdin) _ => Stream.emit(())
else storeDataToFile(name, blocker, logger, inFile) else storeDataToFile(name, logger, inFile)
in.through(createInput).flatMap { _ => in.through(createInput).flatMap { _ =>
SystemCommand SystemCommand
.exec[F]( .exec[F](
sysCfg, sysCfg,
blocker,
logger, logger,
Some(dir), Some(dir),
if (useStdin) in if (useStdin) in
@ -66,8 +65,7 @@ private[extern] object ExternConv {
handler.run(ConversionResult.failure(ex)) handler.run(ConversionResult.failure(ex))
} }
def readResult[F[_]: Sync: ContextShift]( def readResult[F[_]: Async](
blocker: Blocker,
chunkSize: Int, chunkSize: Int,
logger: Logger[F] logger: Logger[F]
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
@ -77,15 +75,15 @@ private[extern] object ExternConv {
File.existsNonEmpty[F](outTxt).flatMap { File.existsNonEmpty[F](outTxt).flatMap {
case true => case true =>
successPdfTxt( successPdfTxt(
File.readAll(out, blocker, chunkSize), File.readAll(out, chunkSize),
File.readText(outTxt, blocker) File.readText(outTxt)
).pure[F] ).pure[F]
case false => case false =>
successPdf(File.readAll(out, blocker, chunkSize)).pure[F] successPdf(File.readAll(out, chunkSize)).pure[F]
} }
case true => case true =>
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *> logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
successPdf(File.readAll(out, blocker, chunkSize)).pure[F] successPdf(File.readAll(out, chunkSize)).pure[F]
case false => case false =>
ConversionResult ConversionResult
@ -95,9 +93,8 @@ private[extern] object ExternConv {
.pure[F] .pure[F]
} }
def readResultTesseract[F[_]: Sync: ContextShift]( def readResultTesseract[F[_]: Async](
outPrefix: String, outPrefix: String,
blocker: Blocker,
chunkSize: Int, chunkSize: Int,
logger: Logger[F] logger: Logger[F]
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = { )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = {
@ -106,9 +103,9 @@ private[extern] object ExternConv {
case true => case true =>
val outTxt = out.resolveSibling(s"$outPrefix.txt") val outTxt = out.resolveSibling(s"$outPrefix.txt")
File.exists(outTxt).flatMap { txtExists => File.exists(outTxt).flatMap { txtExists =>
val pdfData = File.readAll(out, blocker, chunkSize) val pdfData = File.readAll(out, chunkSize)
if (result.rc == 0) if (result.rc == 0)
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt, blocker)).pure[F] if (txtExists) successPdfTxt(pdfData, File.readText(outTxt)).pure[F]
else successPdf(pdfData).pure[F] else successPdf(pdfData).pure[F]
else else
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *> logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
@ -124,9 +121,8 @@ private[extern] object ExternConv {
} }
} }
private def storeDataToFile[F[_]: Sync: ContextShift]( private def storeDataToFile[F[_]: Async](
name: String, name: String,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
inFile: Path inFile: Path
): Pipe[F, Byte, Unit] = ): Pipe[F, Byte, Unit] =
@ -134,7 +130,7 @@ private[extern] object ExternConv {
Stream Stream
.eval(logger.debug(s"Storing input to file ${inFile} for running $name")) .eval(logger.debug(s"Storing input to file ${inFile} for running $name"))
.drain ++ .drain ++
Stream.eval(storeFile(in, inFile, blocker)) Stream.eval(storeFile(in, inFile))
private def logResult[F[_]: Sync]( private def logResult[F[_]: Sync](
name: String, name: String,
@ -144,10 +140,9 @@ private[extern] object ExternConv {
logger.debug(s"$name stdout: ${result.stdout}") *> logger.debug(s"$name stdout: ${result.stdout}") *>
logger.debug(s"$name stderr: ${result.stderr}") logger.debug(s"$name stderr: ${result.stderr}")
private def storeFile[F[_]: Sync: ContextShift]( private def storeFile[F[_]: Async](
in: Stream[F, Byte], in: Stream[F, Byte],
target: Path, target: Path
blocker: Blocker
): F[Unit] = ): F[Unit] =
in.through(fs2.io.file.writeAll(target, blocker)).compile.drain in.through(Files[F].writeAll(target)).compile.drain
} }

View File

@ -11,23 +11,21 @@ import docspell.convert.ConversionResult.Handler
object OcrMyPdf { object OcrMyPdf {
def toPDF[F[_]: Sync: ContextShift, A]( def toPDF[F[_]: Async, A](
cfg: OcrMyPdfConfig, cfg: OcrMyPdfConfig,
lang: Language, lang: Language,
chunkSize: Int, chunkSize: Int,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
if (cfg.enabled) { if (cfg.enabled) {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] = val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger) ExternConv.readResult[F](chunkSize, logger)
ExternConv.toPDF[F, A]( ExternConv.toPDF[F, A](
"ocrmypdf", "ocrmypdf",
cfg.command.replace(Map("{{lang}}" -> lang.iso3)), cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
cfg.workingDir, cfg.workingDir,
false, false,
blocker,
logger, logger,
reader reader
)(in, handler) )(in, handler)

View File

@ -11,23 +11,21 @@ import docspell.convert.ConversionResult.Handler
object Tesseract { object Tesseract {
def toPDF[F[_]: Sync: ContextShift, A]( def toPDF[F[_]: Async, A](
cfg: TesseractConfig, cfg: TesseractConfig,
lang: Language, lang: Language,
chunkSize: Int, chunkSize: Int,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = { )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val outBase = cfg.command.args.tail.headOption.getOrElse("out") val outBase = cfg.command.args.tail.headOption.getOrElse("out")
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] = val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger) ExternConv.readResultTesseract[F](outBase, chunkSize, logger)
ExternConv.toPDF[F, A]( ExternConv.toPDF[F, A](
"tesseract", "tesseract",
cfg.command.replace(Map("{{lang}}" -> lang.iso3)), cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
cfg.workingDir, cfg.workingDir,
false, false,
blocker,
logger, logger,
reader reader
)(in, handler) )(in, handler)

View File

@ -11,21 +11,19 @@ import docspell.convert.ConversionResult.Handler
object Unoconv { object Unoconv {
def toPDF[F[_]: Sync: ContextShift, A]( def toPDF[F[_]: Async, A](
cfg: UnoconvConfig, cfg: UnoconvConfig,
chunkSize: Int, chunkSize: Int,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = { )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] = val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger) ExternConv.readResult[F](chunkSize, logger)
ExternConv.toPDF[F, A]( ExternConv.toPDF[F, A](
"unoconv", "unoconv",
cfg.command, cfg.command,
cfg.workingDir, cfg.workingDir,
false, false,
blocker,
logger, logger,
reader reader
)( )(

View File

@ -13,16 +13,15 @@ import docspell.convert.{ConversionResult, SanitizeHtml}
object WkHtmlPdf { object WkHtmlPdf {
def toPDF[F[_]: Sync: ContextShift, A]( def toPDF[F[_]: Async, A](
cfg: WkHtmlPdfConfig, cfg: WkHtmlPdfConfig,
chunkSize: Int, chunkSize: Int,
charset: Charset, charset: Charset,
sanitizeHtml: SanitizeHtml, sanitizeHtml: SanitizeHtml,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = { )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] = val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger) ExternConv.readResult[F](chunkSize, logger)
val cmdCfg = cfg.command.replace(Map("{{encoding}}" -> charset.name())) val cmdCfg = cfg.command.replace(Map("{{encoding}}" -> charset.name()))
@ -40,7 +39,7 @@ object WkHtmlPdf {
) )
ExternConv ExternConv
.toPDF[F, A]("wkhtmltopdf", cmdCfg, cfg.workingDir, true, blocker, logger, reader)( .toPDF[F, A]("wkhtmltopdf", cmdCfg, cfg.workingDir, true, logger, reader)(
inSane, inSane,
handler handler
) )

View File

@ -4,6 +4,7 @@ import java.nio.file.Paths
import cats.data.Kleisli import cats.data.Kleisli
import cats.effect.IO import cats.effect.IO
import cats.effect.unsafe.implicits.global
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
@ -12,13 +13,11 @@ import docspell.convert.ConversionResult.Handler
import docspell.convert.extern.OcrMyPdfConfig import docspell.convert.extern.OcrMyPdfConfig
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig} import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
import docspell.convert.flexmark.MarkdownConfig import docspell.convert.flexmark.MarkdownConfig
import docspell.files.{ExampleFiles, TestFiles} import docspell.files.ExampleFiles
import munit._ import munit._
class ConversionTest extends FunSuite with FileChecks { class ConversionTest extends FunSuite with FileChecks {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val logger = Logger.log4s[IO](org.log4s.getLogger) val logger = Logger.log4s[IO](org.log4s.getLogger)
val target = Paths.get("target") val target = Paths.get("target")
@ -73,7 +72,7 @@ class ConversionTest extends FunSuite with FileChecks {
) )
val conversion = val conversion =
Conversion.create[IO](convertConfig, SanitizeHtml.none, blocker, logger) Conversion.create[IO](convertConfig, SanitizeHtml.none, logger)
val bombs = List( val bombs = List(
ExampleFiles.bombs_20K_gray_jpeg, ExampleFiles.bombs_20K_gray_jpeg,
@ -167,7 +166,7 @@ class ConversionTest extends FunSuite with FileChecks {
.covary[IO] .covary[IO]
.zipWithIndex .zipWithIndex
.evalMap({ case (uri, index) => .evalMap({ case (uri, index) =>
val load = uri.readURL[IO](8192, blocker) val load = uri.readURL[IO](8192)
val dataType = DataType.filename(uri.path.segments.last) val dataType = DataType.filename(uri.path.segments.last)
logger.info(s"Processing file ${uri.path.asString}") *> logger.info(s"Processing file ${uri.path.asString}") *>
conv.toPDF(dataType, Language.German, handler(index))(load) conv.toPDF(dataType, Language.German, handler(index))(load)

View File

@ -5,6 +5,7 @@ import java.nio.file.{Files, Path}
import cats.data.Kleisli import cats.data.Kleisli
import cats.effect.IO import cats.effect.IO
import cats.effect.unsafe.implicits.global
import fs2.{Pipe, Stream} import fs2.{Pipe, Stream}
import docspell.common.MimeType import docspell.common.MimeType

View File

@ -4,19 +4,18 @@ import java.nio.charset.StandardCharsets
import java.nio.file.{Path, Paths} import java.nio.file.{Path, Paths}
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.common._ import docspell.common._
import docspell.convert._ import docspell.convert._
import docspell.files.{ExampleFiles, TestFiles} import docspell.files.ExampleFiles
import munit._ import munit._
class ExternConvTest extends FunSuite with FileChecks { class ExternConvTest extends FunSuite with FileChecks {
val blocker = TestFiles.blocker val utf8 = StandardCharsets.UTF_8
implicit val CS = TestFiles.CS val logger = Logger.log4s[IO](org.log4s.getLogger)
val utf8 = StandardCharsets.UTF_8 val target = Paths.get("target")
val logger = Logger.log4s[IO](org.log4s.getLogger)
val target = Paths.get("target")
test("convert html to pdf") { test("convert html to pdf") {
val cfg = SystemCommand.Config( val cfg = SystemCommand.Config(
@ -32,8 +31,8 @@ class ExternConvTest extends FunSuite with FileChecks {
val wkCfg = WkHtmlPdfConfig(cfg, target) val wkCfg = WkHtmlPdfConfig(cfg, target)
val p = val p =
WkHtmlPdf WkHtmlPdf
.toPDF[IO, Path](wkCfg, 8192, utf8, SanitizeHtml.none, blocker, logger)( .toPDF[IO, Path](wkCfg, 8192, utf8, SanitizeHtml.none, logger)(
ExampleFiles.letter_de_html.readURL[IO](8192, blocker), ExampleFiles.letter_de_html.readURL[IO](8192),
storePdfHandler(dir.resolve("test.pdf")) storePdfHandler(dir.resolve("test.pdf"))
) )
.unsafeRunSync() .unsafeRunSync()
@ -59,8 +58,8 @@ class ExternConvTest extends FunSuite with FileChecks {
val ucCfg = UnoconvConfig(cfg, target) val ucCfg = UnoconvConfig(cfg, target)
val p = val p =
Unoconv Unoconv
.toPDF[IO, Path](ucCfg, 8192, blocker, logger)( .toPDF[IO, Path](ucCfg, 8192, logger)(
ExampleFiles.examples_sample_docx.readURL[IO](8192, blocker), ExampleFiles.examples_sample_docx.readURL[IO](8192),
storePdfHandler(dir.resolve("test.pdf")) storePdfHandler(dir.resolve("test.pdf"))
) )
.unsafeRunSync() .unsafeRunSync()
@ -85,8 +84,8 @@ class ExternConvTest extends FunSuite with FileChecks {
val tessCfg = TesseractConfig(cfg, target) val tessCfg = TesseractConfig(cfg, target)
val (pdf, txt) = val (pdf, txt) =
Tesseract Tesseract
.toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, blocker, logger)( .toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, logger)(
ExampleFiles.camera_letter_en_jpg.readURL[IO](8192, blocker), ExampleFiles.camera_letter_en_jpg.readURL[IO](8192),
storePdfTxtHandler(dir.resolve("test.pdf"), dir.resolve("test.txt")) storePdfTxtHandler(dir.resolve("test.pdf"), dir.resolve("test.txt"))
) )
.unsafeRunSync() .unsafeRunSync()

View File

@ -25,8 +25,7 @@ trait Extraction[F[_]] {
object Extraction { object Extraction {
def create[F[_]: Sync: ContextShift]( def create[F[_]: Async](
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
cfg: ExtractConfig cfg: ExtractConfig
): Extraction[F] = ): Extraction[F] =
@ -39,7 +38,7 @@ object Extraction {
TikaMimetype.resolve(dataType, data).flatMap { TikaMimetype.resolve(dataType, data).flatMap {
case MimeType.PdfMatch(_) => case MimeType.PdfMatch(_) =>
PdfExtract PdfExtract
.get(data, blocker, lang, cfg.pdf.minTextLen, cfg.ocr, logger) .get(data, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
.map(ExtractResult.fromEitherResult) .map(ExtractResult.fromEitherResult)
case PoiType(mt) => case PoiType(mt) =>
@ -59,7 +58,7 @@ object Extraction {
case OcrType(mt) => case OcrType(mt) =>
val doExtract = TextExtract val doExtract = TextExtract
.extractOCR(data, blocker, logger, lang.iso3, cfg.ocr) .extractOCR(data, logger, lang.iso3, cfg.ocr)
.compile .compile
.lastOrError .lastOrError
.map(_.value) .map(_.value)

View File

@ -17,9 +17,8 @@ object PdfExtract {
Result(t._1, t._2) Result(t._1, t._2)
} }
def get[F[_]: Sync: ContextShift]( def get[F[_]: Async](
in: Stream[F, Byte], in: Stream[F, Byte],
blocker: Blocker,
lang: Language, lang: Language,
stripMinLen: Int, stripMinLen: Int,
ocrCfg: OcrConfig, ocrCfg: OcrConfig,
@ -27,7 +26,7 @@ object PdfExtract {
): F[Either[Throwable, Result]] = { ): F[Either[Throwable, Result]] = {
val runOcr = val runOcr =
TextExtract.extractOCR(in, blocker, logger, lang.iso3, ocrCfg).compile.lastOrError TextExtract.extractOCR(in, logger, lang.iso3, ocrCfg).compile.lastOrError
def chooseResult(ocrStr: Text, strippedRes: (Text, Option[PdfMetaData])) = def chooseResult(ocrStr: Text, strippedRes: (Text, Option[PdfMetaData])) =
if (ocrStr.length > strippedRes._1.length) if (ocrStr.length > strippedRes._1.length)

View File

@ -2,7 +2,7 @@ package docspell.extract.ocr
import java.nio.file.Path import java.nio.file.Path
import cats.effect.{Blocker, ContextShift, Sync} import cats.effect._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
@ -11,16 +11,15 @@ object Ocr {
/** Extract the text of all pages in the given pdf file. /** Extract the text of all pages in the given pdf file.
*/ */
def extractPdf[F[_]: Sync: ContextShift]( def extractPdf[F[_]: Async](
pdf: Stream[F, Byte], pdf: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): F[Option[String]] = ): F[Option[String]] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd => File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
runGhostscript(pdf, config, wd, blocker, logger) runGhostscript(pdf, config, wd, logger)
.flatMap(tmpImg => runTesseractFile(tmpImg, blocker, logger, lang, config)) .flatMap(tmpImg => runTesseractFile(tmpImg, logger, lang, config))
.fold1(_ + "\n\n\n" + _) .fold1(_ + "\n\n\n" + _)
.compile .compile
.last .last
@ -28,47 +27,43 @@ object Ocr {
/** Extract the text from the given image file /** Extract the text from the given image file
*/ */
def extractImage[F[_]: Sync: ContextShift]( def extractImage[F[_]: Async](
img: Stream[F, Byte], img: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): Stream[F, String] = ): Stream[F, String] =
runTesseractStdin(img, blocker, logger, lang, config) runTesseractStdin(img, logger, lang, config)
def extractPdFFile[F[_]: Sync: ContextShift]( def extractPdFFile[F[_]: Async](
pdf: Path, pdf: Path,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): F[Option[String]] = ): F[Option[String]] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd => File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker, logger) runGhostscriptFile(pdf, config.ghostscript.command, wd, logger)
.flatMap(tif => runTesseractFile(tif, blocker, logger, lang, config)) .flatMap(tif => runTesseractFile(tif, logger, lang, config))
.fold1(_ + "\n\n\n" + _) .fold1(_ + "\n\n\n" + _)
.compile .compile
.last .last
} }
def extractImageFile[F[_]: Sync: ContextShift]( def extractImageFile[F[_]: Async](
img: Path, img: Path,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): Stream[F, String] = ): Stream[F, String] =
runTesseractFile(img, blocker, logger, lang, config) runTesseractFile(img, logger, lang, config)
/** Run ghostscript to extract all pdf pages into tiff files. The /** Run ghostscript to extract all pdf pages into tiff files. The
* files are stored to a temporary location on disk and returned. * files are stored to a temporary location on disk and returned.
*/ */
private[extract] def runGhostscript[F[_]: Sync: ContextShift]( private[extract] def runGhostscript[F[_]: Async](
pdf: Stream[F, Byte], pdf: Stream[F, Byte],
cfg: OcrConfig, cfg: OcrConfig,
wd: Path, wd: Path,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
): Stream[F, Path] = { ): Stream[F, Path] = {
val xargs = val xargs =
@ -84,19 +79,18 @@ object Ocr {
) )
) )
SystemCommand SystemCommand
.execSuccess(cmd, blocker, logger, wd = Some(wd), stdin = pdf) .execSuccess(cmd, logger, wd = Some(wd), stdin = pdf)
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd)) .evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd))
.flatMap(fs => Stream.emits(fs)) .flatMap(fs => Stream.emits(fs))
} }
/** Run ghostscript to extract all pdf pages into tiff files. The /** Run ghostscript to extract all pdf pages into tiff files. The
* files are stored to a temporary location on disk and returned. * files are stored to a temporary location on disk and returned.
*/ */
private[extract] def runGhostscriptFile[F[_]: Sync: ContextShift]( private[extract] def runGhostscriptFile[F[_]: Async](
pdf: Path, pdf: Path,
ghostscript: SystemCommand.Config, ghostscript: SystemCommand.Config,
wd: Path, wd: Path,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
): Stream[F, Path] = { ): Stream[F, Path] = {
val cmd = ghostscript.replace( val cmd = ghostscript.replace(
@ -106,8 +100,8 @@ object Ocr {
) )
) )
SystemCommand SystemCommand
.execSuccess[F](cmd, blocker, logger, wd = Some(wd)) .execSuccess[F](cmd, logger, wd = Some(wd))
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd)) .evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd))
.flatMap(fs => Stream.emits(fs)) .flatMap(fs => Stream.emits(fs))
} }
@ -117,11 +111,10 @@ object Ocr {
/** Run unpaper to optimize the image for ocr. The /** Run unpaper to optimize the image for ocr. The
* files are stored to a temporary location on disk and returned. * files are stored to a temporary location on disk and returned.
*/ */
private[extract] def runUnpaperFile[F[_]: Sync: ContextShift]( private[extract] def runUnpaperFile[F[_]: Async](
img: Path, img: Path,
unpaper: SystemCommand.Config, unpaper: SystemCommand.Config,
wd: Path, wd: Path,
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
): Stream[F, Path] = { ): Stream[F, Path] = {
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
@ -132,7 +125,7 @@ object Ocr {
) )
) )
SystemCommand SystemCommand
.execSuccess[F](cmd, blocker, logger, wd = Some(wd)) .execSuccess[F](cmd, logger, wd = Some(wd))
.map(_ => targetFile) .map(_ => targetFile)
.handleErrorWith { th => .handleErrorWith { th =>
logger logger
@ -146,39 +139,36 @@ object Ocr {
/** Run tesseract on the given image file and return the extracted /** Run tesseract on the given image file and return the extracted
* text. * text.
*/ */
private[extract] def runTesseractFile[F[_]: Sync: ContextShift]( private[extract] def runTesseractFile[F[_]: Async](
img: Path, img: Path,
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): Stream[F, String] = ): Stream[F, String] =
// tesseract cannot cope with absolute filenames // tesseract cannot cope with absolute filenames
// so use the parent as working dir // so use the parent as working dir
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker, logger).flatMap { runUnpaperFile(img, config.unpaper.command, img.getParent, logger).flatMap { uimg =>
uimg => val cmd = config.tesseract.command
val cmd = config.tesseract.command .replace(
.replace( Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)) )
) SystemCommand
SystemCommand .execSuccess[F](cmd, logger, wd = Some(uimg.getParent))
.execSuccess[F](cmd, blocker, logger, wd = Some(uimg.getParent)) .map(_.stdout)
.map(_.stdout)
} }
/** Run tesseract on the given image file and return the extracted /** Run tesseract on the given image file and return the extracted
* text. * text.
*/ */
private[extract] def runTesseractStdin[F[_]: Sync: ContextShift]( private[extract] def runTesseractStdin[F[_]: Async](
img: Stream[F, Byte], img: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): Stream[F, String] = { ): Stream[F, String] = {
val cmd = config.tesseract.command val cmd = config.tesseract.command
.replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang))) .replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang)))
SystemCommand.execSuccess(cmd, blocker, logger, stdin = img).map(_.stdout) SystemCommand.execSuccess(cmd, logger, stdin = img).map(_.stdout)
} }
private def fixLanguage(lang: String): String = private def fixLanguage(lang: String): String =

View File

@ -1,6 +1,6 @@
package docspell.extract.ocr package docspell.extract.ocr
import cats.effect.{Blocker, ContextShift, Sync} import cats.effect._
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
@ -9,18 +9,16 @@ import docspell.files._
object TextExtract { object TextExtract {
def extract[F[_]: Sync: ContextShift]( def extract[F[_]: Async](
in: Stream[F, Byte], in: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
): Stream[F, Text] = ): Stream[F, Text] =
extractOCR(in, blocker, logger, lang, config) extractOCR(in, logger, lang, config)
def extractOCR[F[_]: Sync: ContextShift]( def extractOCR[F[_]: Async](
in: Stream[F, Byte], in: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
lang: String, lang: String,
config: OcrConfig config: OcrConfig
@ -29,10 +27,10 @@ object TextExtract {
.eval(TikaMimetype.detect(in, MimeTypeHint.none)) .eval(TikaMimetype.detect(in, MimeTypeHint.none))
.flatMap({ .flatMap({
case MimeType.pdf => case MimeType.pdf =>
Stream.eval(Ocr.extractPdf(in, blocker, logger, lang, config)).unNoneTerminate Stream.eval(Ocr.extractPdf(in, logger, lang, config)).unNoneTerminate
case mt if mt.primary == "image" => case mt if mt.primary == "image" =>
Ocr.extractImage(in, blocker, logger, lang, config) Ocr.extractImage(in, logger, lang, config)
case mt => case mt =>
raiseError(s"File `$mt` not supported") raiseError(s"File `$mt` not supported")

View File

@ -12,6 +12,7 @@ import fs2.Stream
import org.apache.commons.io.output.ByteArrayOutputStream import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.pdfbox.pdmodel.PDDocument import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.rendering.PDFRenderer import org.apache.pdfbox.rendering.PDFRenderer
import scodec.bits.ByteVector
trait PdfboxPreview[F[_]] { trait PdfboxPreview[F[_]] {
@ -50,7 +51,7 @@ object PdfboxPreview {
private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = { private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = {
val out = new ByteArrayOutputStream() val out = new ByteArrayOutputStream()
ImageIO.write(img, "PNG", out) ImageIO.write(img, "PNG", out)
Stream.chunk(Chunk.bytes(out.toByteArray())) Stream.chunk(Chunk.byteVector(ByteVector.view(out.toByteArray())))
} }
} }

View File

@ -1,6 +1,7 @@
package docspell.extract.ocr package docspell.extract.ocr
import cats.effect.IO import cats.effect.IO
import cats.effect.unsafe.implicits.global
import docspell.common.Logger import docspell.common.Logger
import docspell.files.TestFiles import docspell.files.TestFiles
@ -14,7 +15,7 @@ class TextExtractionSuite extends FunSuite {
test("extract english pdf".ignore) { test("extract english pdf".ignore) {
val text = TextExtract val text = TextExtract
.extract[IO](letterSourceEN, blocker, logger, "eng", OcrConfig.default) .extract[IO](letterSourceEN, logger, "eng", OcrConfig.default)
.compile .compile
.lastOrError .lastOrError
.unsafeRunSync() .unsafeRunSync()
@ -24,7 +25,7 @@ class TextExtractionSuite extends FunSuite {
test("extract german pdf".ignore) { test("extract german pdf".ignore) {
val expect = TestFiles.letterDEText val expect = TestFiles.letterDEText
val extract = TextExtract val extract = TextExtract
.extract[IO](letterSourceDE, blocker, logger, "deu", OcrConfig.default) .extract[IO](letterSourceDE, logger, "deu", OcrConfig.default)
.compile .compile
.lastOrError .lastOrError
.unsafeRunSync() .unsafeRunSync()

View File

@ -1,14 +1,13 @@
package docspell.extract.odf package docspell.extract.odf
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.files.{ExampleFiles, TestFiles} import docspell.files.ExampleFiles
import munit._ import munit._
class OdfExtractTest extends FunSuite { class OdfExtractTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val files = List( val files = List(
ExampleFiles.examples_sample_odt -> 6372, ExampleFiles.examples_sample_odt -> 6372,
@ -21,7 +20,7 @@ class OdfExtractTest extends FunSuite {
val str1 = OdfExtract.get(is).fold(throw _, identity) val str1 = OdfExtract.get(is).fold(throw _, identity)
assertEquals(str1.length, len) assertEquals(str1.length, len)
val data = file.readURL[IO](8192, blocker) val data = file.readURL[IO](8192)
val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity) val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity)
assertEquals(str2, str1) assertEquals(str2, str1)
} }

View File

@ -1,14 +1,13 @@
package docspell.extract.pdfbox package docspell.extract.pdfbox
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.files.{ExampleFiles, TestFiles} import docspell.files.{ExampleFiles, TestFiles}
import munit._ import munit._
class PdfboxExtractTest extends FunSuite { class PdfboxExtractTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val textPDFs = List( val textPDFs = List(
ExampleFiles.letter_de_pdf -> TestFiles.letterDEText, ExampleFiles.letter_de_pdf -> TestFiles.letterDEText,
@ -27,7 +26,7 @@ class PdfboxExtractTest extends FunSuite {
test("extract text from text PDFs via Stream") { test("extract text from text PDFs via Stream") {
textPDFs.foreach { case (file, txt) => textPDFs.foreach { case (file, txt) =>
val data = file.readURL[IO](8192, blocker) val data = file.readURL[IO](8192)
val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity) val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity)
val received = removeFormatting(str.value) val received = removeFormatting(str.value)
val expect = removeFormatting(txt) val expect = removeFormatting(txt)

View File

@ -3,15 +3,15 @@ package docspell.extract.pdfbox
import java.nio.file.Path import java.nio.file.Path
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import fs2.Stream import fs2.Stream
import fs2.io.file.Files
import docspell.files.{ExampleFiles, TestFiles} import docspell.files.ExampleFiles
import munit._ import munit._
class PdfboxPreviewTest extends FunSuite { class PdfboxPreviewTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val testPDFs = List( val testPDFs = List(
ExampleFiles.letter_de_pdf -> "7d98be75b239816d6c751b3f3c56118ebf1a4632c43baf35a68a662f9d595ab8", ExampleFiles.letter_de_pdf -> "7d98be75b239816d6c751b3f3c56118ebf1a4632c43baf35a68a662f9d595ab8",
@ -21,7 +21,7 @@ class PdfboxPreviewTest extends FunSuite {
test("extract first page image from PDFs".flaky) { test("extract first page image from PDFs".flaky) {
testPDFs.foreach { case (file, checksum) => testPDFs.foreach { case (file, checksum) =>
val data = file.readURL[IO](8192, blocker) val data = file.readURL[IO](8192)
val sha256out = val sha256out =
Stream Stream
.eval(PdfboxPreview[IO](PreviewConfig(48))) .eval(PdfboxPreview[IO](PreviewConfig(48)))
@ -42,7 +42,7 @@ class PdfboxPreviewTest extends FunSuite {
def writeToFile(data: Stream[IO, Byte], file: Path): IO[Unit] = def writeToFile(data: Stream[IO, Byte], file: Path): IO[Unit] =
data data
.through( .through(
fs2.io.file.writeAll(file, blocker) Files[IO].writeAll(file)
) )
.compile .compile
.drain .drain

View File

@ -1,15 +1,14 @@
package docspell.extract.poi package docspell.extract.poi
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.common.MimeTypeHint import docspell.common.MimeTypeHint
import docspell.files.{ExampleFiles, TestFiles} import docspell.files.ExampleFiles
import munit._ import munit._
class PoiExtractTest extends FunSuite { class PoiExtractTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val officeFiles = List( val officeFiles = List(
ExampleFiles.examples_sample_doc -> 6241, ExampleFiles.examples_sample_doc -> 6241,
@ -21,13 +20,13 @@ class PoiExtractTest extends FunSuite {
test("extract text from ms office files") { test("extract text from ms office files") {
officeFiles.foreach { case (file, len) => officeFiles.foreach { case (file, len) =>
val str1 = PoiExtract val str1 = PoiExtract
.get[IO](file.readURL[IO](8192, blocker), MimeTypeHint.none) .get[IO](file.readURL[IO](8192), MimeTypeHint.none)
.unsafeRunSync() .unsafeRunSync()
.fold(throw _, identity) .fold(throw _, identity)
val str2 = PoiExtract val str2 = PoiExtract
.get[IO]( .get[IO](
file.readURL[IO](8192, blocker), file.readURL[IO](8192),
MimeTypeHint(Some(file.path.segments.last), None) MimeTypeHint(Some(file.path.segments.last), None)
) )
.unsafeRunSync() .unsafeRunSync()

View File

@ -13,28 +13,19 @@ import docspell.common.Glob
object Zip { object Zip {
def unzipP[F[_]: ConcurrentEffect: ContextShift]( def unzipP[F[_]: Async](chunkSize: Int, glob: Glob): Pipe[F, Byte, Binary[F]] =
chunkSize: Int, s => unzip[F](chunkSize, glob)(s)
blocker: Blocker,
glob: Glob
): Pipe[F, Byte, Binary[F]] =
s => unzip[F](chunkSize, blocker, glob)(s)
def unzip[F[_]: ConcurrentEffect: ContextShift]( def unzip[F[_]: Async](chunkSize: Int, glob: Glob)(
chunkSize: Int,
blocker: Blocker,
glob: Glob
)(
data: Stream[F, Byte] data: Stream[F, Byte]
): Stream[F, Binary[F]] = ): Stream[F, Binary[F]] =
data data
.through(fs2.io.toInputStream[F]) .through(fs2.io.toInputStream[F])
.flatMap(in => unzipJava(in, chunkSize, blocker, glob)) .flatMap(in => unzipJava(in, chunkSize, glob))
def unzipJava[F[_]: Sync: ContextShift]( def unzipJava[F[_]: Async](
in: InputStream, in: InputStream,
chunkSize: Int, chunkSize: Int,
blocker: Blocker,
glob: Glob glob: Glob
): Stream[F, Binary[F]] = { ): Stream[F, Binary[F]] = {
val zin = new ZipInputStream(in) val zin = new ZipInputStream(in)
@ -52,7 +43,7 @@ object Zip {
.map { ze => .map { ze =>
val name = Paths.get(ze.getName()).getFileName.toString val name = Paths.get(ze.getName()).getFileName.toString
val data = val data =
fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false) fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, false)
Binary(name, data) Binary(name, data)
} }
} }

View File

@ -1,16 +1,14 @@
package docspell.files package docspell.files
import scala.concurrent.ExecutionContext
import scala.util.Using import scala.util.Using
import cats.effect.{Blocker, IO} import cats.effect._
import cats.effect.unsafe.implicits.global
import cats.implicits._ import cats.implicits._
import munit._ import munit._
class ImageSizeTest extends FunSuite { class ImageSizeTest extends FunSuite {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
implicit val CS = IO.contextShift(ExecutionContext.global)
//tiff files are not supported on the jdk by default //tiff files are not supported on the jdk by default
//requires an external library //requires an external library
@ -37,7 +35,7 @@ class ImageSizeTest extends FunSuite {
test("get sizes from stream") { test("get sizes from stream") {
files.foreach { case (uri, expect) => files.foreach { case (uri, expect) =>
val stream = uri.readURL[IO](8192, blocker) val stream = uri.readURL[IO](8192)
val dim = ImageSize.get(stream).unsafeRunSync() val dim = ImageSize.get(stream).unsafeRunSync()
assertEquals(dim, expect.some) assertEquals(dim, expect.some)
} }

View File

@ -1,19 +1,17 @@
package docspell.files package docspell.files
import scala.concurrent.ExecutionContext
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.common.MimeTypeHint import docspell.common.MimeTypeHint
object Playing extends IOApp { object Playing extends IOApp {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
def run(args: List[String]): IO[ExitCode] = def run(args: List[String]): IO[ExitCode] =
IO { IO {
//val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker) //val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker)
//val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker) //val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker)
val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192, blocker) val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192)
val x = for { val x = for {
odsm1 <- odsm1 <-

View File

@ -1,29 +1,26 @@
package docspell.files package docspell.files
import scala.concurrent.ExecutionContext import cats.effect._
import cats.effect.unsafe.implicits.global
import cats.effect.{Blocker, IO}
import fs2.Stream import fs2.Stream
object TestFiles { object TestFiles {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
implicit val CS = IO.contextShift(ExecutionContext.global)
val letterSourceDE: Stream[IO, Byte] = val letterSourceDE: Stream[IO, Byte] =
ExampleFiles.letter_de_pdf ExampleFiles.letter_de_pdf
.readURL[IO](8 * 1024, blocker) .readURL[IO](8 * 1024)
val letterSourceEN: Stream[IO, Byte] = val letterSourceEN: Stream[IO, Byte] =
ExampleFiles.letter_en_pdf ExampleFiles.letter_en_pdf
.readURL[IO](8 * 1024, blocker) .readURL[IO](8 * 1024)
lazy val letterDEText = lazy val letterDEText =
ExampleFiles.letter_de_txt ExampleFiles.letter_de_txt
.readText[IO](8 * 1024, blocker) .readText[IO](8 * 1024)
.unsafeRunSync() .unsafeRunSync()
lazy val letterENText = lazy val letterENText =
ExampleFiles.letter_en_txt ExampleFiles.letter_en_txt
.readText[IO](8 * 1024, blocker) .readText[IO](8 * 1024)
.unsafeRunSync() .unsafeRunSync()
} }

View File

@ -1,8 +1,7 @@
package docspell.files package docspell.files
import scala.concurrent.ExecutionContext
import cats.effect._ import cats.effect._
import cats.effect.unsafe.implicits.global
import cats.implicits._ import cats.implicits._
import docspell.common.Glob import docspell.common.Glob
@ -11,12 +10,9 @@ import munit._
class ZipTest extends FunSuite { class ZipTest extends FunSuite {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
implicit val CS = IO.contextShift(ExecutionContext.global)
test("unzip") { test("unzip") {
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker) val zipFile = ExampleFiles.letters_zip.readURL[IO](8192)
val uncomp = zipFile.through(Zip.unzip(8192, blocker, Glob.all)) val uncomp = zipFile.through(Zip.unzip(8192, Glob.all))
uncomp uncomp
.evalMap { entry => .evalMap { entry =>

View File

@ -11,7 +11,7 @@ import org.http4s.client.Client
import org.http4s.client.middleware.Logger import org.http4s.client.middleware.Logger
import org.log4s.getLogger import org.log4s.getLogger
final class SolrFtsClient[F[_]: Effect]( final class SolrFtsClient[F[_]: Async](
solrUpdate: SolrUpdate[F], solrUpdate: SolrUpdate[F],
solrSetup: SolrSetup[F], solrSetup: SolrSetup[F],
solrQuery: SolrQuery[F] solrQuery: SolrQuery[F]
@ -77,7 +77,7 @@ final class SolrFtsClient[F[_]: Effect](
object SolrFtsClient { object SolrFtsClient {
private[this] val logger = getLogger private[this] val logger = getLogger
def apply[F[_]: ConcurrentEffect]( def apply[F[_]: Async](
cfg: SolrConfig, cfg: SolrConfig,
httpClient: Client[F] httpClient: Client[F]
): Resource[F, FtsClient[F]] = { ): Resource[F, FtsClient[F]] = {
@ -91,7 +91,7 @@ object SolrFtsClient {
) )
} }
private def loggingMiddleware[F[_]: Concurrent]( private def loggingMiddleware[F[_]: Async](
cfg: SolrConfig, cfg: SolrConfig,
client: Client[F] client: Client[F]
): Client[F] = ): Client[F] =

View File

@ -22,7 +22,7 @@ trait SolrQuery[F[_]] {
} }
object SolrQuery { object SolrQuery {
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = { def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
val dsl = new Http4sClientDsl[F] {} val dsl = new Http4sClientDsl[F] {}
import dsl._ import dsl._

View File

@ -24,7 +24,7 @@ trait SolrSetup[F[_]] {
object SolrSetup { object SolrSetup {
private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd" private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = { def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
val dsl = new Http4sClientDsl[F] {} val dsl = new Http4sClientDsl[F] {}
import dsl._ import dsl._

View File

@ -30,7 +30,7 @@ trait SolrUpdate[F[_]] {
object SolrUpdate { object SolrUpdate {
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = { def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = {
val dsl = new Http4sClientDsl[F] {} val dsl = new Http4sClientDsl[F] {}
import dsl._ import dsl._

View File

@ -30,10 +30,10 @@ import docspell.store.queue._
import docspell.store.records.RJobLog import docspell.store.records.RJobLog
import emil.javamail._ import emil.javamail._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
final class JoexAppImpl[F[_]: ConcurrentEffect: Timer]( final class JoexAppImpl[F[_]: Async](
cfg: Config, cfg: Config,
nodeOps: ONode[F], nodeOps: ONode[F],
store: Store[F], store: Store[F],
@ -49,8 +49,8 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
val prun = periodicScheduler.start.compile.drain val prun = periodicScheduler.start.compile.drain
for { for {
_ <- scheduleBackgroundTasks _ <- scheduleBackgroundTasks
_ <- ConcurrentEffect[F].start(run) _ <- Async[F].start(run)
_ <- ConcurrentEffect[F].start(prun) _ <- Async[F].start(prun)
_ <- scheduler.periodicAwake _ <- scheduler.periodicAwake
_ <- periodicScheduler.periodicAwake _ <- periodicScheduler.periodicAwake
_ <- nodeOps.register(cfg.appId, NodeType.Joex, cfg.baseUrl) _ <- nodeOps.register(cfg.appId, NodeType.Joex, cfg.baseUrl)
@ -79,17 +79,16 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
object JoexAppImpl { object JoexAppImpl {
def create[F[_]: ConcurrentEffect: ContextShift: Timer]( def create[F[_]: Async](
cfg: Config, cfg: Config,
termSignal: SignallingRef[F, Boolean], termSignal: SignallingRef[F, Boolean],
connectEC: ExecutionContext, connectEC: ExecutionContext,
clientEC: ExecutionContext, clientEC: ExecutionContext
blocker: Blocker
): Resource[F, JoexApp[F]] = ): Resource[F, JoexApp[F]] =
for { for {
httpClient <- BlazeClientBuilder[F](clientEC).resource httpClient <- BlazeClientBuilder[F](clientEC).resource
client = JoexClient(httpClient) client = JoexClient(httpClient)
store <- Store.create(cfg.jdbc, connectEC, blocker) store <- Store.create(cfg.jdbc, connectEC)
queue <- JobQueue(store) queue <- JobQueue(store)
pstore <- PeriodicTaskStore.create(store) pstore <- PeriodicTaskStore.create(store)
nodeOps <- ONode(store) nodeOps <- ONode(store)
@ -97,11 +96,11 @@ object JoexAppImpl {
upload <- OUpload(store, queue, cfg.files, joex) upload <- OUpload(store, queue, cfg.files, joex)
fts <- createFtsClient(cfg)(httpClient) fts <- createFtsClient(cfg)(httpClient)
itemOps <- OItem(store, fts, queue, joex) itemOps <- OItem(store, fts, queue, joex)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig, blocker) analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, blocker, store) regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
javaEmil = javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug)) JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
sch <- SchedulerBuilder(cfg.scheduler, blocker, store) sch <- SchedulerBuilder(cfg.scheduler, store)
.withQueue(queue) .withQueue(queue)
.withTask( .withTask(
JobTask.json( JobTask.json(
@ -207,14 +206,13 @@ object JoexAppImpl {
sch, sch,
queue, queue,
pstore, pstore,
client, client
Timer[F]
) )
app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch) app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown) appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR } yield appR
private def createFtsClient[F[_]: ConcurrentEffect]( private def createFtsClient[F[_]: Async](
cfg: Config cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] = )(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client) if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)

View File

@ -1,7 +1,7 @@
package docspell.joex package docspell.joex
import cats.effect.Ref
import cats.effect._ import cats.effect._
import cats.effect.concurrent.Ref
import fs2.Stream import fs2.Stream
import fs2.concurrent.SignallingRef import fs2.concurrent.SignallingRef
@ -9,9 +9,9 @@ import docspell.common.Pools
import docspell.joex.routes._ import docspell.joex.routes._
import org.http4s.HttpApp import org.http4s.HttpApp
import org.http4s.blaze.server.BlazeServerBuilder
import org.http4s.implicits._ import org.http4s.implicits._
import org.http4s.server.Router import org.http4s.server.Router
import org.http4s.server.blaze.BlazeServerBuilder
import org.http4s.server.middleware.Logger import org.http4s.server.middleware.Logger
object JoexServer { object JoexServer {
@ -22,17 +22,14 @@ object JoexServer {
exitRef: Ref[F, ExitCode] exitRef: Ref[F, ExitCode]
) )
def stream[F[_]: ConcurrentEffect: ContextShift]( def stream[F[_]: Async](cfg: Config, pools: Pools): Stream[F, Nothing] = {
cfg: Config,
pools: Pools
)(implicit T: Timer[F]): Stream[F, Nothing] = {
val app = for { val app = for {
signal <- Resource.eval(SignallingRef[F, Boolean](false)) signal <- Resource.eval(SignallingRef[F, Boolean](false))
exitCode <- Resource.eval(Ref[F].of(ExitCode.Success)) exitCode <- Resource.eval(Ref[F].of(ExitCode.Success))
joexApp <- joexApp <-
JoexAppImpl JoexAppImpl
.create[F](cfg, signal, pools.connectEC, pools.httpClientEC, pools.blocker) .create[F](cfg, signal, pools.connectEC, pools.httpClientEC)
httpApp = Router( httpApp = Router(
"/api/info" -> InfoRoutes(cfg), "/api/info" -> InfoRoutes(cfg),

View File

@ -57,9 +57,8 @@ object Main extends IOApp {
val pools = for { val pools = for {
cec <- connectEC cec <- connectEC
bec <- blockingEC bec <- blockingEC
blocker = Blocker.liftExecutorService(bec)
rec <- restserverEC rec <- restserverEC
} yield Pools(cec, bec, blocker, rec) } yield Pools(cec, bec, rec)
pools.use(p => pools.use(p =>
JoexServer JoexServer
.stream[IO](cfg, p) .stream[IO](cfg, p)

View File

@ -33,16 +33,15 @@ object NerFile {
private def jsonFilePath(directory: Path, collective: Ident): Path = private def jsonFilePath(directory: Path, collective: Ident): Path =
directory.resolve(s"${collective.id}.json") directory.resolve(s"${collective.id}.json")
def find[F[_]: Sync: ContextShift]( def find[F[_]: Async](
collective: Ident, collective: Ident,
directory: Path, directory: Path
blocker: Blocker
): F[Option[NerFile]] = { ): F[Option[NerFile]] = {
val file = jsonFilePath(directory, collective) val file = jsonFilePath(directory, collective)
File.existsNonEmpty[F](file).flatMap { File.existsNonEmpty[F](file).flatMap {
case true => case true =>
File File
.readJson[F, NerFile](file, blocker) .readJson[F, NerFile](file)
.map(_.some) .map(_.some)
case false => case false =>
(None: Option[NerFile]).pure[F] (None: Option[NerFile]).pure[F]

View File

@ -3,7 +3,7 @@ package docspell.joex.analysis
import java.nio.file.Path import java.nio.file.Path
import cats.effect._ import cats.effect._
import cats.effect.concurrent.Semaphore import cats.effect.std.Semaphore
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
@ -31,19 +31,17 @@ object RegexNerFile {
case class Config(maxEntries: Int, directory: Path, minTime: Duration) case class Config(maxEntries: Int, directory: Path, minTime: Duration)
def apply[F[_]: Concurrent: ContextShift]( def apply[F[_]: Async](
cfg: Config, cfg: Config,
blocker: Blocker,
store: Store[F] store: Store[F]
): Resource[F, RegexNerFile[F]] = ): Resource[F, RegexNerFile[F]] =
for { for {
dir <- File.withTempDir[F](cfg.directory, "regexner-") dir <- File.withTempDir[F](cfg.directory, "regexner-")
writer <- Resource.eval(Semaphore(1)) writer <- Resource.eval(Semaphore(1))
} yield new Impl[F](cfg.copy(directory = dir), blocker, store, writer) } yield new Impl[F](cfg.copy(directory = dir), store, writer)
final private class Impl[F[_]: Concurrent: ContextShift]( final private class Impl[F[_]: Async](
cfg: Config, cfg: Config,
blocker: Blocker,
store: Store[F], store: Store[F],
writer: Semaphore[F] //TODO allow parallelism per collective writer: Semaphore[F] //TODO allow parallelism per collective
) extends RegexNerFile[F] { ) extends RegexNerFile[F] {
@ -55,7 +53,7 @@ object RegexNerFile {
def doMakeFile(collective: Ident): F[Option[Path]] = def doMakeFile(collective: Ident): F[Option[Path]] =
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
existing <- NerFile.find[F](collective, cfg.directory, blocker) existing <- NerFile.find[F](collective, cfg.directory)
result <- existing match { result <- existing match {
case Some(nf) => case Some(nf) =>
val dur = Duration.between(nf.creation, now) val dur = Duration.between(nf.creation, now)
@ -105,11 +103,13 @@ object RegexNerFile {
} yield result } yield result
private def updateTimestamp(nf: NerFile, now: Timestamp): F[Unit] = private def updateTimestamp(nf: NerFile, now: Timestamp): F[Unit] =
writer.withPermit(for { writer.permit.use(_ =>
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory)) for {
_ <- File.mkDir(file.getParent) file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2) _ <- File.mkDir(file.getParent)
} yield ()) _ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
} yield ()
)
private def createFile( private def createFile(
lastUpdate: Timestamp, lastUpdate: Timestamp,
@ -117,13 +117,17 @@ object RegexNerFile {
now: Timestamp now: Timestamp
): F[NerFile] = { ): F[NerFile] = {
def update(nf: NerFile, text: String): F[Unit] = def update(nf: NerFile, text: String): F[Unit] =
writer.withPermit(for { writer.permit.use(_ =>
jsonFile <- Sync[F].pure(nf.jsonFilePath(cfg.directory)) for {
_ <- logger.fdebug(s"Writing custom NER file for collective '${collective.id}'") jsonFile <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
_ <- File.mkDir(jsonFile.getParent) _ <- logger.fdebug(
_ <- File.writeString(nf.nerFilePath(cfg.directory), text) s"Writing custom NER file for collective '${collective.id}'"
_ <- File.writeString(jsonFile, nf.asJson.spaces2) )
} yield ()) _ <- File.mkDir(jsonFile.getParent)
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
} yield ()
)
for { for {
_ <- logger.finfo(s"Generating custom NER file for collective '${collective.id}'") _ <- logger.finfo(s"Generating custom NER file for collective '${collective.id}'")

View File

@ -28,7 +28,7 @@ object Migration {
def from[F[_]: Applicative: FlatMap](fm: FtsMigration[F]): Migration[F] = def from[F[_]: Applicative: FlatMap](fm: FtsMigration[F]): Migration[F] =
Migration(fm.version, fm.engine, fm.description, FtsWork.from(fm.task)) Migration(fm.version, fm.engine, fm.description, FtsWork.from(fm.task))
def apply[F[_]: Effect]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F], fts: FtsClient[F],
store: Store[F], store: Store[F],
@ -41,7 +41,7 @@ object Migration {
} }
} }
def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] = def applySingle[F[_]: Async](ctx: FtsContext[F])(m: Migration[F]): F[Unit] =
for { for {
_ <- ctx.logger.info(s"Apply ${m.version}/${m.description}") _ <- ctx.logger.info(s"Apply ${m.version}/${m.description}")
_ <- m.task.run(ctx) _ <- m.task.run(ctx)

View File

@ -12,7 +12,7 @@ import docspell.store.records.RJob
object MigrationTask { object MigrationTask {
val taskName = Ident.unsafe("full-text-index") val taskName = Ident.unsafe("full-text-index")
def apply[F[_]: ConcurrentEffect]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F] fts: FtsClient[F]
): Task[F, Unit, Unit] = ): Task[F, Unit, Unit] =
@ -46,7 +46,7 @@ object MigrationTask {
Some(DocspellSystem.migrationTaskTracker) Some(DocspellSystem.migrationTaskTracker)
) )
def migrationTasks[F[_]: Effect](fts: FtsClient[F]): F[List[Migration[F]]] = def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] =
fts.initialize.map(_.map(fm => Migration.from(fm))) fts.initialize.map(_.map(fm => Migration.from(fm)))
} }

View File

@ -14,7 +14,7 @@ object ReIndexTask {
val taskName = ReIndexTaskArgs.taskName val taskName = ReIndexTaskArgs.taskName
val tracker = DocspellSystem.migrationTaskTracker val tracker = DocspellSystem.migrationTaskTracker
def apply[F[_]: ConcurrentEffect]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F] fts: FtsClient[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
@ -27,7 +27,7 @@ object ReIndexTask {
def onCancel[F[_]]: Task[F, Args, Unit] = def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log[F, Args](_.warn("Cancelling full-text re-index task")) Task.log[F, Args](_.warn("Cancelling full-text re-index task"))
private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] = private def clearData[F[_]: Async](collective: Option[Ident]): FtsWork[F] =
FtsWork.log[F](_.info("Clearing index data")) ++ FtsWork.log[F](_.info("Clearing index data")) ++
(collective match { (collective match {
case Some(_) => case Some(_) =>

View File

@ -7,19 +7,20 @@ import docspell.common._
import docspell.joex.scheduler.{Context, Task} import docspell.joex.scheduler.{Context, Task}
import docspell.store.records._ import docspell.store.records._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
object CheckNodesTask { object CheckNodesTask {
def apply[F[_]: ConcurrentEffect]( def apply[F[_]: Async](
cfg: HouseKeepingConfig.CheckNodes cfg: HouseKeepingConfig.CheckNodes
): Task[F, Unit, Unit] = ): Task[F, Unit, Unit] =
Task { ctx => Task { ctx =>
if (cfg.enabled) if (cfg.enabled)
for { for {
_ <- ctx.logger.info("Check nodes reachability") _ <- ctx.logger.info("Check nodes reachability")
_ <- BlazeClientBuilder[F](ctx.blocker.blockingContext).resource.use { client => ec = scala.concurrent.ExecutionContext.global
_ <- BlazeClientBuilder[F](ec).resource.use { client =>
checkNodes(ctx, client) checkNodes(ctx, client)
} }
_ <- ctx.logger.info( _ <- ctx.logger.info(
@ -32,7 +33,7 @@ object CheckNodesTask {
ctx.logger.info("CheckNodes task is disabled in the configuration") ctx.logger.info("CheckNodes task is disabled in the configuration")
} }
def checkNodes[F[_]: Sync](ctx: Context[F, _], client: Client[F]): F[Unit] = def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] =
ctx.store ctx.store
.transact(RNode.streamAll) .transact(RNode.streamAll)
.evalMap(node => .evalMap(node =>
@ -45,7 +46,7 @@ object CheckNodesTask {
.compile .compile
.drain .drain
def checkNode[F[_]: Sync](logger: Logger[F], client: Client[F])( def checkNode[F[_]: Async](logger: Logger[F], client: Client[F])(
url: LenientUri url: LenientUri
): F[Boolean] = { ): F[Boolean] = {
val apiVersion = url / "api" / "info" / "version" val apiVersion = url / "api" / "info" / "version"

View File

@ -15,7 +15,7 @@ object HouseKeepingTask {
val taskName: Ident = Ident.unsafe("housekeeping") val taskName: Ident = Ident.unsafe("housekeeping")
def apply[F[_]: ConcurrentEffect](cfg: Config): Task[F, Unit, Unit] = def apply[F[_]: Async](cfg: Config): Task[F, Unit, Unit] =
Task Task
.log[F, Unit](_.info(s"Running house-keeping task now")) .log[F, Unit](_.info(s"Running house-keeping task now"))
.flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites)) .flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites))

View File

@ -5,6 +5,7 @@ import java.nio.file.Path
import cats.data.OptionT import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.io.file.Files
import docspell.analysis.classifier.{ClassifierModel, TextClassifier} import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
import docspell.common._ import docspell.common._
@ -15,8 +16,7 @@ import bitpeace.RangeDef
object Classify { object Classify {
def apply[F[_]: Sync: ContextShift]( def apply[F[_]: Async](
blocker: Blocker,
logger: Logger[F], logger: Logger[F],
workingDir: Path, workingDir: Path,
store: Store[F], store: Store[F],
@ -36,7 +36,7 @@ object Classify {
cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir => cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir =>
val modelFile = dir.resolve("model.ser.gz") val modelFile = dir.resolve("model.ser.gz")
modelData modelData
.through(fs2.io.file.writeAll(modelFile, blocker)) .through(Files[F].writeAll(modelFile))
.compile .compile
.drain .drain
.flatMap(_ => classifier.classify(logger, ClassifierModel(modelFile), text)) .flatMap(_ => classifier.classify(logger, ClassifierModel(modelFile), text))

View File

@ -20,7 +20,7 @@ object LearnClassifierTask {
def onCancel[F[_]]: Task[F, Args, Unit] = def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn("Cancelling learn-classifier task")) Task.log(_.warn("Cancelling learn-classifier task"))
def apply[F[_]: Sync: ContextShift]( def apply[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
@ -28,7 +28,7 @@ object LearnClassifierTask {
.flatMap(_ => learnItemEntities(cfg, analyser)) .flatMap(_ => learnItemEntities(cfg, analyser))
.flatMap(_ => Task(_ => Sync[F].delay(System.gc()))) .flatMap(_ => Task(_ => Sync[F].delay(System.gc())))
private def learnItemEntities[F[_]: Sync: ContextShift]( private def learnItemEntities[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
@ -45,7 +45,7 @@ object LearnClassifierTask {
else ().pure[F] else ().pure[F]
} }
private def learnTags[F[_]: Sync: ContextShift]( private def learnTags[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =

View File

@ -11,7 +11,7 @@ import docspell.common._
import docspell.joex.scheduler._ import docspell.joex.scheduler._
object LearnItemEntities { object LearnItemEntities {
def learnAll[F[_]: Sync: ContextShift, A]( def learnAll[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
@ -22,7 +22,7 @@ object LearnItemEntities {
.flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen)) .flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen))
.flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen)) .flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen))
def learnCorrOrg[F[_]: Sync: ContextShift, A]( def learnCorrOrg[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
@ -33,7 +33,7 @@ object LearnItemEntities {
ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen) ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen)
) )
def learnCorrPerson[F[_]: Sync: ContextShift, A]( def learnCorrPerson[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
@ -44,7 +44,7 @@ object LearnItemEntities {
ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen) ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen)
) )
def learnConcPerson[F[_]: Sync: ContextShift, A]( def learnConcPerson[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
@ -55,7 +55,7 @@ object LearnItemEntities {
ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen) ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen)
) )
def learnConcEquip[F[_]: Sync: ContextShift, A]( def learnConcEquip[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
@ -66,7 +66,7 @@ object LearnItemEntities {
ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen) ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen)
) )
private def learn[F[_]: Sync: ContextShift, A]( private def learn[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident collective: Ident
)(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] = )(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =

View File

@ -11,7 +11,7 @@ import docspell.store.records.RClassifierSetting
object LearnTags { object LearnTags {
def learnTagCategory[F[_]: Sync: ContextShift, A]( def learnTagCategory[F[_]: Async, A](
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
@ -33,7 +33,7 @@ object LearnTags {
) )
} }
def learnAllTagCategories[F[_]: Sync: ContextShift, A](analyser: TextAnalyser[F])( def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])(
collective: Ident, collective: Ident,
maxItems: Int, maxItems: Int,
maxTextLen: Int maxTextLen: Int

View File

@ -2,6 +2,7 @@ package docspell.joex.learn
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.io.file.Files
import docspell.analysis.classifier.ClassifierModel import docspell.analysis.classifier.ClassifierModel
import docspell.common._ import docspell.common._
@ -13,18 +14,17 @@ import bitpeace.MimetypeHint
object StoreClassifierModel { object StoreClassifierModel {
def handleModel[F[_]: Sync: ContextShift]( def handleModel[F[_]: Async](
ctx: Context[F, _], ctx: Context[F, _],
collective: Ident, collective: Ident,
modelName: ClassifierName modelName: ClassifierName
)( )(
trainedModel: ClassifierModel trainedModel: ClassifierModel
): F[Unit] = ): F[Unit] =
handleModel(ctx.store, ctx.blocker, ctx.logger)(collective, modelName, trainedModel) handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel)
def handleModel[F[_]: Sync: ContextShift]( def handleModel[F[_]: Async](
store: Store[F], store: Store[F],
blocker: Blocker,
logger: Logger[F] logger: Logger[F]
)( )(
collective: Ident, collective: Ident,
@ -36,7 +36,7 @@ object StoreClassifierModel {
RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId)) RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
) )
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}") _ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
fileData = fs2.io.file.readAll(trainedModel.model, blocker, 4096) fileData = Files[F].readAll(trainedModel.model, 4096)
newFile <- newFile <-
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
_ <- store.transact( _ <- store.transact(

View File

@ -15,7 +15,7 @@ import emil.{MimeType => _, _}
object ReadMail { object ReadMail {
def readBytesP[F[_]: ConcurrentEffect]( def readBytesP[F[_]: Async](
logger: Logger[F], logger: Logger[F],
glob: Glob glob: Glob
): Pipe[F, Byte, Binary[F]] = ): Pipe[F, Byte, Binary[F]] =
@ -26,7 +26,7 @@ object ReadMail {
Stream.eval(logger.debug(s"Converting e-mail file...")) >> Stream.eval(logger.debug(s"Converting e-mail file...")) >>
s.through(Mail.readBytes[F]) s.through(Mail.readBytes[F])
def mailToEntries[F[_]: ConcurrentEffect]( def mailToEntries[F[_]: Async](
logger: Logger[F], logger: Logger[F],
glob: Glob glob: Glob
)(mail: Mail[F]): Stream[F, Binary[F]] = { )(mail: Mail[F]): Stream[F, Binary[F]] = {

View File

@ -35,7 +35,7 @@ object PdfConvTask {
val taskName = Ident.unsafe("pdf-files-migration") val taskName = Ident.unsafe("pdf-files-migration")
def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, Args, Unit] = def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
for { for {
_ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf") _ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
@ -62,7 +62,7 @@ object PdfConvTask {
val existsPdf = val existsPdf =
for { for {
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId)) meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
res = meta.filter(_.mimetype.matches(Mimetype.`application/pdf`)) res = meta.filter(_.mimetype.matches(Mimetype.applicationPdf))
_ <- _ <-
if (res.isEmpty) if (res.isEmpty)
ctx.logger.info( ctx.logger.info(
@ -83,7 +83,7 @@ object PdfConvTask {
else none.pure[F] else none.pure[F]
} }
def convert[F[_]: Sync: ContextShift]( def convert[F[_]: Async](
cfg: Config, cfg: Config,
ctx: Context[F, Args], ctx: Context[F, Args],
in: FileMeta in: FileMeta
@ -118,7 +118,6 @@ object PdfConvTask {
cfg.convert.ocrmypdf, cfg.convert.ocrmypdf,
lang, lang,
in.chunksize, in.chunksize,
ctx.blocker,
ctx.logger ctx.logger
)(data, storeResult) )(data, storeResult)

View File

@ -95,7 +95,7 @@ object AttachmentPageCount {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(Mimetype.`application/octet-stream`) .getOrElse(Mimetype.applicationOctetStream)
.map(_.toLocal) .map(_.toLocal)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =

View File

@ -98,7 +98,7 @@ object AttachmentPreview {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(Mimetype.`application/octet-stream`) .getOrElse(Mimetype.applicationOctetStream)
.map(_.toLocal) .map(_.toLocal)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =

View File

@ -33,7 +33,7 @@ import bitpeace.{Mimetype, MimetypeHint, RangeDef}
*/ */
object ConvertPdf { object ConvertPdf {
def apply[F[_]: Sync: ContextShift]( def apply[F[_]: Async](
cfg: ConvertConfig, cfg: ConvertConfig,
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
@ -69,15 +69,15 @@ object ConvertPdf {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(Mimetype.`application/octet-stream`) .getOrElse(Mimetype.applicationOctetStream)
def convertSafe[F[_]: Sync: ContextShift]( def convertSafe[F[_]: Async](
cfg: ConvertConfig, cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml, sanitizeHtml: SanitizeHtml,
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
item: ItemData item: ItemData
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] = )(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
Conversion.create[F](cfg, sanitizeHtml, ctx.blocker, ctx.logger).use { conv => Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv =>
mime.toLocal match { mime.toLocal match {
case mt => case mt =>
val data = ctx.store.bitpeace val data = ctx.store.bitpeace

View File

@ -32,12 +32,12 @@ import emil.Mail
*/ */
object ExtractArchive { object ExtractArchive {
def apply[F[_]: ConcurrentEffect: ContextShift]( def apply[F[_]: Async](
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
multiPass(item, None).map(_._2) multiPass(item, None).map(_._2)
def multiPass[F[_]: ConcurrentEffect: ContextShift]( def multiPass[F[_]: Async](
item: ItemData, item: ItemData,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
@ -46,7 +46,7 @@ object ExtractArchive {
else multiPass(t._2, t._1) else multiPass(t._2, t._1)
} }
def singlePass[F[_]: ConcurrentEffect: ContextShift]( def singlePass[F[_]: Async](
item: ItemData, item: ItemData,
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
@ -85,9 +85,9 @@ object ExtractArchive {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(Mimetype.`application/octet-stream`) .getOrElse(Mimetype.applicationOctetStream)
def extractSafe[F[_]: ConcurrentEffect: ContextShift]( def extractSafe[F[_]: Async](
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] = )(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
@ -131,7 +131,7 @@ object ExtractArchive {
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id)) } yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
} }
def extractZip[F[_]: ConcurrentEffect: ContextShift]( def extractZip[F[_]: Async](
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = { )(ra: RAttachment, pos: Int): F[Extracted] = {
@ -142,7 +142,7 @@ object ExtractArchive {
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
zipData zipData
.through(Zip.unzipP[F](8192, ctx.blocker, glob)) .through(Zip.unzipP[F](8192, glob))
.zipWithIndex .zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, None)) .flatMap(handleEntry(ctx, ra, pos, archive, None))
.foldMonoid .foldMonoid
@ -150,7 +150,7 @@ object ExtractArchive {
.lastOrError .lastOrError
} }
def extractMail[F[_]: ConcurrentEffect]( def extractMail[F[_]: Async](
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive] archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = { )(ra: RAttachment, pos: Int): F[Extracted] = {

View File

@ -28,7 +28,7 @@ object ItemHandler {
} }
) )
def newItem[F[_]: ConcurrentEffect: ContextShift]( def newItem[F[_]: Async](
cfg: Config, cfg: Config,
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
@ -62,7 +62,7 @@ object ItemHandler {
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] = def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
Task(_.isLastRetry) Task(_.isLastRetry)
def safeProcess[F[_]: ConcurrentEffect: ContextShift]( def safeProcess[F[_]: Async](
cfg: Config, cfg: Config,
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],

View File

@ -12,7 +12,7 @@ import docspell.joex.scheduler.Task
object ProcessItem { object ProcessItem {
def apply[F[_]: ConcurrentEffect: ContextShift]( def apply[F[_]: Async](
cfg: Config, cfg: Config,
itemOps: OItem[F], itemOps: OItem[F],
fts: FtsClient[F], fts: FtsClient[F],
@ -27,7 +27,7 @@ object ProcessItem {
.flatMap(Task.setProgress(99)) .flatMap(Task.setProgress(99))
.flatMap(RemoveEmptyItem(itemOps)) .flatMap(RemoveEmptyItem(itemOps))
def processAttachments[F[_]: ConcurrentEffect: ContextShift]( def processAttachments[F[_]: Async](
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
@ -35,7 +35,7 @@ object ProcessItem {
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item) processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item)
def analysisOnly[F[_]: Sync: ContextShift]( def analysisOnly[F[_]: Async](
cfg: Config, cfg: Config,
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F]
@ -46,7 +46,7 @@ object ProcessItem {
.flatMap(CrossCheckProposals[F]) .flatMap(CrossCheckProposals[F])
.flatMap(SaveProposals[F]) .flatMap(SaveProposals[F])
private def processAttachments0[F[_]: ConcurrentEffect: ContextShift]( private def processAttachments0[F[_]: Async](
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
analyser: TextAnalyser[F], analyser: TextAnalyser[F],

View File

@ -20,7 +20,7 @@ import docspell.store.records.RItem
object ReProcessItem { object ReProcessItem {
type Args = ReProcessItemArgs type Args = ReProcessItemArgs
def apply[F[_]: ConcurrentEffect: ContextShift]( def apply[F[_]: Async](
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
itemOps: OItem[F], itemOps: OItem[F],
@ -84,7 +84,7 @@ object ReProcessItem {
) )
} }
def processFiles[F[_]: ConcurrentEffect: ContextShift]( def processFiles[F[_]: Async](
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
itemOps: OItem[F], itemOps: OItem[F],
@ -133,7 +133,7 @@ object ReProcessItem {
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] = def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
Task(_.isLastRetry) Task(_.isLastRetry)
def safeProcess[F[_]: ConcurrentEffect: ContextShift]( def safeProcess[F[_]: Async](
cfg: Config, cfg: Config,
fts: FtsClient[F], fts: FtsClient[F],
itemOps: OItem[F], itemOps: OItem[F],

View File

@ -19,7 +19,7 @@ import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
object TextAnalysis { object TextAnalysis {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync: ContextShift]( def apply[F[_]: Async](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
analyser: TextAnalyser[F], analyser: TextAnalyser[F],
nerFile: RegexNerFile[F] nerFile: RegexNerFile[F]
@ -78,7 +78,7 @@ object TextAnalysis {
} yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates)) } yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates))
} }
def predictTags[F[_]: Sync: ContextShift]( def predictTags[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta], metas: Vector[RAttachmentMeta],
@ -97,7 +97,7 @@ object TextAnalysis {
} yield tags.flatten } yield tags.flatten
} }
def predictItemEntities[F[_]: Sync: ContextShift]( def predictItemEntities[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta], metas: Vector[RAttachmentMeta],
@ -128,13 +128,12 @@ object TextAnalysis {
.map(MetaProposalList.apply) .map(MetaProposalList.apply)
} }
private def makeClassify[F[_]: Sync: ContextShift]( private def makeClassify[F[_]: Async](
ctx: Context[F, Args], ctx: Context[F, Args],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
classifier: TextClassifier[F] classifier: TextClassifier[F]
)(text: String): ClassifierName => F[Option[String]] = )(text: String): ClassifierName => F[Option[String]] =
Classify[F]( Classify[F](
ctx.blocker,
ctx.logger, ctx.logger,
cfg.workingDir, cfg.workingDir,
ctx.store, ctx.store,

View File

@ -15,7 +15,7 @@ import bitpeace.{Mimetype, RangeDef}
object TextExtraction { object TextExtraction {
def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])( def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])(
item: ItemData item: ItemData
): Task[F, ProcessItemArgs, ItemData] = ): Task[F, ProcessItemArgs, ItemData] =
Task { ctx => Task { ctx =>
@ -60,7 +60,7 @@ object TextExtraction {
case class Result(am: RAttachmentMeta, td: TextData, tags: List[String] = Nil) case class Result(am: RAttachmentMeta, td: TextData, tags: List[String] = Nil)
def extractTextIfEmpty[F[_]: Sync: ContextShift]( def extractTextIfEmpty[F[_]: Async](
ctx: Context[F, ProcessItemArgs], ctx: Context[F, ProcessItemArgs],
cfg: ExtractConfig, cfg: ExtractConfig,
lang: Language, lang: Language,
@ -93,7 +93,7 @@ object TextExtraction {
} }
} }
def extractTextToMeta[F[_]: Sync: ContextShift]( def extractTextToMeta[F[_]: Async](
ctx: Context[F, _], ctx: Context[F, _],
cfg: ExtractConfig, cfg: ExtractConfig,
lang: Language, lang: Language,
@ -132,13 +132,13 @@ object TextExtraction {
def findMime: F[Mimetype] = def findMime: F[Mimetype] =
OptionT(ctx.store.transact(RFileMeta.findById(fileId))) OptionT(ctx.store.transact(RFileMeta.findById(fileId)))
.map(_.mimetype) .map(_.mimetype)
.getOrElse(Mimetype.`application/octet-stream`) .getOrElse(Mimetype.applicationOctetStream)
findMime findMime
.flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang)) .flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang))
} }
private def extractTextFallback[F[_]: Sync: ContextShift]( private def extractTextFallback[F[_]: Async](
ctx: Context[F, _], ctx: Context[F, _],
cfg: ExtractConfig, cfg: ExtractConfig,
ra: RAttachment, ra: RAttachment,
@ -149,7 +149,7 @@ object TextExtraction {
ctx.logger.error(s"Cannot extract text").map(_ => None) ctx.logger.error(s"Cannot extract text").map(_ => None)
case id :: rest => case id :: rest =>
val extr = Extraction.create[F](ctx.blocker, ctx.logger, cfg) val extr = Extraction.create[F](ctx.logger, cfg)
extractText[F](ctx, extr, lang)(id) extractText[F](ctx, extr, lang)(id)
.flatMap({ .flatMap({

View File

@ -14,7 +14,7 @@ import org.http4s.dsl.Http4sDsl
object JoexRoutes { object JoexRoutes {
def apply[F[_]: ConcurrentEffect: Timer](app: JoexApp[F]): HttpRoutes[F] = { def apply[F[_]: Async](app: JoexApp[F]): HttpRoutes[F] = {
val dsl = new Http4sDsl[F] {} val dsl = new Http4sDsl[F] {}
import dsl._ import dsl._
HttpRoutes.of[F] { HttpRoutes.of[F] {
@ -34,8 +34,8 @@ object JoexRoutes {
case POST -> Root / "shutdownAndExit" => case POST -> Root / "shutdownAndExit" =>
for { for {
_ <- ConcurrentEffect[F].start( _ <- Async[F].start(
Timer[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown Temporal[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown
) )
resp <- Ok(BasicResult(true, "Shutdown initiated.")) resp <- Ok(BasicResult(true, "Shutdown initiated."))
} yield resp } yield resp

View File

@ -31,45 +31,40 @@ trait Context[F[_], A] { self =>
last = config.retries == current.getOrElse(0) last = config.retries == current.getOrElse(0)
} yield last } yield last
def blocker: Blocker
def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] = def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] =
new Context.ContextImpl[F, C](f(args), logger, store, blocker, config, jobId) new Context.ContextImpl[F, C](f(args), logger, store, config, jobId)
} }
object Context { object Context {
private[this] val log = getLogger private[this] val log = getLogger
def create[F[_]: Functor, A]( def create[F[_]: Async, A](
jobId: Ident, jobId: Ident,
arg: A, arg: A,
config: SchedulerConfig, config: SchedulerConfig,
log: Logger[F], log: Logger[F],
store: Store[F], store: Store[F]
blocker: Blocker
): Context[F, A] = ): Context[F, A] =
new ContextImpl(arg, log, store, blocker, config, jobId) new ContextImpl(arg, log, store, config, jobId)
def apply[F[_]: Concurrent, A]( def apply[F[_]: Async, A](
job: RJob, job: RJob,
arg: A, arg: A,
config: SchedulerConfig, config: SchedulerConfig,
logSink: LogSink[F], logSink: LogSink[F],
blocker: Blocker,
store: Store[F] store: Store[F]
): F[Context[F, A]] = ): F[Context[F, A]] =
for { for {
_ <- log.ftrace("Creating logger for task run") _ <- log.ftrace("Creating logger for task run")
logger <- QueueLogger(job.id, job.info, config.logBufferSize, logSink) logger <- QueueLogger(job.id, job.info, config.logBufferSize, logSink)
_ <- log.ftrace("Logger created, instantiating context") _ <- log.ftrace("Logger created, instantiating context")
ctx = create[F, A](job.id, arg, config, logger, store, blocker) ctx = create[F, A](job.id, arg, config, logger, store)
} yield ctx } yield ctx
final private class ContextImpl[F[_]: Functor, A]( final private class ContextImpl[F[_]: Functor, A](
val args: A, val args: A,
val logger: Logger[F], val logger: Logger[F],
val store: Store[F], val store: Store[F],
val blocker: Blocker,
val config: SchedulerConfig, val config: SchedulerConfig,
val jobId: Ident val jobId: Ident
) extends Context[F, A] { ) extends Context[F, A] {

View File

@ -1,8 +1,8 @@
package docspell.joex.scheduler package docspell.joex.scheduler
import cats.effect.{Concurrent, Sync} import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.{Pipe, Stream} import fs2.Pipe
import docspell.common._ import docspell.common._
import docspell.common.syntax.all._ import docspell.common.syntax.all._
@ -45,7 +45,7 @@ object LogSink {
def printer[F[_]: Sync]: LogSink[F] = def printer[F[_]: Sync]: LogSink[F] =
LogSink(_.evalMap(e => logInternal(e))) LogSink(_.evalMap(e => logInternal(e)))
def db[F[_]: Sync](store: Store[F]): LogSink[F] = def db[F[_]: Async](store: Store[F]): LogSink[F] =
LogSink( LogSink(
_.evalMap(ev => _.evalMap(ev =>
for { for {
@ -63,9 +63,6 @@ object LogSink {
) )
) )
def dbAndLog[F[_]: Concurrent](store: Store[F]): LogSink[F] = { def dbAndLog[F[_]: Async](store: Store[F]): LogSink[F] =
val s: Stream[F, Pipe[F, LogEvent, Unit]] = LogSink(_.broadcastThrough(printer[F].receive, db[F](store).receive))
Stream.emits(Seq(printer[F].receive, db[F](store).receive))
LogSink(Pipe.join(s))
}
} }

View File

@ -24,20 +24,19 @@ trait PeriodicScheduler[F[_]] {
def shutdown: F[Unit] def shutdown: F[Unit]
def periodicAwake: F[Fiber[F, Unit]] def periodicAwake: F[Fiber[F, Throwable, Unit]]
def notifyChange: F[Unit] def notifyChange: F[Unit]
} }
object PeriodicScheduler { object PeriodicScheduler {
def create[F[_]: ConcurrentEffect]( def create[F[_]: Async](
cfg: PeriodicSchedulerConfig, cfg: PeriodicSchedulerConfig,
sch: Scheduler[F], sch: Scheduler[F],
queue: JobQueue[F], queue: JobQueue[F],
store: PeriodicTaskStore[F], store: PeriodicTaskStore[F],
client: JoexClient[F], client: JoexClient[F]
timer: Timer[F]
): Resource[F, PeriodicScheduler[F]] = ): Resource[F, PeriodicScheduler[F]] =
for { for {
waiter <- Resource.eval(SignallingRef(true)) waiter <- Resource.eval(SignallingRef(true))
@ -49,8 +48,7 @@ object PeriodicScheduler {
store, store,
client, client,
waiter, waiter,
state, state
timer
) )
_ <- Resource.eval(psch.init) _ <- Resource.eval(psch.init)
} yield psch } yield psch

View File

@ -12,21 +12,19 @@ import docspell.joexapi.client.JoexClient
import docspell.store.queue._ import docspell.store.queue._
import docspell.store.records.RPeriodicTask import docspell.store.records.RPeriodicTask
import com.github.eikek.fs2calev._ import eu.timepit.fs2cron.calev.CalevScheduler
import org.log4s.getLogger import org.log4s.getLogger
final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect]( final class PeriodicSchedulerImpl[F[_]: Async](
val config: PeriodicSchedulerConfig, val config: PeriodicSchedulerConfig,
sch: Scheduler[F], sch: Scheduler[F],
queue: JobQueue[F], queue: JobQueue[F],
store: PeriodicTaskStore[F], store: PeriodicTaskStore[F],
client: JoexClient[F], client: JoexClient[F],
waiter: SignallingRef[F, Boolean], waiter: SignallingRef[F, Boolean],
state: SignallingRef[F, State[F]], state: SignallingRef[F, State[F]]
timer: Timer[F]
) extends PeriodicScheduler[F] { ) extends PeriodicScheduler[F] {
private[this] val logger = getLogger private[this] val logger = getLogger
implicit private val _timer: Timer[F] = timer
def start: Stream[F, Nothing] = def start: Stream[F, Nothing] =
logger.sinfo("Starting periodic scheduler") ++ logger.sinfo("Starting periodic scheduler") ++
@ -35,8 +33,8 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
def shutdown: F[Unit] = def shutdown: F[Unit] =
state.modify(_.requestShutdown) state.modify(_.requestShutdown)
def periodicAwake: F[Fiber[F, Unit]] = def periodicAwake: F[Fiber[F, Throwable, Unit]] =
ConcurrentEffect[F].start( Async[F].start(
Stream Stream
.awakeEvery[F](config.wakeupPeriod.toScala) .awakeEvery[F](config.wakeupPeriod.toScala)
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange) .evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
@ -127,10 +125,11 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
s"Scheduling next notify for timer ${pj.timer.asString} -> ${pj.timer.nextElapse(now.toUtcDateTime)}" s"Scheduling next notify for timer ${pj.timer.asString} -> ${pj.timer.nextElapse(now.toUtcDateTime)}"
) )
) *> ) *>
ConcurrentEffect[F] Async[F]
.start( .start(
CalevFs2 CalevScheduler
.sleep[F](pj.timer) .utc[F]
.sleep(pj.timer)
.evalMap(_ => notifyChange) .evalMap(_ => notifyChange)
.compile .compile
.drain .drain
@ -168,15 +167,15 @@ object PeriodicSchedulerImpl {
case class State[F[_]]( case class State[F[_]](
shutdownRequest: Boolean, shutdownRequest: Boolean,
scheduledNotify: Option[Fiber[F, Unit]] scheduledNotify: Option[Fiber[F, Throwable, Unit]]
) { ) {
def requestShutdown: (State[F], Unit) = def requestShutdown: (State[F], Unit) =
(copy(shutdownRequest = true), ()) (copy(shutdownRequest = true), ())
def setNotify(fb: Fiber[F, Unit]): (State[F], Unit) = def setNotify(fb: Fiber[F, Throwable, Unit]): (State[F], Unit) =
(copy(scheduledNotify = Some(fb)), ()) (copy(scheduledNotify = Some(fb)), ())
def clearNotify: (State[F], Option[Fiber[F, Unit]]) = def clearNotify: (State[F], Option[Fiber[F, Throwable, Unit]]) =
(copy(scheduledNotify = None), scheduledNotify) (copy(scheduledNotify = None), scheduledNotify)
} }

View File

@ -1,8 +1,9 @@
package docspell.joex.scheduler package docspell.joex.scheduler
import cats.effect.{Concurrent, Sync} import cats.effect._
import cats.effect.std.Queue
import cats.implicits._ import cats.implicits._
import fs2.concurrent.Queue import fs2.Stream
import docspell.common._ import docspell.common._
@ -15,28 +16,28 @@ object QueueLogger {
): Logger[F] = ): Logger[F] =
new Logger[F] { new Logger[F] {
def trace(msg: => String): F[Unit] = def trace(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.enqueue1) LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.offer)
def debug(msg: => String): F[Unit] = def debug(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.enqueue1) LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.offer)
def info(msg: => String): F[Unit] = def info(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Info, msg).flatMap(q.enqueue1) LogEvent.create[F](jobId, jobInfo, LogLevel.Info, msg).flatMap(q.offer)
def warn(msg: => String): F[Unit] = def warn(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Warn, msg).flatMap(q.enqueue1) LogEvent.create[F](jobId, jobInfo, LogLevel.Warn, msg).flatMap(q.offer)
def error(ex: Throwable)(msg: => String): F[Unit] = def error(ex: Throwable)(msg: => String): F[Unit] =
LogEvent LogEvent
.create[F](jobId, jobInfo, LogLevel.Error, msg) .create[F](jobId, jobInfo, LogLevel.Error, msg)
.map(le => le.copy(ex = Some(ex))) .map(le => le.copy(ex = Some(ex)))
.flatMap(q.enqueue1) .flatMap(q.offer)
def error(msg: => String): F[Unit] = def error(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Error, msg).flatMap(q.enqueue1) LogEvent.create[F](jobId, jobInfo, LogLevel.Error, msg).flatMap(q.offer)
} }
def apply[F[_]: Concurrent]( def apply[F[_]: Async](
jobId: Ident, jobId: Ident,
jobInfo: String, jobInfo: String,
bufferSize: Int, bufferSize: Int,
@ -45,7 +46,9 @@ object QueueLogger {
for { for {
q <- Queue.circularBuffer[F, LogEvent](bufferSize) q <- Queue.circularBuffer[F, LogEvent](bufferSize)
log = create(jobId, jobInfo, q) log = create(jobId, jobInfo, q)
_ <- Concurrent[F].start(q.dequeue.through(sink.receive).compile.drain) _ <- Async[F].start(
Stream.fromQueueUnterminated(q).through(sink.receive).compile.drain
)
} yield log } yield log
} }

View File

@ -1,6 +1,6 @@
package docspell.joex.scheduler package docspell.joex.scheduler
import cats.effect.{Fiber, Timer} import cats.effect._
import fs2.Stream import fs2.Stream
import docspell.common.Ident import docspell.common.Ident
@ -30,5 +30,5 @@ trait Scheduler[F[_]] {
*/ */
def shutdown(cancelAll: Boolean): F[Unit] def shutdown(cancelAll: Boolean): F[Unit]
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]] def periodicAwake: F[Fiber[F, Throwable, Unit]]
} }

View File

@ -1,18 +1,17 @@
package docspell.joex.scheduler package docspell.joex.scheduler
import cats.effect._ import cats.effect._
import cats.effect.concurrent.Semaphore import cats.effect.std.Semaphore
import cats.implicits._ import cats.implicits._
import fs2.concurrent.SignallingRef import fs2.concurrent.SignallingRef
import docspell.store.Store import docspell.store.Store
import docspell.store.queue.JobQueue import docspell.store.queue.JobQueue
case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift]( case class SchedulerBuilder[F[_]: Async](
config: SchedulerConfig, config: SchedulerConfig,
tasks: JobTaskRegistry[F], tasks: JobTaskRegistry[F],
store: Store[F], store: Store[F],
blocker: Blocker,
queue: Resource[F, JobQueue[F]], queue: Resource[F, JobQueue[F]],
logSink: LogSink[F] logSink: LogSink[F]
) { ) {
@ -27,10 +26,7 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
withTaskRegistry(tasks.withTask(task)) withTaskRegistry(tasks.withTask(task))
def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] = def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] =
SchedulerBuilder[F](config, tasks, store, blocker, queue, logSink) SchedulerBuilder[F](config, tasks, store, queue, logSink)
def withBlocker(blocker: Blocker): SchedulerBuilder[F] =
copy(blocker = blocker)
def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] = def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] =
copy(logSink = sink) copy(logSink = sink)
@ -39,19 +35,16 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
copy(queue = Resource.pure[F, JobQueue[F]](queue)) copy(queue = Resource.pure[F, JobQueue[F]](queue))
def serve: Resource[F, Scheduler[F]] = def serve: Resource[F, Scheduler[F]] =
resource.evalMap(sch => resource.evalMap(sch => Async[F].start(sch.start.compile.drain).map(_ => sch))
ConcurrentEffect[F].start(sch.start.compile.drain).map(_ => sch)
)
def resource: Resource[F, Scheduler[F]] = { def resource: Resource[F, Scheduler[F]] = {
val scheduler = for { val scheduler: Resource[F, SchedulerImpl[F]] = for {
jq <- queue jq <- queue
waiter <- Resource.eval(SignallingRef(true)) waiter <- Resource.eval(SignallingRef(true))
state <- Resource.eval(SignallingRef(SchedulerImpl.emptyState[F])) state <- Resource.eval(SignallingRef(SchedulerImpl.emptyState[F]))
perms <- Resource.eval(Semaphore(config.poolSize.toLong)) perms <- Resource.eval(Semaphore(config.poolSize.toLong))
} yield new SchedulerImpl[F]( } yield new SchedulerImpl[F](
config, config,
blocker,
jq, jq,
tasks, tasks,
store, store,
@ -68,16 +61,14 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
object SchedulerBuilder { object SchedulerBuilder {
def apply[F[_]: ConcurrentEffect: ContextShift]( def apply[F[_]: Async](
config: SchedulerConfig, config: SchedulerConfig,
blocker: Blocker,
store: Store[F] store: Store[F]
): SchedulerBuilder[F] = ): SchedulerBuilder[F] =
new SchedulerBuilder[F]( new SchedulerBuilder[F](
config, config,
JobTaskRegistry.empty[F], JobTaskRegistry.empty[F],
store, store,
blocker,
JobQueue(store), JobQueue(store),
LogSink.db[F](store) LogSink.db[F](store)
) )

View File

@ -2,7 +2,7 @@ package docspell.joex.scheduler
import cats.data.OptionT import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.effect.concurrent.Semaphore import cats.effect.std.Semaphore
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
import fs2.concurrent.SignallingRef import fs2.concurrent.SignallingRef
@ -17,9 +17,8 @@ import docspell.store.records.RJob
import org.log4s._ import org.log4s._
final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift]( final class SchedulerImpl[F[_]: Async](
val config: SchedulerConfig, val config: SchedulerConfig,
blocker: Blocker,
queue: JobQueue[F], queue: JobQueue[F],
tasks: JobTaskRegistry[F], tasks: JobTaskRegistry[F],
store: Store[F], store: Store[F],
@ -37,8 +36,8 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
def init: F[Unit] = def init: F[Unit] =
QJob.runningToWaiting(config.name, store) QJob.runningToWaiting(config.name, store)
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]] = def periodicAwake: F[Fiber[F, Throwable, Unit]] =
ConcurrentEffect[F].start( Async[F].start(
Stream Stream
.awakeEvery[F](config.wakeupPeriod.toScala) .awakeEvery[F](config.wakeupPeriod.toScala)
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange) .evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
@ -153,7 +152,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
for { for {
_ <- _ <-
logger.fdebug(s"Creating context for job ${job.info} to run cancellation $t") logger.fdebug(s"Creating context for job ${job.info} to run cancellation $t")
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store) ctx <- Context[F, String](job, job.args, config, logSink, store)
_ <- t.onCancel.run(ctx) _ <- t.onCancel.run(ctx)
_ <- state.modify(_.markCancelled(job)) _ <- state.modify(_.markCancelled(job))
_ <- onFinish(job, JobState.Cancelled) _ <- onFinish(job, JobState.Cancelled)
@ -177,7 +176,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
case Right(t) => case Right(t) =>
for { for {
_ <- logger.fdebug(s"Creating context for job ${job.info} to run $t") _ <- logger.fdebug(s"Creating context for job ${job.info} to run $t")
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store) ctx <- Context[F, String](job, job.args, config, logSink, store)
jot = wrapTask(job, t.task, ctx) jot = wrapTask(job, t.task, ctx)
tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx) tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx)
_ <- state.modify(_.addRunning(job, tok)) _ <- state.modify(_.addRunning(job, tok))
@ -208,9 +207,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
ctx: Context[F, String] ctx: Context[F, String]
): Task[F, String, Unit] = ): Task[F, String, Unit] =
task task
.mapF(fa => .mapF(fa => onStart(job) *> logger.fdebug("Starting task now") *> fa)
onStart(job) *> logger.fdebug("Starting task now") *> blocker.blockOn(fa)
)
.mapF(_.attempt.flatMap({ .mapF(_.attempt.flatMap({
case Right(()) => case Right(()) =>
logger.info(s"Job execution successful: ${job.info}") logger.info(s"Job execution successful: ${job.info}")
@ -252,11 +249,10 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
code: F[Unit], code: F[Unit],
onCancel: F[Unit], onCancel: F[Unit],
ctx: Context[F, String] ctx: Context[F, String]
): F[F[Unit]] = { ): F[F[Unit]] =
val bfa = blocker.blockOn(code)
logger.fdebug(s"Forking job ${job.info}") *> logger.fdebug(s"Forking job ${job.info}") *>
ConcurrentEffect[F] Async[F]
.start(bfa) .start(code)
.map(fiber => .map(fiber =>
logger.fdebug(s"Cancelling job ${job.info}") *> logger.fdebug(s"Cancelling job ${job.info}") *>
fiber.cancel *> fiber.cancel *>
@ -271,11 +267,12 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
ctx.logger.warn("Job has been cancelled.") *> ctx.logger.warn("Job has been cancelled.") *>
logger.fdebug(s"Job ${job.info} has been cancelled.") logger.fdebug(s"Job ${job.info} has been cancelled.")
) )
}
} }
object SchedulerImpl { object SchedulerImpl {
type CancelToken[F[_]] = F[Unit]
def emptyState[F[_]]: State[F] = def emptyState[F[_]]: State[F] =
State(Map.empty, Set.empty, Map.empty, false) State(Map.empty, Set.empty, Map.empty, false)

View File

@ -9,9 +9,9 @@ import docspell.common.syntax.all._
import docspell.common.{Ident, LenientUri} import docspell.common.{Ident, LenientUri}
import docspell.joexapi.model.BasicResult import docspell.joexapi.model.BasicResult
import org.http4s.circe.CirceEntityDecoder._ import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.circe.CirceEntityDecoder
import org.http4s.client.Client import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
import org.http4s.{Method, Request, Uri} import org.http4s.{Method, Request, Uri}
import org.log4s.getLogger import org.log4s.getLogger
@ -29,8 +29,9 @@ object JoexClient {
private[this] val logger = getLogger private[this] val logger = getLogger
def apply[F[_]: Sync](client: Client[F]): JoexClient[F] = def apply[F[_]: Async](client: Client[F]): JoexClient[F] =
new JoexClient[F] { new JoexClient[F] with CirceEntityDecoder {
def notifyJoex(base: LenientUri): F[BasicResult] = { def notifyJoex(base: LenientUri): F[BasicResult] = {
val notifyUrl = base / "api" / "v1" / "notify" val notifyUrl = base / "api" / "v1" / "notify"
val req = Request[F](Method.POST, uri(notifyUrl)) val req = Request[F](Method.POST, uri(notifyUrl))
@ -62,6 +63,6 @@ object JoexClient {
Uri.unsafeFromString(u.asString) Uri.unsafeFromString(u.asString)
} }
def resource[F[_]: ConcurrentEffect](ec: ExecutionContext): Resource[F, JoexClient[F]] = def resource[F[_]: Async](ec: ExecutionContext): Resource[F, JoexClient[F]] =
BlazeClientBuilder[F](ec).resource.map(apply[F]) BlazeClientBuilder[F](ec).resource.map(apply[F])
} }

View File

@ -1,12 +1,12 @@
package docspell.restserver package docspell.restserver
import java.net.InetAddress
import docspell.backend.auth.Login import docspell.backend.auth.Login
import docspell.backend.{Config => BackendConfig} import docspell.backend.{Config => BackendConfig}
import docspell.common._ import docspell.common._
import docspell.ftssolr.SolrConfig import docspell.ftssolr.SolrConfig
import com.comcast.ip4s.IpAddress
case class Config( case class Config(
appName: String, appName: String,
appId: Ident, appId: Ident,
@ -42,12 +42,14 @@ object Config {
case class HttpHeader(enabled: Boolean, headerName: String, headerValue: String) case class HttpHeader(enabled: Boolean, headerName: String, headerValue: String)
case class AllowedIps(enabled: Boolean, ips: Set[String]) { case class AllowedIps(enabled: Boolean, ips: Set[String]) {
def containsAddress(inet: InetAddress): Boolean = { def containsAddress(inet: IpAddress): Boolean = {
val ip = inet.getHostAddress val ip = inet.fold(_.toUriString, _.toUriString) //.getHostAddress
lazy val ipParts = ip.split('.') lazy val ipParts = ip.split('.')
def checkSingle(pattern: String): Boolean = def checkSingle(pattern: String): Boolean =
pattern == ip || (inet.isLoopbackAddress && pattern == "127.0.0.1") || (pattern pattern == ip || (ip.contains(
"localhost"
) && pattern == "127.0.0.1") || (pattern
.split('.') .split('.')
.zip(ipParts) .zip(ipParts)
.foldLeft(true) { case (r, (a, b)) => .foldLeft(true) { case (r, (a, b)) =>

View File

@ -52,9 +52,8 @@ object Main extends IOApp {
val pools = for { val pools = for {
cec <- connectEC cec <- connectEC
bec <- blockingEC bec <- blockingEC
blocker = Blocker.liftExecutorService(bec)
rec <- restserverEC rec <- restserverEC
} yield Pools(cec, bec, blocker, rec) } yield Pools(cec, bec, rec)
logger.info(s"\n${banner.render("***>")}") logger.info(s"\n${banner.render("***>")}")
if (EnvMode.current.isDev) { if (EnvMode.current.isDev) {

View File

@ -24,21 +24,20 @@ final class RestAppImpl[F[_]](val config: Config, val backend: BackendApp[F])
object RestAppImpl { object RestAppImpl {
def create[F[_]: ConcurrentEffect: ContextShift]( def create[F[_]: Async](
cfg: Config, cfg: Config,
connectEC: ExecutionContext, connectEC: ExecutionContext,
httpClientEc: ExecutionContext, httpClientEc: ExecutionContext
blocker: Blocker
): Resource[F, RestApp[F]] = ): Resource[F, RestApp[F]] =
for { for {
backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker)( backend <- BackendApp(cfg.backend, connectEC, httpClientEc)(
createFtsClient[F](cfg) createFtsClient[F](cfg)
) )
app = new RestAppImpl[F](cfg, backend) app = new RestAppImpl[F](cfg, backend)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown) appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR } yield appR
private def createFtsClient[F[_]: ConcurrentEffect]( private def createFtsClient[F[_]: Async](
cfg: Config cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] = )(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client) if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)

View File

@ -11,36 +11,33 @@ import docspell.restserver.routes._
import docspell.restserver.webapp._ import docspell.restserver.webapp._
import org.http4s._ import org.http4s._
import org.http4s.blaze.server.BlazeServerBuilder
import org.http4s.dsl.Http4sDsl import org.http4s.dsl.Http4sDsl
import org.http4s.headers.Location import org.http4s.headers.Location
import org.http4s.implicits._ import org.http4s.implicits._
import org.http4s.server.Router import org.http4s.server.Router
import org.http4s.server.blaze.BlazeServerBuilder
import org.http4s.server.middleware.Logger import org.http4s.server.middleware.Logger
object RestServer { object RestServer {
def stream[F[_]: ConcurrentEffect]( def stream[F[_]: Async](cfg: Config, pools: Pools): Stream[F, Nothing] = {
cfg: Config,
pools: Pools
)(implicit T: Timer[F], CS: ContextShift[F]): Stream[F, Nothing] = {
val templates = TemplateRoutes[F](pools.blocker, cfg) val templates = TemplateRoutes[F](cfg)
val app = for { val app = for {
restApp <- restApp <-
RestAppImpl RestAppImpl
.create[F](cfg, pools.connectEC, pools.httpClientEC, pools.blocker) .create[F](cfg, pools.connectEC, pools.httpClientEC)
httpApp = Router( httpApp = Router(
"/api/info" -> routes.InfoRoutes(), "/api/info" -> routes.InfoRoutes(),
"/api/v1/open/" -> openRoutes(cfg, restApp), "/api/v1/open/" -> openRoutes(cfg, restApp),
"/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token => "/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
securedRoutes(cfg, pools, restApp, token) securedRoutes(cfg, restApp, token)
}, },
"/api/v1/admin" -> AdminRoutes(cfg.adminEndpoint) { "/api/v1/admin" -> AdminRoutes(cfg.adminEndpoint) {
adminRoutes(cfg, restApp) adminRoutes(cfg, restApp)
}, },
"/api/doc" -> templates.doc, "/api/doc" -> templates.doc,
"/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F](pools.blocker)), "/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
"/app" -> EnvMiddleware(templates.app), "/app" -> EnvMiddleware(templates.app),
"/sw.js" -> EnvMiddleware(templates.serviceWorker), "/sw.js" -> EnvMiddleware(templates.serviceWorker),
"/" -> redirectTo("/app") "/" -> redirectTo("/app")
@ -61,9 +58,8 @@ object RestServer {
) )
}.drain }.drain
def securedRoutes[F[_]: Effect: ContextShift]( def securedRoutes[F[_]: Async](
cfg: Config, cfg: Config,
pools: Pools,
restApp: RestApp[F], restApp: RestApp[F],
token: AuthToken token: AuthToken
): HttpRoutes[F] = ): HttpRoutes[F] =
@ -77,9 +73,9 @@ object RestServer {
"user" -> UserRoutes(restApp.backend, token), "user" -> UserRoutes(restApp.backend, token),
"collective" -> CollectiveRoutes(restApp.backend, token), "collective" -> CollectiveRoutes(restApp.backend, token),
"queue" -> JobQueueRoutes(restApp.backend, token), "queue" -> JobQueueRoutes(restApp.backend, token),
"item" -> ItemRoutes(cfg, pools.blocker, restApp.backend, token), "item" -> ItemRoutes(cfg, restApp.backend, token),
"items" -> ItemMultiRoutes(restApp.backend, token), "items" -> ItemMultiRoutes(restApp.backend, token),
"attachment" -> AttachmentRoutes(pools.blocker, restApp.backend, token), "attachment" -> AttachmentRoutes(restApp.backend, token),
"attachments" -> AttachmentMultiRoutes(restApp.backend, token), "attachments" -> AttachmentMultiRoutes(restApp.backend, token),
"upload" -> UploadRoutes.secured(restApp.backend, cfg, token), "upload" -> UploadRoutes.secured(restApp.backend, cfg, token),
"checkfile" -> CheckFileRoutes.secured(restApp.backend, token), "checkfile" -> CheckFileRoutes.secured(restApp.backend, token),
@ -95,7 +91,7 @@ object RestServer {
"clientSettings" -> ClientSettingsRoutes(restApp.backend, token) "clientSettings" -> ClientSettingsRoutes(restApp.backend, token)
) )
def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] = def openRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
Router( Router(
"auth" -> LoginRoutes.login(restApp.backend.login, cfg), "auth" -> LoginRoutes.login(restApp.backend.login, cfg),
"signup" -> RegisterRoutes(restApp.backend, cfg), "signup" -> RegisterRoutes(restApp.backend, cfg),
@ -104,14 +100,14 @@ object RestServer {
"integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg) "integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg)
) )
def adminRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] = def adminRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
Router( Router(
"fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend), "fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend),
"user" -> UserRoutes.admin(restApp.backend), "user" -> UserRoutes.admin(restApp.backend),
"info" -> InfoRoutes.admin(cfg) "info" -> InfoRoutes.admin(cfg)
) )
def redirectTo[F[_]: Effect](path: String): HttpRoutes[F] = { def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = {
val dsl = new Http4sDsl[F] {} val dsl = new Http4sDsl[F] {}
import dsl._ import dsl._
@ -119,7 +115,7 @@ object RestServer {
Response[F]( Response[F](
Status.SeeOther, Status.SeeOther,
body = Stream.empty, body = Stream.empty,
headers = Headers.of(Location(Uri(path = path))) headers = Headers(Location(Uri(path = Uri.Path.unsafeFromString(path))))
).pure[F] ).pure[F]
} }
} }

View File

@ -5,7 +5,7 @@ import docspell.common.AccountId
import docspell.common.LenientUri import docspell.common.LenientUri
import org.http4s._ import org.http4s._
import org.http4s.util._ import org.typelevel.ci.CIString
case class CookieData(auth: AuthToken) { case class CookieData(auth: AuthToken) {
def accountId: AccountId = auth.account def accountId: AccountId = auth.account
@ -37,7 +37,7 @@ object CookieData {
def fromCookie[F[_]](req: Request[F]): Either[String, String] = def fromCookie[F[_]](req: Request[F]): Either[String, String] =
for { for {
header <- headers.Cookie.from(req.headers).toRight("Cookie parsing error") header <- req.headers.get[headers.Cookie].toRight("Cookie parsing error")
cookie <- cookie <-
header.values.toList header.values.toList
.find(_.name == cookieName) .find(_.name == cookieName)
@ -46,8 +46,8 @@ object CookieData {
def fromHeader[F[_]](req: Request[F]): Either[String, String] = def fromHeader[F[_]](req: Request[F]): Either[String, String] =
req.headers req.headers
.get(CaseInsensitiveString(headerName)) .get(CIString(headerName))
.map(_.value) .map(_.head.value)
.toRight("Couldn't find an authenticator") .toRight("Couldn't find an authenticator")
def deleteCookie(baseUrl: LenientUri): ResponseCookie = def deleteCookie(baseUrl: LenientUri): ResponseCookie =

View File

@ -33,7 +33,7 @@ object RememberCookieData {
def fromCookie[F[_]](req: Request[F]): Option[String] = def fromCookie[F[_]](req: Request[F]): Option[String] =
for { for {
header <- headers.Cookie.from(req.headers) header <- req.headers.get[headers.Cookie]
cookie <- header.values.toList.find(_.name == cookieName) cookie <- header.values.toList.find(_.name == cookieName)
} yield cookie.content } yield cookie.content

View File

@ -2,7 +2,7 @@ package docspell.restserver.conv
import java.time.{LocalDate, ZoneId} import java.time.{LocalDate, ZoneId}
import cats.effect.{Effect, Sync} import cats.effect.{Async, Sync}
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
@ -294,7 +294,7 @@ trait Conversions {
JobLogEvent(jl.created, jl.level, jl.message) JobLogEvent(jl.created, jl.level, jl.message)
// upload // upload
def readMultipart[F[_]: Effect]( def readMultipart[F[_]: Async](
mp: Multipart[F], mp: Multipart[F],
sourceName: String, sourceName: String,
logger: Logger, logger: Logger,
@ -347,11 +347,11 @@ trait Conversions {
.filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta"))) .filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
.map(p => .map(p =>
OUpload OUpload
.File(p.filename, p.headers.get(`Content-Type`).map(fromContentType), p.body) .File(p.filename, p.headers.get[`Content-Type`].map(fromContentType), p.body)
) )
for { for {
metaData <- meta metaData <- meta
_ <- Effect[F].delay(logger.debug(s"Parsed upload meta data: $metaData")) _ <- Async[F].delay(logger.debug(s"Parsed upload meta data: $metaData"))
tracker <- Ident.randomId[F] tracker <- Ident.randomId[F]
} yield UploadData(metaData._1, metaData._2, files, prio, Some(tracker)) } yield UploadData(metaData._1, metaData._2, files, prio, Some(tracker))
} }

Some files were not shown because too many files have changed in this diff Show More