mirror of
synced 2025-03-25 16:45:05 +00:00
Upgrade code base to CE3
This commit is contained in:
@ -1,7 +1,3 @@
updates.ignore = [
{ groupId = "org.apache.poi" },
updates.pin = [
{ groupId = "co.fs2", version = "2." }
@ -32,10 +32,7 @@ object TextAnalyser {
labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString))
def create[F[_]: Concurrent: Timer: ContextShift](
cfg: TextAnalysisConfig,
blocker: Blocker
): Resource[F, TextAnalyser[F]] =
def create[F[_]: Async](cfg: TextAnalysisConfig): Resource[F, TextAnalyser[F]] =
.map(stanfordNer =>
@ -56,7 +53,7 @@ object TextAnalyser {
} yield Result(spans ++ list, dates)
def classifier: TextClassifier[F] =
new StanfordTextClassifier[F](cfg.classifier, blocker)
new StanfordTextClassifier[F](cfg.classifier)
private def textLimit(logger: Logger[F], text: String): F[String] =
if (cfg.maxLength <= 0)
@ -82,7 +79,7 @@ object TextAnalyser {
/** Provides the nlp pipeline based on the configuration. */
private object Nlp {
def apply[F[_]: Concurrent: Timer](
def apply[F[_]: Async](
cfg: TextAnalysisConfig.NlpConfig
): F[Input[F] => F[Vector[NerLabel]]] =
cfg.mode match {
@ -104,7 +101,7 @@ object TextAnalyser {
text: String
def annotate[F[_]: BracketThrow](
def annotate[F[_]: Async](
cache: PipelineCache[F]
)(input: Input[F]): F[Vector[NerLabel]] =
@ -2,10 +2,11 @@ package docspell.analysis.classifier
import java.nio.file.Path
import cats.effect.Ref
import cats.effect._
import cats.effect.concurrent.Ref
import cats.implicits._
import fs2.Stream
import fs2.io.file.Files
import docspell.analysis.classifier
import docspell.analysis.classifier.TextClassifier._
@ -15,10 +16,8 @@ import docspell.common.syntax.FileSyntax._
import edu.stanford.nlp.classify.ColumnDataClassifier
final class StanfordTextClassifier[F[_]: Sync: ContextShift](
cfg: TextClassifierConfig,
blocker: Blocker
) extends TextClassifier[F] {
final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
extends TextClassifier[F] {
def trainClassifier[A](
logger: Logger[F],
@ -28,7 +27,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.withTempDir(cfg.workingDir, "trainclassifier")
.use { dir =>
for {
rawData <- writeDataFile(blocker, dir, data)
rawData <- writeDataFile(dir, data)
_ <- logger.debug(s"Learning from ${rawData.count} items.")
trainData <- splitData(logger, rawData)
scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
@ -81,8 +80,8 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
TrainData(in.file.resolveSibling("train.txt"), in.file.resolveSibling("test.txt"))
val fileLines =
.readAll(in.file, blocker, 4096)
.readAll[F](in.file, 4096)
@ -95,7 +94,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.through(fs2.io.file.writeAll(td.test, blocker))
_ <-
@ -103,13 +102,13 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.through(fs2.io.file.writeAll(td.train, blocker))
} yield td
def writeDataFile(blocker: Blocker, dir: Path, data: Stream[F, Data]): F[RawData] = {
def writeDataFile(dir: Path, data: Stream[F, Data]): F[RawData] = {
val target = dir.resolve("rawdata")
for {
counter <- Ref.of[F, Long](0L)
@ -120,7 +119,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.evalTap(_ => counter.update(_ + 1))
.through(fs2.io.file.writeAll(target, blocker))
lines <- counter.get
@ -19,7 +19,7 @@ object DateFind {
.splitToken(text, " \t.,\n\r/".toSet)
.filter(w => lang != Language.Latvian || w.value != "gada")
.filter(_.length == 3)
.filter(_.size == 3)
.flatMap(q =>
@ -28,9 +28,9 @@ object DateFind {
text.substring(q.head.begin, q(2).end),
text.substring(q.head.get.begin, q(2).end),
@ -2,9 +2,8 @@ package docspell.analysis.nlp
import scala.concurrent.duration.{Duration => _, _}
import cats.Applicative
import cats.effect.Ref
import cats.effect._
import cats.effect.concurrent.Ref
import cats.implicits._
import docspell.analysis.NlpSettings
@ -28,7 +27,7 @@ trait PipelineCache[F[_]] {
object PipelineCache {
private[this] val logger = getLogger
def apply[F[_]: Concurrent: Timer](clearInterval: Duration)(
def apply[F[_]: Async](clearInterval: Duration)(
creator: NlpSettings => Annotator[F],
release: F[Unit]
): F[PipelineCache[F]] =
@ -38,7 +37,7 @@ object PipelineCache {
_ <- Logger.log4s(logger).info("Creating nlp pipeline cache")
} yield new Impl[F](data, creator, cacheClear)
final private class Impl[F[_]: Sync](
final private class Impl[F[_]: Async](
data: Ref[F, Map[String, Entry[Annotator[F]]]],
creator: NlpSettings => Annotator[F],
cacheClear: CacheClearing[F]
@ -97,20 +96,20 @@ object PipelineCache {
object CacheClearing {
def none[F[_]: Applicative]: CacheClearing[F] =
def none[F[_]]: CacheClearing[F] =
new CacheClearing[F] {
def withCache: Resource[F, Unit] =
Resource.pure[F, Unit](())
def create[F[_]: Concurrent: Timer, A](
def create[F[_]: Async, A](
data: Ref[F, Map[String, Entry[A]]],
interval: Duration,
release: F[Unit]
): F[CacheClearing[F]] =
for {
counter <- Ref.of(0L)
cleaning <- Ref.of(None: Option[Fiber[F, Unit]])
cleaning <- Ref.of(None: Option[Fiber[F, Throwable, Unit]])
log = Logger.log4s(logger)
result <-
if (interval.millis <= 0)
@ -135,10 +134,10 @@ object PipelineCache {
final private class CacheClearingImpl[F[_], A](
data: Ref[F, Map[String, Entry[A]]],
counter: Ref[F, Long],
cleaningFiber: Ref[F, Option[Fiber[F, Unit]]],
cleaningFiber: Ref[F, Option[Fiber[F, Throwable, Unit]]],
clearInterval: FiniteDuration,
release: F[Unit]
)(implicit T: Timer[F], F: Concurrent[F])
)(implicit F: Async[F])
extends CacheClearing[F] {
private[this] val log = Logger.log4s[F](logger)
@ -157,8 +156,8 @@ object PipelineCache {
case None => ().pure[F]
private def clearAllLater: F[Fiber[F, Unit]] =
F.start(T.sleep(clearInterval) *> clearAll)
private def clearAllLater: F[Fiber[F, Throwable, Unit]] =
F.start(F.sleep(clearInterval) *> clearAll)
/** Logs, at info level, that the cache clearing is cancelled because the cache has been used in the meantime. */
private def logDontClear: F[Unit] =
log.info("Cancel stanford cache clearing, as it has been used in between.")
@ -2,12 +2,12 @@ package docspell.analysis.classifier
import java.nio.file.Paths
import scala.concurrent.ExecutionContext
import cats.data.Kleisli
import cats.data.NonEmptyList
import cats.effect._
import cats.effect.unsafe.implicits.global
import fs2.Stream
import fs2.io.file.Files
import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._
@ -17,8 +17,6 @@ import munit._
class StanfordTextClassifierSuite extends FunSuite {
val logger = Logger.log4s[IO](org.log4s.getLogger)
implicit val CS = IO.contextShift(ExecutionContext.global)
test("learn from data") {
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
@ -38,34 +36,30 @@ class StanfordTextClassifierSuite extends FunSuite {
val modelExists =
Blocker[IO].use { blocker =>
val classifier = new StanfordTextClassifier[IO](cfg, blocker)
classifier.trainClassifier[Boolean](logger, data)(
Kleisli(result => File.existsNonEmpty[IO](result.model))
val modelExists = {
val classifier = new StanfordTextClassifier[IO](cfg)
classifier.trainClassifier[Boolean](logger, data)(
Kleisli(result => File.existsNonEmpty[IO](result.model))
assertEquals(modelExists.unsafeRunSync(), true)
test("run classifier") {
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
val things = for {
dir <- File.withTempDir[IO](Paths.get("target"), "testcls")
blocker <- Blocker[IO]
} yield (dir, blocker)
val cfg = TextClassifierConfig(Paths.get("target"), NonEmptyList.of(Map()))
val things = File.withTempDir[IO](Paths.get("target"), "testcls")
.use { case (dir, blocker) =>
val classifier = new StanfordTextClassifier[IO](cfg, blocker)
.use { dir =>
val classifier = new StanfordTextClassifier[IO](cfg)
val modelFile = dir.resolve("test.ser.gz")
for {
_ <-
.readURL[IO](4096, blocker)
.through(fs2.io.file.writeAll(modelFile, blocker))
model = ClassifierModel(modelFile)
@ -3,6 +3,7 @@ package docspell.analysis.nlp
import java.nio.file.Paths
import cats.effect.IO
import cats.effect.unsafe.implicits.global
import docspell.analysis.Env
import docspell.common._
@ -14,8 +14,8 @@ import docspell.store.queue.JobQueue
import docspell.store.usertask.UserTaskStore
import emil.javamail.{JavaMailEmil, Settings}
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
trait BackendApp[F[_]] {
@ -43,12 +43,11 @@ trait BackendApp[F[_]] {
object BackendApp {
def create[F[_]: ConcurrentEffect: ContextShift](
def create[F[_]: Async](
cfg: Config,
store: Store[F],
httpClient: Client[F],
ftsClient: FtsClient[F],
blocker: Blocker
ftsClient: FtsClient[F]
): Resource[F, BackendApp[F]] =
for {
utStore <- UserTaskStore(store)
@ -68,7 +67,7 @@ object BackendApp {
itemSearchImpl <- OItemSearch(store)
fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
mailImpl <- OMail(store, javaEmil)
userTaskImpl <- OUserTask(utStore, queue, joexImpl)
folderImpl <- OFolder(store)
@ -98,16 +97,15 @@ object BackendApp {
val clientSettings = clientSettingsImpl
def apply[F[_]: ConcurrentEffect: ContextShift](
def apply[F[_]: Async](
cfg: Config,
connectEC: ExecutionContext,
httpClientEc: ExecutionContext,
blocker: Blocker
httpClientEc: ExecutionContext
)(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] =
for {
store <- Store.create(cfg.jdbc, connectEC, blocker)
store <- Store.create(cfg.jdbc, connectEC)
httpClient <- BlazeClientBuilder[F](httpClientEc).resource
ftsClient <- ftsFactory(httpClient)
backend <- create(cfg, store, httpClient, ftsClient, blocker)
backend <- create(cfg, store, httpClient, ftsClient)
} yield backend
@ -69,7 +69,7 @@ object Login {
def invalidTime: Result = InvalidTime
def apply[F[_]: Effect](store: Store[F]): Resource[F, Login[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, Login[F]] =
Resource.pure[F, Login[F]](new Login[F] {
private val logF = Logger.log4s(logger)
@ -1,7 +1,7 @@
package docspell.backend.ops
import cats.data.OptionT
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.common.AccountId
@ -25,7 +25,7 @@ trait OClientSettings[F[_]] {
object OClientSettings {
private[this] val logger = getLogger
def apply[F[_]: Effect](store: Store[F]): Resource[F, OClientSettings[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OClientSettings[F]] =
Resource.pure[F, OClientSettings[F]](new OClientSettings[F] {
private def getUserId(account: AccountId): OptionT[F, Ident] =
@ -58,7 +58,7 @@ object OClientSettings {
store.transact(RClientSettings.upsert(clientId, userId, data))
_ <- OptionT.liftF(
if (n <= 0) Effect[F].raiseError(new Exception("No rows updated!"))
if (n <= 0) Async[F].raiseError(new Exception("No rows updated!"))
else ().pure[F]
} yield ()).getOrElse(())
@ -1,6 +1,6 @@
package docspell.backend.ops
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import fs2.Stream
@ -126,7 +126,7 @@ object OCollective {
def apply[F[_]: Effect](
def apply[F[_]: Async](
store: Store[F],
uts: UserTaskStore[F],
queue: JobQueue[F],
@ -87,7 +87,7 @@ object OCustomFields {
collective: Ident
def apply[F[_]: Effect](
def apply[F[_]: Async](
store: Store[F]
): Resource[F, OCustomFields[F]] =
Resource.pure[F, OCustomFields[F]](new OCustomFields[F] {
@ -1,6 +1,6 @@
package docspell.backend.ops
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.common.{AccountId, Ident}
@ -22,7 +22,7 @@ trait OEquipment[F[_]] {
object OEquipment {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OEquipment[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OEquipment[F]] =
Resource.pure[F, OEquipment[F]](new OEquipment[F] {
def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[REquipment]] =
store.transact(REquipment.findAll(account.collective, nameQuery, _.name))
@ -55,7 +55,7 @@ object OFolder {
type FolderDetail = QFolder.FolderDetail
val FolderDetail = QFolder.FolderDetail
def apply[F[_]: Effect](store: Store[F]): Resource[F, OFolder[F]] =
def apply[F[_]](store: Store[F]): Resource[F, OFolder[F]] =
Resource.pure[F, OFolder[F]](new OFolder[F] {
def findAll(
account: AccountId,
@ -77,7 +77,7 @@ object OFulltext {
case class FtsItem(item: ListItem, ftsData: FtsData)
case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)
def apply[F[_]: Effect](
def apply[F[_]: Async](
itemSearch: OItemSearch[F],
fts: FtsClient[F],
store: Store[F],
@ -1,7 +1,7 @@
package docspell.backend.ops
import cats.data.{NonEmptyList, OptionT}
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.backend.JobFactory
@ -191,7 +191,7 @@ trait OItem[F[_]] {
object OItem {
def apply[F[_]: Effect](
def apply[F[_]: Async](
store: Store[F],
fts: FtsClient[F],
queue: JobQueue[F],
@ -1,7 +1,7 @@
package docspell.backend.ops
import cats.data.OptionT
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import fs2.Stream
@ -118,7 +118,7 @@ object OItemSearch {
val fileId = rs.fileId
def apply[F[_]: Effect](store: Store[F]): Resource[F, OItemSearch[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OItemSearch[F]] =
Resource.pure[F, OItemSearch[F]](new OItemSearch[F] {
def findItem(id: Ident, collective: Ident): F[Option[ItemData]] =
@ -36,7 +36,7 @@ object OJoex {
} yield cancel.success).getOrElse(false)
def create[F[_]: ConcurrentEffect](
def create[F[_]: Async](
ec: ExecutionContext,
store: Store[F]
): Resource[F, OJoex[F]] =
@ -141,7 +141,7 @@ object OMail {
def apply[F[_]: Effect](store: Store[F], emil: Emil[F]): Resource[F, OMail[F]] =
def apply[F[_]: Async](store: Store[F], emil: Emil[F]): Resource[F, OMail[F]] =
Resource.pure[F, OMail[F]](new OMail[F] {
def getSmtpSettings(
accId: AccountId,
@ -1,6 +1,6 @@
package docspell.backend.ops
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.common.syntax.all._
@ -20,7 +20,7 @@ trait ONode[F[_]] {
object ONode {
private[this] val logger = getLogger
def apply[F[_]: Effect](store: Store[F]): Resource[F, ONode[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, ONode[F]] =
Resource.pure[F, ONode[F]](new ONode[F] {
def register(appId: Ident, nodeType: NodeType, uri: LenientUri): F[Unit] =
@ -1,6 +1,6 @@
package docspell.backend.ops
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.backend.ops.OOrganization._
@ -49,7 +49,7 @@ object OOrganization {
contacts: Seq[RContact]
def apply[F[_]: Effect](store: Store[F]): Resource[F, OOrganization[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OOrganization[F]] =
Resource.pure[F, OOrganization[F]](new OOrganization[F] {
def findAllOrg(
@ -1,6 +1,6 @@
package docspell.backend.ops
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.common.{AccountId, Ident}
@ -22,7 +22,7 @@ trait OSource[F[_]] {
object OSource {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OSource[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OSource[F]] =
Resource.pure[F, OSource[F]](new OSource[F] {
def findAll(account: AccountId): F[Vector[SourceData]] =
@ -1,6 +1,6 @@
package docspell.backend.ops
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.common.{AccountId, Ident}
@ -25,7 +25,7 @@ trait OTag[F[_]] {
object OTag {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OTag[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OTag[F]] =
Resource.pure[F, OTag[F]](new OTag[F] {
def findAll(account: AccountId, nameQuery: Option[String]): F[Vector[RTag]] =
store.transact(RTag.findAll(account.collective, nameQuery, _.name))
@ -62,7 +62,7 @@ trait OUserTask[F[_]] {
object OUserTask {
def apply[F[_]: Effect](
def apply[F[_]: Async](
store: UserTaskStore[F],
queue: JobQueue[F],
joex: OJoex[F]
@ -1,6 +1,6 @@
package docspell.backend.signup
import cats.effect.{Effect, Resource}
import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.backend.PasswordCrypt
@ -23,7 +23,7 @@ trait OSignup[F[_]] {
object OSignup {
private[this] val logger = getLogger
def apply[F[_]: Effect](store: Store[F]): Resource[F, OSignup[F]] =
def apply[F[_]: Async](store: Store[F]): Resource[F, OSignup[F]] =
Resource.pure[F, OSignup[F]](new OSignup[F] {
def newInvite(cfg: Config)(password: Password): F[NewInviteResult] =
@ -35,7 +35,7 @@ object OSignup {
.map(ri => NewInviteResult.success(ri.id))
def register(cfg: Config)(data: RegisterData): F[SignupResult] =
cfg.mode match {
@ -1,47 +1,48 @@
package docspell.common
import java.io.IOException
import java.nio.charset.StandardCharsets
import java.nio.file._
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{Files => JFiles, _}
import java.util.concurrent.atomic.AtomicInteger
import scala.jdk.CollectionConverters._
import cats.effect._
import cats.implicits._
import fs2.Stream
import fs2.io.file.Files
import fs2.{Chunk, Stream}
import docspell.common.syntax.all._
import io.circe.Decoder
import scodec.bits.ByteVector
//TODO use the fs2.io.file.Files api
object File {
def mkDir[F[_]: Sync](dir: Path): F[Path] =
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
mkDir(parent).map(p => Files.createTempDirectory(p, prefix))
mkDir(parent).map(p => JFiles.createTempDirectory(p, prefix))
def mkTempFile[F[_]: Sync](
parent: Path,
prefix: String,
suffix: Option[String] = None
): F[Path] =
mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull))
mkDir(parent).map(p => JFiles.createTempFile(p, prefix, suffix.orNull))
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] =
Sync[F].delay {
val count = new AtomicInteger(0)
new SimpleFileVisitor[Path]() {
override def visitFile(
file: Path,
attrs: BasicFileAttributes
): FileVisitResult = {
@ -49,7 +50,7 @@ object File {
Option(e) match {
case Some(ex) => throw ex
case None =>
@ -58,47 +59,57 @@ object File {
def exists[F[_]: Sync](file: Path): F[Boolean] =
def size[F[_]: Sync](file: Path): F[Long] =
def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] =
Sync[F].delay(Files.exists(file) && Files.size(file) > minSize)
Sync[F].delay(JFiles.exists(file) && JFiles.size(file) > minSize)
def deleteFile[F[_]: Sync](file: Path): F[Unit] =
Sync[F].delay(Files.deleteIfExists(file)).map(_ => ())
Sync[F].delay(JFiles.deleteIfExists(file)).map(_ => ())
def delete[F[_]: Sync](path: Path): F[Int] =
if (Files.isDirectory(path)) deleteDirectory(path)
if (JFiles.isDirectory(path)) deleteDirectory(path)
else deleteFile(path).map(_ => 1)
/** Provides a temporary directory as a `Resource`.
  *
  * The directory is acquired with `mkTempDir(parent, prefix)`. On release it
  * is passed to `delete`; the count returned by `delete` is discarded.
  *
  * @param parent the directory under which the temporary directory is created
  * @param prefix the name prefix handed to `mkTempDir`
  * @return a resource yielding the path of the temporary directory
  */
def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] =
Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ()))
def listFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] =
def listJFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] =
Sync[F].delay {
val javaList =
Files.list(dir).filter(p => pred(p)).collect(java.util.stream.Collectors.toList())
.filter(p => pred(p))
def readAll[F[_]: Sync: ContextShift](
def readAll[F[_]: Files](
file: Path,
blocker: Blocker,
chunkSize: Int
): Stream[F, Byte] =
fs2.io.file.readAll(file, blocker, chunkSize)
Files[F].readAll(file, chunkSize)
def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] =
readAll[F](file, blocker, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
def readText[F[_]: Files: Concurrent](file: Path): F[String] =
readAll[F](file, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
def writeString[F[_]: Sync](file: Path, content: String): F[Path] =
Sync[F].delay(Files.write(file, content.getBytes(StandardCharsets.UTF_8)))
def writeString[F[_]: Files: Concurrent](file: Path, content: String): F[Path] =
ByteVector.encodeUtf8(content) match {
case Right(bv) =>
.map(_ => file)
case Left(ex) =>
def readJson[F[_]: Sync: ContextShift, A](file: Path, blocker: Blocker)(implicit
d: Decoder[A]
): F[A] =
readText[F](file, blocker).map(_.parseJsonAs[A]).rethrow
def readJson[F[_]: Async, A](file: Path)(implicit d: Decoder[A]): F[A] =
@ -6,7 +6,7 @@ import java.net.URLEncoder
import cats.data.NonEmptyList
import cats.effect.Resource
import cats.effect.{Blocker, ContextShift, Sync}
import cats.effect._
import cats.implicits._
import fs2.Stream
@ -66,20 +66,17 @@ case class LenientUri(
def readURL[F[_]: Sync: ContextShift](
chunkSize: Int,
blocker: Blocker
): Stream[F, Byte] =
def readURL[F[_]: Sync](chunkSize: Int): Stream[F, Byte] =
.emit(Either.catchNonFatal(new URL(asString)))
.flatMap(url =>
fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, true)
def readText[F[_]: Sync: ContextShift](chunkSize: Int, blocker: Blocker): F[String] =
readURL[F](chunkSize, blocker).through(fs2.text.utf8Decode).compile.foldMonoid
def readText[F[_]: Sync](chunkSize: Int): F[String] =
def host: Option[String] =
authority.map(a =>
@ -2,13 +2,10 @@ package docspell.common
import scala.concurrent.ExecutionContext
import cats.effect._
/** Captures thread pools to use in an application.
case class Pools(
connectEC: ExecutionContext,
httpClientEC: ExecutionContext,
blocker: Blocker,
restEC: ExecutionContext
@ -7,7 +7,7 @@ import java.util.concurrent.TimeUnit
import scala.jdk.CollectionConverters._
import cats.effect.{Blocker, ContextShift, Sync}
import cats.effect._
import cats.implicits._
import fs2.{Stream, io, text}
@ -34,9 +34,8 @@ object SystemCommand {
final case class Result(rc: Int, stdout: String, stderr: String)
def exec[F[_]: Sync: ContextShift](
def exec[F[_]: Sync](
cmd: Config,
blocker: Blocker,
logger: Logger[F],
wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty
@ -44,8 +43,8 @@ object SystemCommand {
startProcess(cmd, wd, logger, stdin) { proc =>
Stream.eval {
for {
_ <- writeToProcess(stdin, proc, blocker)
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
_ <- writeToProcess(stdin, proc)
term <- Sync[F].blocking(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
_ <-
if (term)
logger.debug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
@ -55,23 +54,22 @@ object SystemCommand {
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
out <-
if (term) inputStreamToString(proc.getInputStream, blocker)
if (term) inputStreamToString(proc.getInputStream)
else Sync[F].pure("")
err <-
if (term) inputStreamToString(proc.getErrorStream, blocker)
if (term) inputStreamToString(proc.getErrorStream)
else Sync[F].pure("")
} yield Result(proc.exitValue, out, err)
def execSuccess[F[_]: Sync: ContextShift](
def execSuccess[F[_]: Sync](
cmd: Config,
blocker: Blocker,
logger: Logger[F],
wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] =
exec(cmd, blocker, logger, wd, stdin).flatMap { r =>
exec(cmd, logger, wd, stdin).flatMap { r =>
if (r.rc != 0)
new Exception(
@ -92,7 +90,7 @@ object SystemCommand {
val log = logger.debug(s"Running external command: ${cmd.cmdString}")
val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
val proc = log *> hasStdin.flatMap(flag =>
Sync[F].delay {
Sync[F].blocking {
val pb = new ProcessBuilder(cmd.toCmd.asJava)
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
@ -109,11 +107,8 @@ object SystemCommand {
private def inputStreamToString[F[_]: Sync: ContextShift](
in: InputStream,
blocker: Blocker
): F[String] =
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false)
private def inputStreamToString[F[_]: Sync](in: InputStream): F[String] =
io.readInputStream(Sync[F].pure(in), 16 * 1024, closeAfterUse = false)
@ -122,18 +117,17 @@ object SystemCommand {
private def writeToProcess[F[_]: Sync: ContextShift](
private def writeToProcess[F[_]: Sync](
data: Stream[F, Byte],
proc: Process,
blocker: Blocker
proc: Process
): F[Unit] =
.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker))
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config): F[Unit] =
Sync[F].delay(proc.destroyForcibly()).attempt *> {
Sync[F].blocking(proc.destroyForcibly()).attempt *> {
new Exception(
s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})"
@ -12,6 +12,8 @@ import docspell.convert.extern._
import docspell.convert.flexmark.Markdown
import docspell.files.{ImageSize, TikaMimetype}
import scodec.bits.ByteVector
trait Conversion[F[_]] {
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
@ -22,10 +24,9 @@ trait Conversion[F[_]] {
object Conversion {
def create[F[_]: Sync: ContextShift](
def create[F[_]: Async](
cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml,
blocker: Blocker,
logger: Logger[F]
): Resource[F, Conversion[F]] =
Resource.pure[F, Conversion[F]](new Conversion[F] {
@ -36,12 +37,12 @@ object Conversion {
TikaMimetype.resolve(dataType, in).flatMap {
case MimeType.PdfMatch(_) =>
.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, blocker, logger)(in, handler)
.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(in, handler)
case MimeType.HtmlMatch(mt) =>
val cs = mt.charsetOrUtf8
.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, blocker, logger)(
.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, logger)(
@ -50,14 +51,15 @@ object Conversion {
val cs = mt.charsetOrUtf8
Markdown.toHtml(in, cfg.markdown, cs).flatMap { html =>
val bytes = Stream
)(bytes, handler)
@ -77,7 +79,7 @@ object Conversion {
Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)(
Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, logger)(
@ -86,14 +88,14 @@ object Conversion {
s"Cannot read image when determining size for ${mt.asString}. Converting anyways."
) *>
Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)(
Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, logger)(
case Office(_) =>
Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, blocker, logger)(in, handler)
Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, logger)(in, handler)
case mt =>
@ -4,6 +4,7 @@ import java.nio.file.Path
import cats.effect._
import cats.implicits._
import fs2.io.file.Files
import fs2.{Pipe, Stream}
import docspell.common._
@ -12,12 +13,11 @@ import docspell.convert.ConversionResult.{Handler, successPdf, successPdfTxt}
private[extern] object ExternConv {
def toPDF[F[_]: Sync: ContextShift, A](
def toPDF[F[_]: Async, A](
name: String,
cmdCfg: SystemCommand.Config,
wd: Path,
useStdin: Boolean,
blocker: Blocker,
logger: Logger[F],
reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
@ -37,13 +37,12 @@ private[extern] object ExternConv {
val createInput: Pipe[F, Byte, Unit] =
if (useStdin) _ => Stream.emit(())
else storeDataToFile(name, blocker, logger, inFile)
else storeDataToFile(name, logger, inFile)
in.through(createInput).flatMap { _ =>
if (useStdin) in
@ -66,8 +65,7 @@ private[extern] object ExternConv {
def readResult[F[_]: Sync: ContextShift](
blocker: Blocker,
def readResult[F[_]: Async](
chunkSize: Int,
logger: Logger[F]
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
@ -77,15 +75,15 @@ private[extern] object ExternConv {
File.existsNonEmpty[F](outTxt).flatMap {
case true =>
File.readAll(out, blocker, chunkSize),
File.readText(outTxt, blocker)
File.readAll(out, chunkSize),
case false =>
successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
successPdf(File.readAll(out, chunkSize)).pure[F]
case true =>
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
successPdf(File.readAll(out, chunkSize)).pure[F]
case false =>
@ -95,9 +93,8 @@ private[extern] object ExternConv {
def readResultTesseract[F[_]: Sync: ContextShift](
def readResultTesseract[F[_]: Async](
outPrefix: String,
blocker: Blocker,
chunkSize: Int,
logger: Logger[F]
)(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = {
@ -106,9 +103,9 @@ private[extern] object ExternConv {
case true =>
val outTxt = out.resolveSibling(s"$outPrefix.txt")
File.exists(outTxt).flatMap { txtExists =>
val pdfData = File.readAll(out, blocker, chunkSize)
val pdfData = File.readAll(out, chunkSize)
if (result.rc == 0)
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt, blocker)).pure[F]
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt)).pure[F]
else successPdf(pdfData).pure[F]
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
@ -124,9 +121,8 @@ private[extern] object ExternConv {
private def storeDataToFile[F[_]: Sync: ContextShift](
private def storeDataToFile[F[_]: Async](
name: String,
blocker: Blocker,
logger: Logger[F],
inFile: Path
): Pipe[F, Byte, Unit] =
@ -134,7 +130,7 @@ private[extern] object ExternConv {
.eval(logger.debug(s"Storing input to file ${inFile} for running $name"))
.drain ++
Stream.eval(storeFile(in, inFile, blocker))
Stream.eval(storeFile(in, inFile))
private def logResult[F[_]: Sync](
name: String,
@ -144,10 +140,9 @@ private[extern] object ExternConv {
logger.debug(s"$name stdout: ${result.stdout}") *>
logger.debug(s"$name stderr: ${result.stderr}")
private def storeFile[F[_]: Sync: ContextShift](
private def storeFile[F[_]: Async](
in: Stream[F, Byte],
target: Path,
blocker: Blocker
target: Path
): F[Unit] =
in.through(fs2.io.file.writeAll(target, blocker)).compile.drain
@ -11,23 +11,21 @@ import docspell.convert.ConversionResult.Handler
object OcrMyPdf {
def toPDF[F[_]: Sync: ContextShift, A](
def toPDF[F[_]: Async, A](
cfg: OcrMyPdfConfig,
lang: Language,
chunkSize: Int,
blocker: Blocker,
logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
if (cfg.enabled) {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger)
ExternConv.readResult[F](chunkSize, logger)
ExternConv.toPDF[F, A](
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
)(in, handler)
@ -11,23 +11,21 @@ import docspell.convert.ConversionResult.Handler
object Tesseract {
def toPDF[F[_]: Sync: ContextShift, A](
def toPDF[F[_]: Async, A](
cfg: TesseractConfig,
lang: Language,
chunkSize: Int,
blocker: Blocker,
logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val outBase = cfg.command.args.tail.headOption.getOrElse("out")
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
ExternConv.readResultTesseract[F](outBase, chunkSize, logger)
ExternConv.toPDF[F, A](
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
)(in, handler)
@ -11,21 +11,19 @@ import docspell.convert.ConversionResult.Handler
object Unoconv {
def toPDF[F[_]: Sync: ContextShift, A](
def toPDF[F[_]: Async, A](
cfg: UnoconvConfig,
chunkSize: Int,
blocker: Blocker,
logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger)
ExternConv.readResult[F](chunkSize, logger)
ExternConv.toPDF[F, A](
@ -13,16 +13,15 @@ import docspell.convert.{ConversionResult, SanitizeHtml}
object WkHtmlPdf {
def toPDF[F[_]: Sync: ContextShift, A](
def toPDF[F[_]: Async, A](
cfg: WkHtmlPdfConfig,
chunkSize: Int,
charset: Charset,
sanitizeHtml: SanitizeHtml,
blocker: Blocker,
logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger)
ExternConv.readResult[F](chunkSize, logger)
val cmdCfg = cfg.command.replace(Map("{{encoding}}" -> charset.name()))
@ -40,7 +39,7 @@ object WkHtmlPdf {
.toPDF[F, A]("wkhtmltopdf", cmdCfg, cfg.workingDir, true, blocker, logger, reader)(
.toPDF[F, A]("wkhtmltopdf", cmdCfg, cfg.workingDir, true, logger, reader)(
@ -4,6 +4,7 @@ import java.nio.file.Paths
import cats.data.Kleisli
import cats.effect.IO
import cats.effect.unsafe.implicits.global
import cats.implicits._
import fs2.Stream
@ -12,13 +13,11 @@ import docspell.convert.ConversionResult.Handler
import docspell.convert.extern.OcrMyPdfConfig
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
import docspell.convert.flexmark.MarkdownConfig
import docspell.files.{ExampleFiles, TestFiles}
import docspell.files.ExampleFiles
import munit._
class ConversionTest extends FunSuite with FileChecks {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val logger = Logger.log4s[IO](org.log4s.getLogger)
val target = Paths.get("target")
@ -73,7 +72,7 @@ class ConversionTest extends FunSuite with FileChecks {
val conversion =
Conversion.create[IO](convertConfig, SanitizeHtml.none, blocker, logger)
Conversion.create[IO](convertConfig, SanitizeHtml.none, logger)
val bombs = List(
@ -167,7 +166,7 @@ class ConversionTest extends FunSuite with FileChecks {
.evalMap({ case (uri, index) =>
val load = uri.readURL[IO](8192, blocker)
val load = uri.readURL[IO](8192)
val dataType = DataType.filename(uri.path.segments.last)
logger.info(s"Processing file ${uri.path.asString}") *>
conv.toPDF(dataType, Language.German, handler(index))(load)
@ -5,6 +5,7 @@ import java.nio.file.{Files, Path}
import cats.data.Kleisli
import cats.effect.IO
import cats.effect.unsafe.implicits.global
import fs2.{Pipe, Stream}
import docspell.common.MimeType
@ -4,19 +4,18 @@ import java.nio.charset.StandardCharsets
import java.nio.file.{Path, Paths}
import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.common._
import docspell.convert._
import docspell.files.{ExampleFiles, TestFiles}
import docspell.files.ExampleFiles
import munit._
class ExternConvTest extends FunSuite with FileChecks {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val utf8 = StandardCharsets.UTF_8
val logger = Logger.log4s[IO](org.log4s.getLogger)
val target = Paths.get("target")
val utf8 = StandardCharsets.UTF_8
val logger = Logger.log4s[IO](org.log4s.getLogger)
val target = Paths.get("target")
test("convert html to pdf") {
val cfg = SystemCommand.Config(
@ -32,8 +31,8 @@ class ExternConvTest extends FunSuite with FileChecks {
val wkCfg = WkHtmlPdfConfig(cfg, target)
val p =
.toPDF[IO, Path](wkCfg, 8192, utf8, SanitizeHtml.none, blocker, logger)(
ExampleFiles.letter_de_html.readURL[IO](8192, blocker),
.toPDF[IO, Path](wkCfg, 8192, utf8, SanitizeHtml.none, logger)(
@ -59,8 +58,8 @@ class ExternConvTest extends FunSuite with FileChecks {
val ucCfg = UnoconvConfig(cfg, target)
val p =
.toPDF[IO, Path](ucCfg, 8192, blocker, logger)(
ExampleFiles.examples_sample_docx.readURL[IO](8192, blocker),
.toPDF[IO, Path](ucCfg, 8192, logger)(
@ -85,8 +84,8 @@ class ExternConvTest extends FunSuite with FileChecks {
val tessCfg = TesseractConfig(cfg, target)
val (pdf, txt) =
.toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, blocker, logger)(
ExampleFiles.camera_letter_en_jpg.readURL[IO](8192, blocker),
.toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, logger)(
storePdfTxtHandler(dir.resolve("test.pdf"), dir.resolve("test.txt"))
@ -25,8 +25,7 @@ trait Extraction[F[_]] {
object Extraction {
def create[F[_]: Sync: ContextShift](
blocker: Blocker,
def create[F[_]: Async](
logger: Logger[F],
cfg: ExtractConfig
): Extraction[F] =
@ -39,7 +38,7 @@ object Extraction {
TikaMimetype.resolve(dataType, data).flatMap {
case MimeType.PdfMatch(_) =>
.get(data, blocker, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
.get(data, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
case PoiType(mt) =>
@ -59,7 +58,7 @@ object Extraction {
case OcrType(mt) =>
val doExtract = TextExtract
.extractOCR(data, blocker, logger, lang.iso3, cfg.ocr)
.extractOCR(data, logger, lang.iso3, cfg.ocr)
@ -17,9 +17,8 @@ object PdfExtract {
Result(t._1, t._2)
def get[F[_]: Sync: ContextShift](
def get[F[_]: Async](
in: Stream[F, Byte],
blocker: Blocker,
lang: Language,
stripMinLen: Int,
ocrCfg: OcrConfig,
@ -27,7 +26,7 @@ object PdfExtract {
): F[Either[Throwable, Result]] = {
val runOcr =
TextExtract.extractOCR(in, blocker, logger, lang.iso3, ocrCfg).compile.lastOrError
TextExtract.extractOCR(in, logger, lang.iso3, ocrCfg).compile.lastOrError
def chooseResult(ocrStr: Text, strippedRes: (Text, Option[PdfMetaData])) =
if (ocrStr.length > strippedRes._1.length)
@ -2,7 +2,7 @@ package docspell.extract.ocr
import java.nio.file.Path
import cats.effect.{Blocker, ContextShift, Sync}
import cats.effect._
import fs2.Stream
import docspell.common._
@ -11,16 +11,15 @@ object Ocr {
/** Extract the text of all pages in the given pdf file.
def extractPdf[F[_]: Sync: ContextShift](
def extractPdf[F[_]: Async](
pdf: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): F[Option[String]] =
// Everything runs inside the resource returned by File.withTempDir, which is
// given the configured ghostscript working dir and the prefix "extractpdf".
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
runGhostscript(pdf, config, wd, blocker, logger)
.flatMap(tmpImg => runTesseractFile(tmpImg, blocker, logger, lang, config))
// Each path emitted by runGhostscript is passed on to runTesseractFile.
runGhostscript(pdf, config, wd, logger)
.flatMap(tmpImg => runTesseractFile(tmpImg, logger, lang, config))
// The per-image texts are concatenated, separated by three newlines.
.fold1(_ + "\n\n\n" + _)
@ -28,47 +27,43 @@ object Ocr {
/** Extract the text from the given image data (handed to tesseract via stdin)
def extractImage[F[_]: Sync: ContextShift](
def extractImage[F[_]: Async](
img: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): Stream[F, String] =
runTesseractStdin(img, blocker, logger, lang, config)
runTesseractStdin(img, logger, lang, config)
def extractPdFFile[F[_]: Sync: ContextShift](
def extractPdFFile[F[_]: Async](
pdf: Path,
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): F[Option[String]] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker, logger)
.flatMap(tif => runTesseractFile(tif, blocker, logger, lang, config))
runGhostscriptFile(pdf, config.ghostscript.command, wd, logger)
.flatMap(tif => runTesseractFile(tif, logger, lang, config))
.fold1(_ + "\n\n\n" + _)
// Extracts text from an image that already exists on disk at `img`.
// Forwards its arguments unchanged to runTesseractFile.
def extractImageFile[F[_]: Sync: ContextShift](
def extractImageFile[F[_]: Async](
img: Path,
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): Stream[F, String] =
runTesseractFile(img, blocker, logger, lang, config)
runTesseractFile(img, logger, lang, config)
/** Run ghostscript to extract all pdf pages into tiff files. The
* files are stored to a temporary location on disk and returned.
private[extract] def runGhostscript[F[_]: Sync: ContextShift](
private[extract] def runGhostscript[F[_]: Async](
pdf: Stream[F, Byte],
cfg: OcrConfig,
wd: Path,
blocker: Blocker,
logger: Logger[F]
): Stream[F, Path] = {
val xargs =
@ -84,19 +79,18 @@ object Ocr {
.execSuccess(cmd, blocker, logger, wd = Some(wd), stdin = pdf)
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
.execSuccess(cmd, logger, wd = Some(wd), stdin = pdf)
.evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd))
.flatMap(fs => Stream.emits(fs))
/** Run ghostscript to extract all pdf pages into tiff files. The
* files are stored to a temporary location on disk and returned.
private[extract] def runGhostscriptFile[F[_]: Sync: ContextShift](
private[extract] def runGhostscriptFile[F[_]: Async](
pdf: Path,
ghostscript: SystemCommand.Config,
wd: Path,
blocker: Blocker,
logger: Logger[F]
): Stream[F, Path] = {
val cmd = ghostscript.replace(
@ -106,8 +100,8 @@ object Ocr {
.execSuccess[F](cmd, blocker, logger, wd = Some(wd))
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
.execSuccess[F](cmd, logger, wd = Some(wd))
.evalMap(_ => File.listJFiles(pathEndsWith(".tif"), wd))
.flatMap(fs => Stream.emits(fs))
@ -117,11 +111,10 @@ object Ocr {
/** Run unpaper to optimize the image for ocr. On success the optimized file
* is stored next to the input image (file name prefixed with `u-`) and returned.
private[extract] def runUnpaperFile[F[_]: Sync: ContextShift](
private[extract] def runUnpaperFile[F[_]: Async](
img: Path,
unpaper: SystemCommand.Config,
wd: Path,
blocker: Blocker,
logger: Logger[F]
): Stream[F, Path] = {
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
@ -132,7 +125,7 @@ object Ocr {
.execSuccess[F](cmd, blocker, logger, wd = Some(wd))
.execSuccess[F](cmd, logger, wd = Some(wd))
.map(_ => targetFile)
.handleErrorWith { th =>
@ -146,39 +139,36 @@ object Ocr {
/** Run tesseract on the given image file and return the extracted
* text.
private[extract] def runTesseractFile[F[_]: Sync: ContextShift](
private[extract] def runTesseractFile[F[_]: Async](
img: Path,
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): Stream[F, String] =
// tesseract cannot cope with absolute filenames
// so use the parent as working dir
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker, logger).flatMap {
uimg =>
val cmd = config.tesseract.command
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
.execSuccess[F](cmd, blocker, logger, wd = Some(uimg.getParent))
runUnpaperFile(img, config.unpaper.command, img.getParent, logger).flatMap { uimg =>
val cmd = config.tesseract.command
Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang))
.execSuccess[F](cmd, logger, wd = Some(uimg.getParent))
/** Run tesseract on the given image data, which is fed to the process
* via stdin, and return the extracted text.
private[extract] def runTesseractStdin[F[_]: Sync: ContextShift](
private[extract] def runTesseractStdin[F[_]: Async](
img: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): Stream[F, String] = {
// The `{{file}}` placeholder is replaced by the literal "stdin" and
// `{{lang}}` by the result of fixLanguage(lang).
val cmd = config.tesseract.command
.replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang)))
// The image bytes are supplied as the command's stdin; only the captured
// stdout of the command result is emitted.
SystemCommand.execSuccess(cmd, blocker, logger, stdin = img).map(_.stdout)
SystemCommand.execSuccess(cmd, logger, stdin = img).map(_.stdout)
private def fixLanguage(lang: String): String =
@ -1,6 +1,6 @@
package docspell.extract.ocr
import cats.effect.{Blocker, ContextShift, Sync}
import cats.effect._
import fs2.Stream
import docspell.common._
@ -9,18 +9,16 @@ import docspell.files._
object TextExtract {
// Thin alias for extractOCR: all arguments are forwarded unchanged.
def extract[F[_]: Sync: ContextShift](
def extract[F[_]: Async](
in: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
): Stream[F, Text] =
extractOCR(in, blocker, logger, lang, config)
extractOCR(in, logger, lang, config)
def extractOCR[F[_]: Sync: ContextShift](
def extractOCR[F[_]: Async](
in: Stream[F, Byte],
blocker: Blocker,
logger: Logger[F],
lang: String,
config: OcrConfig
@ -29,10 +27,10 @@ object TextExtract {
.eval(TikaMimetype.detect(in, MimeTypeHint.none))
case MimeType.pdf =>
Stream.eval(Ocr.extractPdf(in, blocker, logger, lang, config)).unNoneTerminate
Stream.eval(Ocr.extractPdf(in, logger, lang, config)).unNoneTerminate
case mt if mt.primary == "image" =>
Ocr.extractImage(in, blocker, logger, lang, config)
Ocr.extractImage(in, logger, lang, config)
case mt =>
raiseError(s"File `$mt` not supported")
@ -12,6 +12,7 @@ import fs2.Stream
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.rendering.PDFRenderer
import scodec.bits.ByteVector
trait PdfboxPreview[F[_]] {
@ -50,7 +51,7 @@ object PdfboxPreview {
private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = {
val out = new ByteArrayOutputStream()
ImageIO.write(img, "PNG", out)
@ -1,6 +1,7 @@
package docspell.extract.ocr
import cats.effect.IO
import cats.effect.unsafe.implicits.global
import docspell.common.Logger
import docspell.files.TestFiles
@ -14,7 +15,7 @@ class TextExtractionSuite extends FunSuite {
test("extract english pdf".ignore) {
val text = TextExtract
.extract[IO](letterSourceEN, blocker, logger, "eng", OcrConfig.default)
.extract[IO](letterSourceEN, logger, "eng", OcrConfig.default)
@ -24,7 +25,7 @@ class TextExtractionSuite extends FunSuite {
test("extract german pdf".ignore) {
val expect = TestFiles.letterDEText
val extract = TextExtract
.extract[IO](letterSourceDE, blocker, logger, "deu", OcrConfig.default)
.extract[IO](letterSourceDE, logger, "deu", OcrConfig.default)
@ -1,14 +1,13 @@
package docspell.extract.odf
import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.files.{ExampleFiles, TestFiles}
import docspell.files.ExampleFiles
import munit._
class OdfExtractTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val files = List(
ExampleFiles.examples_sample_odt -> 6372,
@ -21,7 +20,7 @@ class OdfExtractTest extends FunSuite {
val str1 = OdfExtract.get(is).fold(throw _, identity)
assertEquals(str1.length, len)
val data = file.readURL[IO](8192, blocker)
val data = file.readURL[IO](8192)
val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity)
assertEquals(str2, str1)
@ -1,14 +1,13 @@
package docspell.extract.pdfbox
import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.files.{ExampleFiles, TestFiles}
import munit._
class PdfboxExtractTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val textPDFs = List(
ExampleFiles.letter_de_pdf -> TestFiles.letterDEText,
@ -27,7 +26,7 @@ class PdfboxExtractTest extends FunSuite {
test("extract text from text PDFs via Stream") {
textPDFs.foreach { case (file, txt) =>
val data = file.readURL[IO](8192, blocker)
val data = file.readURL[IO](8192)
val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity)
val received = removeFormatting(str.value)
val expect = removeFormatting(txt)
@ -3,15 +3,15 @@ package docspell.extract.pdfbox
import java.nio.file.Path
import cats.effect._
import cats.effect.unsafe.implicits.global
import fs2.Stream
import fs2.io.file.Files
import docspell.files.{ExampleFiles, TestFiles}
import docspell.files.ExampleFiles
import munit._
class PdfboxPreviewTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val testPDFs = List(
ExampleFiles.letter_de_pdf -> "7d98be75b239816d6c751b3f3c56118ebf1a4632c43baf35a68a662f9d595ab8",
@ -21,7 +21,7 @@ class PdfboxPreviewTest extends FunSuite {
test("extract first page image from PDFs".flaky) {
testPDFs.foreach { case (file, checksum) =>
val data = file.readURL[IO](8192, blocker)
val data = file.readURL[IO](8192)
val sha256out =
@ -42,7 +42,7 @@ class PdfboxPreviewTest extends FunSuite {
def writeToFile(data: Stream[IO, Byte], file: Path): IO[Unit] =
fs2.io.file.writeAll(file, blocker)
@ -1,15 +1,14 @@
package docspell.extract.poi
import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.common.MimeTypeHint
import docspell.files.{ExampleFiles, TestFiles}
import docspell.files.ExampleFiles
import munit._
class PoiExtractTest extends FunSuite {
val blocker = TestFiles.blocker
implicit val CS = TestFiles.CS
val officeFiles = List(
ExampleFiles.examples_sample_doc -> 6241,
@ -21,13 +20,13 @@ class PoiExtractTest extends FunSuite {
test("extract text from ms office files") {
officeFiles.foreach { case (file, len) =>
val str1 = PoiExtract
.get[IO](file.readURL[IO](8192, blocker), MimeTypeHint.none)
.get[IO](file.readURL[IO](8192), MimeTypeHint.none)
.fold(throw _, identity)
val str2 = PoiExtract
file.readURL[IO](8192, blocker),
MimeTypeHint(Some(file.path.segments.last), None)
@ -13,28 +13,19 @@ import docspell.common.Glob
object Zip {
// Pipe form of `unzip`: the returned pipe applies `unzip` with the same
// chunk size and glob to the incoming byte stream.
def unzipP[F[_]: ConcurrentEffect: ContextShift](
chunkSize: Int,
blocker: Blocker,
glob: Glob
): Pipe[F, Byte, Binary[F]] =
s => unzip[F](chunkSize, blocker, glob)(s)
def unzipP[F[_]: Async](chunkSize: Int, glob: Glob): Pipe[F, Byte, Binary[F]] =
s => unzip[F](chunkSize, glob)(s)
def unzip[F[_]: ConcurrentEffect: ContextShift](
chunkSize: Int,
blocker: Blocker,
glob: Glob
def unzip[F[_]: Async](chunkSize: Int, glob: Glob)(
data: Stream[F, Byte]
): Stream[F, Binary[F]] =
.flatMap(in => unzipJava(in, chunkSize, blocker, glob))
.flatMap(in => unzipJava(in, chunkSize, glob))
def unzipJava[F[_]: Sync: ContextShift](
def unzipJava[F[_]: Async](
in: InputStream,
chunkSize: Int,
blocker: Blocker,
glob: Glob
): Stream[F, Binary[F]] = {
val zin = new ZipInputStream(in)
@ -52,7 +43,7 @@ object Zip {
.map { ze =>
val name = Paths.get(ze.getName()).getFileName.toString
val data =
fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false)
fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, false)
Binary(name, data)
@ -1,16 +1,14 @@
package docspell.files
import scala.concurrent.ExecutionContext
import scala.util.Using
import cats.effect.{Blocker, IO}
import cats.effect._
import cats.effect.unsafe.implicits.global
import cats.implicits._
import munit._
class ImageSizeTest extends FunSuite {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
implicit val CS = IO.contextShift(ExecutionContext.global)
//tiff files are not supported on the jdk by default
//requires an external library
@ -37,7 +35,7 @@ class ImageSizeTest extends FunSuite {
test("get sizes from stream") {
files.foreach { case (uri, expect) =>
val stream = uri.readURL[IO](8192, blocker)
val stream = uri.readURL[IO](8192)
val dim = ImageSize.get(stream).unsafeRunSync()
assertEquals(dim, expect.some)
@ -1,19 +1,17 @@
package docspell.files
import scala.concurrent.ExecutionContext
import cats.effect._
import cats.effect.unsafe.implicits.global
import docspell.common.MimeTypeHint
object Playing extends IOApp {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
def run(args: List[String]): IO[ExitCode] =
IO {
//val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192)
//val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192)
val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192, blocker)
val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192)
val x = for {
odsm1 <-
@ -1,29 +1,26 @@
package docspell.files
import scala.concurrent.ExecutionContext
import cats.effect.{Blocker, IO}
import cats.effect._
import cats.effect.unsafe.implicits.global
import fs2.Stream
object TestFiles {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
implicit val CS = IO.contextShift(ExecutionContext.global)
val letterSourceDE: Stream[IO, Byte] =
.readURL[IO](8 * 1024, blocker)
.readURL[IO](8 * 1024)
val letterSourceEN: Stream[IO, Byte] =
.readURL[IO](8 * 1024, blocker)
.readURL[IO](8 * 1024)
lazy val letterDEText =
.readText[IO](8 * 1024, blocker)
.readText[IO](8 * 1024)
lazy val letterENText =
.readText[IO](8 * 1024, blocker)
.readText[IO](8 * 1024)
@ -1,8 +1,7 @@
package docspell.files
import scala.concurrent.ExecutionContext
import cats.effect._
import cats.effect.unsafe.implicits.global
import cats.implicits._
import docspell.common.Glob
@ -11,12 +10,9 @@ import munit._
class ZipTest extends FunSuite {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
implicit val CS = IO.contextShift(ExecutionContext.global)
test("unzip") {
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
val uncomp = zipFile.through(Zip.unzip(8192, blocker, Glob.all))
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192)
val uncomp = zipFile.through(Zip.unzip(8192, Glob.all))
.evalMap { entry =>
@ -11,7 +11,7 @@ import org.http4s.client.Client
import org.http4s.client.middleware.Logger
import org.log4s.getLogger
final class SolrFtsClient[F[_]: Effect](
final class SolrFtsClient[F[_]: Async](
solrUpdate: SolrUpdate[F],
solrSetup: SolrSetup[F],
solrQuery: SolrQuery[F]
@ -77,7 +77,7 @@ final class SolrFtsClient[F[_]: Effect](
object SolrFtsClient {
private[this] val logger = getLogger
def apply[F[_]: ConcurrentEffect](
def apply[F[_]: Async](
cfg: SolrConfig,
httpClient: Client[F]
): Resource[F, FtsClient[F]] = {
@ -91,7 +91,7 @@ object SolrFtsClient {
private def loggingMiddleware[F[_]: Concurrent](
private def loggingMiddleware[F[_]: Async](
cfg: SolrConfig,
client: Client[F]
): Client[F] =
@ -22,7 +22,7 @@ trait SolrQuery[F[_]] {
object SolrQuery {
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
val dsl = new Http4sClientDsl[F] {}
import dsl._
@ -24,7 +24,7 @@ trait SolrSetup[F[_]] {
object SolrSetup {
private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
val dsl = new Http4sClientDsl[F] {}
import dsl._
@ -30,7 +30,7 @@ trait SolrUpdate[F[_]] {
object SolrUpdate {
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = {
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = {
val dsl = new Http4sClientDsl[F] {}
import dsl._
@ -30,10 +30,10 @@ import docspell.store.queue._
import docspell.store.records.RJobLog
import emil.javamail._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
final class JoexAppImpl[F[_]: Async](
cfg: Config,
nodeOps: ONode[F],
store: Store[F],
@ -49,8 +49,8 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
val prun = periodicScheduler.start.compile.drain
for {
_ <- scheduleBackgroundTasks
_ <- ConcurrentEffect[F].start(run)
_ <- ConcurrentEffect[F].start(prun)
_ <- Async[F].start(run)
_ <- Async[F].start(prun)
_ <- scheduler.periodicAwake
_ <- periodicScheduler.periodicAwake
_ <- nodeOps.register(cfg.appId, NodeType.Joex, cfg.baseUrl)
@ -79,17 +79,16 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: Timer](
object JoexAppImpl {
def create[F[_]: ConcurrentEffect: ContextShift: Timer](
def create[F[_]: Async](
cfg: Config,
termSignal: SignallingRef[F, Boolean],
connectEC: ExecutionContext,
clientEC: ExecutionContext,
blocker: Blocker
clientEC: ExecutionContext
): Resource[F, JoexApp[F]] =
for {
httpClient <- BlazeClientBuilder[F](clientEC).resource
client = JoexClient(httpClient)
store <- Store.create(cfg.jdbc, connectEC, blocker)
store <- Store.create(cfg.jdbc, connectEC)
queue <- JobQueue(store)
pstore <- PeriodicTaskStore.create(store)
nodeOps <- ONode(store)
@ -97,11 +96,11 @@ object JoexAppImpl {
upload <- OUpload(store, queue, cfg.files, joex)
fts <- createFtsClient(cfg)(httpClient)
itemOps <- OItem(store, fts, queue, joex)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig, blocker)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, blocker, store)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
sch <- SchedulerBuilder(cfg.scheduler, store)
@ -207,14 +206,13 @@ object JoexAppImpl {
app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR
private def createFtsClient[F[_]: ConcurrentEffect](
private def createFtsClient[F[_]: Async](
cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
@ -1,7 +1,7 @@
package docspell.joex
import cats.effect.Ref
import cats.effect._
import cats.effect.concurrent.Ref
import fs2.Stream
import fs2.concurrent.SignallingRef
@ -9,9 +9,9 @@ import docspell.common.Pools
import docspell.joex.routes._
import org.http4s.HttpApp
import org.http4s.blaze.server.BlazeServerBuilder
import org.http4s.implicits._
import org.http4s.server.Router
import org.http4s.server.blaze.BlazeServerBuilder
import org.http4s.server.middleware.Logger
object JoexServer {
@ -22,17 +22,14 @@ object JoexServer {
exitRef: Ref[F, ExitCode]
def stream[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
pools: Pools
)(implicit T: Timer[F]): Stream[F, Nothing] = {
def stream[F[_]: Async](cfg: Config, pools: Pools): Stream[F, Nothing] = {
val app = for {
signal <- Resource.eval(SignallingRef[F, Boolean](false))
exitCode <- Resource.eval(Ref[F].of(ExitCode.Success))
joexApp <-
.create[F](cfg, signal, pools.connectEC, pools.httpClientEC, pools.blocker)
.create[F](cfg, signal, pools.connectEC, pools.httpClientEC)
httpApp = Router(
"/api/info" -> InfoRoutes(cfg),
@ -57,9 +57,8 @@ object Main extends IOApp {
val pools = for {
cec <- connectEC
bec <- blockingEC
blocker = Blocker.liftExecutorService(bec)
rec <- restserverEC
} yield Pools(cec, bec, blocker, rec)
} yield Pools(cec, bec, rec)
pools.use(p =>
.stream[IO](cfg, p)
@ -33,16 +33,15 @@ object NerFile {
private def jsonFilePath(directory: Path, collective: Ident): Path =
def find[F[_]: Sync: ContextShift](
def find[F[_]: Async](
collective: Ident,
directory: Path,
blocker: Blocker
directory: Path
): F[Option[NerFile]] = {
val file = jsonFilePath(directory, collective)
File.existsNonEmpty[F](file).flatMap {
case true =>
.readJson[F, NerFile](file, blocker)
.readJson[F, NerFile](file)
case false =>
(None: Option[NerFile]).pure[F]
@ -3,7 +3,7 @@ package docspell.joex.analysis
import java.nio.file.Path
import cats.effect._
import cats.effect.concurrent.Semaphore
import cats.effect.std.Semaphore
import cats.implicits._
import docspell.common._
@ -31,19 +31,17 @@ object RegexNerFile {
case class Config(maxEntries: Int, directory: Path, minTime: Duration)
def apply[F[_]: Concurrent: ContextShift](
def apply[F[_]: Async](
cfg: Config,
blocker: Blocker,
store: Store[F]
): Resource[F, RegexNerFile[F]] =
for {
dir <- File.withTempDir[F](cfg.directory, "regexner-")
writer <- Resource.eval(Semaphore(1))
} yield new Impl[F](cfg.copy(directory = dir), blocker, store, writer)
} yield new Impl[F](cfg.copy(directory = dir), store, writer)
final private class Impl[F[_]: Concurrent: ContextShift](
final private class Impl[F[_]: Async](
cfg: Config,
blocker: Blocker,
store: Store[F],
writer: Semaphore[F] //TODO allow parallelism per collective
) extends RegexNerFile[F] {
@ -55,7 +53,7 @@ object RegexNerFile {
def doMakeFile(collective: Ident): F[Option[Path]] =
for {
now <- Timestamp.current[F]
existing <- NerFile.find[F](collective, cfg.directory, blocker)
existing <- NerFile.find[F](collective, cfg.directory)
result <- existing match {
case Some(nf) =>
val dur = Duration.between(nf.creation, now)
@ -105,11 +103,13 @@ object RegexNerFile {
} yield result
// Rewrites the JSON file belonging to `nf` with its `creation` field set to
// `now`. The parent directory is created first, and the whole sequence runs
// while holding a permit of the `writer` semaphore.
private def updateTimestamp(nf: NerFile, now: Timestamp): F[Unit] =
writer.withPermit(for {
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
_ <- File.mkDir(file.getParent)
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
} yield ())
writer.permit.use(_ =>
for {
file <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
_ <- File.mkDir(file.getParent)
_ <- File.writeString(file, nf.copy(creation = now).asJson.spaces2)
} yield ()
private def createFile(
lastUpdate: Timestamp,
@ -117,13 +117,17 @@ object RegexNerFile {
now: Timestamp
): F[NerFile] = {
// Persists a NER file pair for `nf` while holding a permit of the `writer`
// semaphore: `text` goes to the NER file path and the JSON encoding of `nf`
// goes to the JSON file path, both resolved against `cfg.directory`.
// The JSON file's parent directory is created before either write.
def update(nf: NerFile, text: String): F[Unit] =
writer.withPermit(for {
jsonFile <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
_ <- logger.fdebug(s"Writing custom NER file for collective '${collective.id}'")
_ <- File.mkDir(jsonFile.getParent)
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
} yield ())
writer.permit.use(_ =>
for {
jsonFile <- Sync[F].pure(nf.jsonFilePath(cfg.directory))
_ <- logger.fdebug(
s"Writing custom NER file for collective '${collective.id}'"
_ <- File.mkDir(jsonFile.getParent)
_ <- File.writeString(nf.nerFilePath(cfg.directory), text)
_ <- File.writeString(jsonFile, nf.asJson.spaces2)
} yield ()
for {
_ <- logger.finfo(s"Generating custom NER file for collective '${collective.id}'")
@ -28,7 +28,7 @@ object Migration {
def from[F[_]: Applicative: FlatMap](fm: FtsMigration[F]): Migration[F] =
Migration(fm.version, fm.engine, fm.description, FtsWork.from(fm.task))
def apply[F[_]: Effect](
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
fts: FtsClient[F],
store: Store[F],
@ -41,7 +41,7 @@ object Migration {
def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] =
def applySingle[F[_]: Async](ctx: FtsContext[F])(m: Migration[F]): F[Unit] =
for {
_ <- ctx.logger.info(s"Apply ${m.version}/${m.description}")
_ <- m.task.run(ctx)
@ -12,7 +12,7 @@ import docspell.store.records.RJob
object MigrationTask {
val taskName = Ident.unsafe("full-text-index")
def apply[F[_]: ConcurrentEffect](
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
fts: FtsClient[F]
): Task[F, Unit, Unit] =
@ -46,7 +46,7 @@ object MigrationTask {
def migrationTasks[F[_]: Effect](fts: FtsClient[F]): F[List[Migration[F]]] =
def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] =
fts.initialize.map(_.map(fm => Migration.from(fm)))
@ -14,7 +14,7 @@ object ReIndexTask {
val taskName = ReIndexTaskArgs.taskName
val tracker = DocspellSystem.migrationTaskTracker
def apply[F[_]: ConcurrentEffect](
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
fts: FtsClient[F]
): Task[F, Args, Unit] =
@ -27,7 +27,7 @@ object ReIndexTask {
def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log[F, Args](_.warn("Cancelling full-text re-index task"))
private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] =
private def clearData[F[_]: Async](collective: Option[Ident]): FtsWork[F] =
FtsWork.log[F](_.info("Clearing index data")) ++
(collective match {
case Some(_) =>
@ -7,19 +7,20 @@ import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
object CheckNodesTask {
// Builds the node-check task. When `cfg.enabled` is set it logs a message,
// creates a Blaze HTTP client and runs `checkNodes` with it.
def apply[F[_]: ConcurrentEffect](
def apply[F[_]: Async](
cfg: HouseKeepingConfig.CheckNodes
): Task[F, Unit, Unit] =
Task { ctx =>
if (cfg.enabled)
for {
_ <- ctx.logger.info("Check nodes reachability")
_ <- BlazeClientBuilder[F](ctx.blocker.blockingContext).resource.use { client =>
// NOTE(review): the client is built on the JVM-global ExecutionContext
// rather than on a pool supplied by the caller; confirm this is intended.
ec = scala.concurrent.ExecutionContext.global
_ <- BlazeClientBuilder[F](ec).resource.use { client =>
checkNodes(ctx, client)
_ <- ctx.logger.info(
@ -32,7 +33,7 @@ object CheckNodesTask {
ctx.logger.info("CheckNodes task is disabled in the configuration")
def checkNodes[F[_]: Sync](ctx: Context[F, _], client: Client[F]): F[Unit] =
def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] =
.evalMap(node =>
@ -45,7 +46,7 @@ object CheckNodesTask {
def checkNode[F[_]: Sync](logger: Logger[F], client: Client[F])(
def checkNode[F[_]: Async](logger: Logger[F], client: Client[F])(
url: LenientUri
): F[Boolean] = {
val apiVersion = url / "api" / "info" / "version"
@ -15,7 +15,7 @@ object HouseKeepingTask {
val taskName: Ident = Ident.unsafe("housekeeping")
def apply[F[_]: ConcurrentEffect](cfg: Config): Task[F, Unit, Unit] =
def apply[F[_]: Async](cfg: Config): Task[F, Unit, Unit] =
.log[F, Unit](_.info(s"Running house-keeping task now"))
.flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites))
@ -5,6 +5,7 @@ import java.nio.file.Path
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import fs2.io.file.Files
import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
import docspell.common._
@ -15,8 +16,7 @@ import bitpeace.RangeDef
object Classify {
def apply[F[_]: Sync: ContextShift](
blocker: Blocker,
def apply[F[_]: Async](
logger: Logger[F],
workingDir: Path,
store: Store[F],
@ -36,7 +36,7 @@ object Classify {
cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir =>
val modelFile = dir.resolve("model.ser.gz")
.through(fs2.io.file.writeAll(modelFile, blocker))
.flatMap(_ => classifier.classify(logger, ClassifierModel(modelFile), text))
@ -20,7 +20,7 @@ object LearnClassifierTask {
def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn("Cancelling learn-classifier task"))
def apply[F[_]: Sync: ContextShift](
def apply[F[_]: Async](
cfg: Config.TextAnalysis,
analyser: TextAnalyser[F]
): Task[F, Args, Unit] =
@ -28,7 +28,7 @@ object LearnClassifierTask {
.flatMap(_ => learnItemEntities(cfg, analyser))
.flatMap(_ => Task(_ => Sync[F].delay(System.gc())))
private def learnItemEntities[F[_]: Sync: ContextShift](
private def learnItemEntities[F[_]: Async](
cfg: Config.TextAnalysis,
analyser: TextAnalyser[F]
): Task[F, Args, Unit] =
@ -45,7 +45,7 @@ object LearnClassifierTask {
else ().pure[F]
private def learnTags[F[_]: Sync: ContextShift](
private def learnTags[F[_]: Async](
cfg: Config.TextAnalysis,
analyser: TextAnalyser[F]
): Task[F, Args, Unit] =
@ -11,7 +11,7 @@ import docspell.common._
import docspell.joex.scheduler._
object LearnItemEntities {
def learnAll[F[_]: Sync: ContextShift, A](
def learnAll[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident,
maxItems: Int,
@ -22,7 +22,7 @@ object LearnItemEntities {
.flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen))
.flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen))
def learnCorrOrg[F[_]: Sync: ContextShift, A](
def learnCorrOrg[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident,
maxItems: Int,
@ -33,7 +33,7 @@ object LearnItemEntities {
ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen)
def learnCorrPerson[F[_]: Sync: ContextShift, A](
def learnCorrPerson[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident,
maxItems: Int,
@ -44,7 +44,7 @@ object LearnItemEntities {
ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen)
def learnConcPerson[F[_]: Sync: ContextShift, A](
def learnConcPerson[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident,
maxItems: Int,
@ -55,7 +55,7 @@ object LearnItemEntities {
ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen)
def learnConcEquip[F[_]: Sync: ContextShift, A](
def learnConcEquip[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident,
maxItems: Int,
@ -66,7 +66,7 @@ object LearnItemEntities {
ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen)
private def learn[F[_]: Sync: ContextShift, A](
private def learn[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident
)(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =
@ -11,7 +11,7 @@ import docspell.store.records.RClassifierSetting
object LearnTags {
def learnTagCategory[F[_]: Sync: ContextShift, A](
def learnTagCategory[F[_]: Async, A](
analyser: TextAnalyser[F],
collective: Ident,
maxItems: Int,
@ -33,7 +33,7 @@ object LearnTags {
def learnAllTagCategories[F[_]: Sync: ContextShift, A](analyser: TextAnalyser[F])(
def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])(
collective: Ident,
maxItems: Int,
maxTextLen: Int
@ -2,6 +2,7 @@ package docspell.joex.learn
import cats.effect._
import cats.implicits._
import fs2.io.file.Files
import docspell.analysis.classifier.ClassifierModel
import docspell.common._
@ -13,18 +14,17 @@ import bitpeace.MimetypeHint
object StoreClassifierModel {
def handleModel[F[_]: Sync: ContextShift](
def handleModel[F[_]: Async](
ctx: Context[F, _],
collective: Ident,
modelName: ClassifierName
trainedModel: ClassifierModel
): F[Unit] =
handleModel(ctx.store, ctx.blocker, ctx.logger)(collective, modelName, trainedModel)
handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel)
def handleModel[F[_]: Sync: ContextShift](
def handleModel[F[_]: Async](
store: Store[F],
blocker: Blocker,
logger: Logger[F]
collective: Ident,
@ -36,7 +36,7 @@ object StoreClassifierModel {
RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
fileData = fs2.io.file.readAll(trainedModel.model, blocker, 4096)
fileData = Files[F].readAll(trainedModel.model, 4096)
newFile <-
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
_ <- store.transact(
@ -15,7 +15,7 @@ import emil.{MimeType => _, _}
object ReadMail {
def readBytesP[F[_]: ConcurrentEffect](
def readBytesP[F[_]: Async](
logger: Logger[F],
glob: Glob
): Pipe[F, Byte, Binary[F]] =
@ -26,7 +26,7 @@ object ReadMail {
Stream.eval(logger.debug(s"Converting e-mail file...")) >>
def mailToEntries[F[_]: ConcurrentEffect](
def mailToEntries[F[_]: Async](
logger: Logger[F],
glob: Glob
)(mail: Mail[F]): Stream[F, Binary[F]] = {
@ -35,7 +35,7 @@ object PdfConvTask {
val taskName = Ident.unsafe("pdf-files-migration")
def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, Args, Unit] =
def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] =
Task { ctx =>
for {
_ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
@ -62,7 +62,7 @@ object PdfConvTask {
val existsPdf =
for {
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
res = meta.filter(_.mimetype.matches(Mimetype.`application/pdf`))
res = meta.filter(_.mimetype.matches(Mimetype.applicationPdf))
_ <-
if (res.isEmpty)
@ -83,7 +83,7 @@ object PdfConvTask {
else none.pure[F]
def convert[F[_]: Sync: ContextShift](
def convert[F[_]: Async](
cfg: Config,
ctx: Context[F, Args],
in: FileMeta
@ -118,7 +118,6 @@ object PdfConvTask {
)(data, storeResult)
@ -95,7 +95,7 @@ object AttachmentPageCount {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
@ -98,7 +98,7 @@ object AttachmentPreview {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
@ -33,7 +33,7 @@ import bitpeace.{Mimetype, MimetypeHint, RangeDef}
object ConvertPdf {
def apply[F[_]: Sync: ContextShift](
def apply[F[_]: Async](
cfg: ConvertConfig,
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
@ -69,15 +69,15 @@ object ConvertPdf {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
def convertSafe[F[_]: Sync: ContextShift](
def convertSafe[F[_]: Async](
cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml,
ctx: Context[F, ProcessItemArgs],
item: ItemData
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
Conversion.create[F](cfg, sanitizeHtml, ctx.blocker, ctx.logger).use { conv =>
Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv =>
mime.toLocal match {
case mt =>
val data = ctx.store.bitpeace
@ -32,12 +32,12 @@ import emil.Mail
object ExtractArchive {
def apply[F[_]: ConcurrentEffect: ContextShift](
def apply[F[_]: Async](
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
multiPass(item, None).map(_._2)
def multiPass[F[_]: ConcurrentEffect: ContextShift](
def multiPass[F[_]: Async](
item: ItemData,
archive: Option[RAttachmentArchive]
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
@ -46,7 +46,7 @@ object ExtractArchive {
else multiPass(t._2, t._1)
def singlePass[F[_]: ConcurrentEffect: ContextShift](
def singlePass[F[_]: Async](
item: ItemData,
archive: Option[RAttachmentArchive]
): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
@ -85,9 +85,9 @@ object ExtractArchive {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
def extractSafe[F[_]: ConcurrentEffect: ContextShift](
def extractSafe[F[_]: Async](
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
@ -131,7 +131,7 @@ object ExtractArchive {
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
def extractZip[F[_]: ConcurrentEffect: ContextShift](
def extractZip[F[_]: Async](
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = {
@ -142,7 +142,7 @@ object ExtractArchive {
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
.through(Zip.unzipP[F](8192, ctx.blocker, glob))
.through(Zip.unzipP[F](8192, glob))
.flatMap(handleEntry(ctx, ra, pos, archive, None))
@ -150,7 +150,7 @@ object ExtractArchive {
def extractMail[F[_]: ConcurrentEffect](
def extractMail[F[_]: Async](
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = {
@ -28,7 +28,7 @@ object ItemHandler {
def newItem[F[_]: ConcurrentEffect: ContextShift](
def newItem[F[_]: Async](
cfg: Config,
itemOps: OItem[F],
fts: FtsClient[F],
@ -62,7 +62,7 @@ object ItemHandler {
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
def safeProcess[F[_]: Async](
cfg: Config,
itemOps: OItem[F],
fts: FtsClient[F],
@ -12,7 +12,7 @@ import docspell.joex.scheduler.Task
object ProcessItem {
def apply[F[_]: ConcurrentEffect: ContextShift](
def apply[F[_]: Async](
cfg: Config,
itemOps: OItem[F],
fts: FtsClient[F],
@ -27,7 +27,7 @@ object ProcessItem {
def processAttachments[F[_]: ConcurrentEffect: ContextShift](
def processAttachments[F[_]: Async](
cfg: Config,
fts: FtsClient[F],
analyser: TextAnalyser[F],
@ -35,7 +35,7 @@ object ProcessItem {
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item)
def analysisOnly[F[_]: Sync: ContextShift](
def analysisOnly[F[_]: Async](
cfg: Config,
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
@ -46,7 +46,7 @@ object ProcessItem {
private def processAttachments0[F[_]: ConcurrentEffect: ContextShift](
private def processAttachments0[F[_]: Async](
cfg: Config,
fts: FtsClient[F],
analyser: TextAnalyser[F],
@ -20,7 +20,7 @@ import docspell.store.records.RItem
object ReProcessItem {
type Args = ReProcessItemArgs
def apply[F[_]: ConcurrentEffect: ContextShift](
def apply[F[_]: Async](
cfg: Config,
fts: FtsClient[F],
itemOps: OItem[F],
@ -84,7 +84,7 @@ object ReProcessItem {
def processFiles[F[_]: ConcurrentEffect: ContextShift](
def processFiles[F[_]: Async](
cfg: Config,
fts: FtsClient[F],
itemOps: OItem[F],
@ -133,7 +133,7 @@ object ReProcessItem {
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
def safeProcess[F[_]: Async](
cfg: Config,
fts: FtsClient[F],
itemOps: OItem[F],
@ -19,7 +19,7 @@ import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
object TextAnalysis {
type Args = ProcessItemArgs
def apply[F[_]: Sync: ContextShift](
def apply[F[_]: Async](
cfg: Config.TextAnalysis,
analyser: TextAnalyser[F],
nerFile: RegexNerFile[F]
@ -78,7 +78,7 @@ object TextAnalysis {
} yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates))
def predictTags[F[_]: Sync: ContextShift](
def predictTags[F[_]: Async](
ctx: Context[F, Args],
cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta],
@ -97,7 +97,7 @@ object TextAnalysis {
} yield tags.flatten
def predictItemEntities[F[_]: Sync: ContextShift](
def predictItemEntities[F[_]: Async](
ctx: Context[F, Args],
cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta],
@ -128,13 +128,12 @@ object TextAnalysis {
private def makeClassify[F[_]: Sync: ContextShift](
private def makeClassify[F[_]: Async](
ctx: Context[F, Args],
cfg: Config.TextAnalysis,
classifier: TextClassifier[F]
)(text: String): ClassifierName => F[Option[String]] =
@ -15,7 +15,7 @@ import bitpeace.{Mimetype, RangeDef}
object TextExtraction {
def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])(
def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -60,7 +60,7 @@ object TextExtraction {
case class Result(am: RAttachmentMeta, td: TextData, tags: List[String] = Nil)
def extractTextIfEmpty[F[_]: Sync: ContextShift](
def extractTextIfEmpty[F[_]: Async](
ctx: Context[F, ProcessItemArgs],
cfg: ExtractConfig,
lang: Language,
@ -93,7 +93,7 @@ object TextExtraction {
def extractTextToMeta[F[_]: Sync: ContextShift](
def extractTextToMeta[F[_]: Async](
ctx: Context[F, _],
cfg: ExtractConfig,
lang: Language,
@ -132,13 +132,13 @@ object TextExtraction {
def findMime: F[Mimetype] =
.flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang))
private def extractTextFallback[F[_]: Sync: ContextShift](
private def extractTextFallback[F[_]: Async](
ctx: Context[F, _],
cfg: ExtractConfig,
ra: RAttachment,
@ -149,7 +149,7 @@ object TextExtraction {
ctx.logger.error(s"Cannot extract text").map(_ => None)
case id :: rest =>
val extr = Extraction.create[F](ctx.blocker, ctx.logger, cfg)
val extr = Extraction.create[F](ctx.logger, cfg)
extractText[F](ctx, extr, lang)(id)
@ -14,7 +14,7 @@ import org.http4s.dsl.Http4sDsl
object JoexRoutes {
def apply[F[_]: ConcurrentEffect: Timer](app: JoexApp[F]): HttpRoutes[F] = {
def apply[F[_]: Async](app: JoexApp[F]): HttpRoutes[F] = {
val dsl = new Http4sDsl[F] {}
import dsl._
HttpRoutes.of[F] {
@ -34,8 +34,8 @@ object JoexRoutes {
case POST -> Root / "shutdownAndExit" =>
for {
_ <- ConcurrentEffect[F].start(
Timer[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown
_ <- Async[F].start(
Temporal[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown
resp <- Ok(BasicResult(true, "Shutdown initiated."))
} yield resp
@ -31,45 +31,40 @@ trait Context[F[_], A] { self =>
last = config.retries == current.getOrElse(0)
} yield last
def blocker: Blocker
def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] =
new Context.ContextImpl[F, C](f(args), logger, store, blocker, config, jobId)
new Context.ContextImpl[F, C](f(args), logger, store, config, jobId)
object Context {
private[this] val log = getLogger
def create[F[_]: Functor, A](
def create[F[_]: Async, A](
jobId: Ident,
arg: A,
config: SchedulerConfig,
log: Logger[F],
store: Store[F],
blocker: Blocker
store: Store[F]
): Context[F, A] =
new ContextImpl(arg, log, store, blocker, config, jobId)
new ContextImpl(arg, log, store, config, jobId)
def apply[F[_]: Concurrent, A](
def apply[F[_]: Async, A](
job: RJob,
arg: A,
config: SchedulerConfig,
logSink: LogSink[F],
blocker: Blocker,
store: Store[F]
): F[Context[F, A]] =
for {
_ <- log.ftrace("Creating logger for task run")
logger <- QueueLogger(job.id, job.info, config.logBufferSize, logSink)
_ <- log.ftrace("Logger created, instantiating context")
ctx = create[F, A](job.id, arg, config, logger, store, blocker)
ctx = create[F, A](job.id, arg, config, logger, store)
} yield ctx
final private class ContextImpl[F[_]: Functor, A](
val args: A,
val logger: Logger[F],
val store: Store[F],
val blocker: Blocker,
val config: SchedulerConfig,
val jobId: Ident
) extends Context[F, A] {
@ -1,8 +1,8 @@
package docspell.joex.scheduler
import cats.effect.{Concurrent, Sync}
import cats.effect._
import cats.implicits._
import fs2.{Pipe, Stream}
import fs2.Pipe
import docspell.common._
import docspell.common.syntax.all._
@ -45,7 +45,7 @@ object LogSink {
def printer[F[_]: Sync]: LogSink[F] =
LogSink(_.evalMap(e => logInternal(e)))
def db[F[_]: Sync](store: Store[F]): LogSink[F] =
def db[F[_]: Async](store: Store[F]): LogSink[F] =
_.evalMap(ev =>
for {
@ -63,9 +63,6 @@ object LogSink {
def dbAndLog[F[_]: Concurrent](store: Store[F]): LogSink[F] = {
val s: Stream[F, Pipe[F, LogEvent, Unit]] =
Stream.emits(Seq(printer[F].receive, db[F](store).receive))
def dbAndLog[F[_]: Async](store: Store[F]): LogSink[F] =
LogSink(_.broadcastThrough(printer[F].receive, db[F](store).receive))
@ -24,20 +24,19 @@ trait PeriodicScheduler[F[_]] {
def shutdown: F[Unit]
def periodicAwake: F[Fiber[F, Unit]]
def periodicAwake: F[Fiber[F, Throwable, Unit]]
def notifyChange: F[Unit]
object PeriodicScheduler {
def create[F[_]: ConcurrentEffect](
def create[F[_]: Async](
cfg: PeriodicSchedulerConfig,
sch: Scheduler[F],
queue: JobQueue[F],
store: PeriodicTaskStore[F],
client: JoexClient[F],
timer: Timer[F]
client: JoexClient[F]
): Resource[F, PeriodicScheduler[F]] =
for {
waiter <- Resource.eval(SignallingRef(true))
@ -49,8 +48,7 @@ object PeriodicScheduler {
_ <- Resource.eval(psch.init)
} yield psch
@ -12,21 +12,19 @@ import docspell.joexapi.client.JoexClient
import docspell.store.queue._
import docspell.store.records.RPeriodicTask
import com.github.eikek.fs2calev._
import eu.timepit.fs2cron.calev.CalevScheduler
import org.log4s.getLogger
final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
final class PeriodicSchedulerImpl[F[_]: Async](
val config: PeriodicSchedulerConfig,
sch: Scheduler[F],
queue: JobQueue[F],
store: PeriodicTaskStore[F],
client: JoexClient[F],
waiter: SignallingRef[F, Boolean],
state: SignallingRef[F, State[F]],
timer: Timer[F]
state: SignallingRef[F, State[F]]
) extends PeriodicScheduler[F] {
private[this] val logger = getLogger
implicit private val _timer: Timer[F] = timer
private[this] val logger = getLogger
def start: Stream[F, Nothing] =
logger.sinfo("Starting periodic scheduler") ++
@ -35,8 +33,8 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
def shutdown: F[Unit] =
def periodicAwake: F[Fiber[F, Unit]] =
def periodicAwake: F[Fiber[F, Throwable, Unit]] =
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
@ -127,10 +125,11 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect](
s"Scheduling next notify for timer ${pj.timer.asString} -> ${pj.timer.nextElapse(now.toUtcDateTime)}"
) *>
.evalMap(_ => notifyChange)
@ -168,15 +167,15 @@ object PeriodicSchedulerImpl {
case class State[F[_]](
shutdownRequest: Boolean,
scheduledNotify: Option[Fiber[F, Unit]]
scheduledNotify: Option[Fiber[F, Throwable, Unit]]
) {
def requestShutdown: (State[F], Unit) =
(copy(shutdownRequest = true), ())
def setNotify(fb: Fiber[F, Unit]): (State[F], Unit) =
def setNotify(fb: Fiber[F, Throwable, Unit]): (State[F], Unit) =
(copy(scheduledNotify = Some(fb)), ())
def clearNotify: (State[F], Option[Fiber[F, Unit]]) =
def clearNotify: (State[F], Option[Fiber[F, Throwable, Unit]]) =
(copy(scheduledNotify = None), scheduledNotify)
@ -1,8 +1,9 @@
package docspell.joex.scheduler
import cats.effect.{Concurrent, Sync}
import cats.effect._
import cats.effect.std.Queue
import cats.implicits._
import fs2.concurrent.Queue
import fs2.Stream
import docspell.common._
@ -15,28 +16,28 @@ object QueueLogger {
): Logger[F] =
new Logger[F] {
def trace(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.enqueue1)
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.offer)
def debug(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.enqueue1)
LogEvent.create[F](jobId, jobInfo, LogLevel.Debug, msg).flatMap(q.offer)
def info(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Info, msg).flatMap(q.enqueue1)
LogEvent.create[F](jobId, jobInfo, LogLevel.Info, msg).flatMap(q.offer)
def warn(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Warn, msg).flatMap(q.enqueue1)
LogEvent.create[F](jobId, jobInfo, LogLevel.Warn, msg).flatMap(q.offer)
def error(ex: Throwable)(msg: => String): F[Unit] =
.create[F](jobId, jobInfo, LogLevel.Error, msg)
.map(le => le.copy(ex = Some(ex)))
def error(msg: => String): F[Unit] =
LogEvent.create[F](jobId, jobInfo, LogLevel.Error, msg).flatMap(q.enqueue1)
LogEvent.create[F](jobId, jobInfo, LogLevel.Error, msg).flatMap(q.offer)
def apply[F[_]: Concurrent](
def apply[F[_]: Async](
jobId: Ident,
jobInfo: String,
bufferSize: Int,
@ -45,7 +46,9 @@ object QueueLogger {
for {
q <- Queue.circularBuffer[F, LogEvent](bufferSize)
log = create(jobId, jobInfo, q)
_ <- Concurrent[F].start(q.dequeue.through(sink.receive).compile.drain)
_ <- Async[F].start(
} yield log
@ -1,6 +1,6 @@
package docspell.joex.scheduler
import cats.effect.{Fiber, Timer}
import cats.effect._
import fs2.Stream
import docspell.common.Ident
@ -30,5 +30,5 @@ trait Scheduler[F[_]] {
def shutdown(cancelAll: Boolean): F[Unit]
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]]
def periodicAwake: F[Fiber[F, Throwable, Unit]]
@ -1,18 +1,17 @@
package docspell.joex.scheduler
import cats.effect._
import cats.effect.concurrent.Semaphore
import cats.effect.std.Semaphore
import cats.implicits._
import fs2.concurrent.SignallingRef
import docspell.store.Store
import docspell.store.queue.JobQueue
case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
case class SchedulerBuilder[F[_]: Async](
config: SchedulerConfig,
tasks: JobTaskRegistry[F],
store: Store[F],
blocker: Blocker,
queue: Resource[F, JobQueue[F]],
logSink: LogSink[F]
) {
@ -27,10 +26,7 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] =
SchedulerBuilder[F](config, tasks, store, blocker, queue, logSink)
def withBlocker(blocker: Blocker): SchedulerBuilder[F] =
copy(blocker = blocker)
SchedulerBuilder[F](config, tasks, store, queue, logSink)
def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] =
copy(logSink = sink)
@ -39,19 +35,16 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
copy(queue = Resource.pure[F, JobQueue[F]](queue))
def serve: Resource[F, Scheduler[F]] =
resource.evalMap(sch =>
ConcurrentEffect[F].start(sch.start.compile.drain).map(_ => sch)
resource.evalMap(sch => Async[F].start(sch.start.compile.drain).map(_ => sch))
def resource: Resource[F, Scheduler[F]] = {
val scheduler = for {
val scheduler: Resource[F, SchedulerImpl[F]] = for {
jq <- queue
waiter <- Resource.eval(SignallingRef(true))
state <- Resource.eval(SignallingRef(SchedulerImpl.emptyState[F]))
perms <- Resource.eval(Semaphore(config.poolSize.toLong))
} yield new SchedulerImpl[F](
@ -68,16 +61,14 @@ case class SchedulerBuilder[F[_]: ConcurrentEffect: ContextShift](
object SchedulerBuilder {
def apply[F[_]: ConcurrentEffect: ContextShift](
def apply[F[_]: Async](
config: SchedulerConfig,
blocker: Blocker,
store: Store[F]
): SchedulerBuilder[F] =
new SchedulerBuilder[F](
@ -2,7 +2,7 @@ package docspell.joex.scheduler
import cats.data.OptionT
import cats.effect._
import cats.effect.concurrent.Semaphore
import cats.effect.std.Semaphore
import cats.implicits._
import fs2.Stream
import fs2.concurrent.SignallingRef
@ -17,9 +17,8 @@ import docspell.store.records.RJob
import org.log4s._
final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
final class SchedulerImpl[F[_]: Async](
val config: SchedulerConfig,
blocker: Blocker,
queue: JobQueue[F],
tasks: JobTaskRegistry[F],
store: Store[F],
@ -37,8 +36,8 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
def init: F[Unit] =
QJob.runningToWaiting(config.name, store)
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]] =
def periodicAwake: F[Fiber[F, Throwable, Unit]] =
.evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange)
@ -153,7 +152,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
for {
_ <-
logger.fdebug(s"Creating context for job ${job.info} to run cancellation $t")
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store)
ctx <- Context[F, String](job, job.args, config, logSink, store)
_ <- t.onCancel.run(ctx)
_ <- state.modify(_.markCancelled(job))
_ <- onFinish(job, JobState.Cancelled)
@ -177,7 +176,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
case Right(t) =>
for {
_ <- logger.fdebug(s"Creating context for job ${job.info} to run $t")
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store)
ctx <- Context[F, String](job, job.args, config, logSink, store)
jot = wrapTask(job, t.task, ctx)
tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx)
_ <- state.modify(_.addRunning(job, tok))
@ -208,9 +207,7 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
ctx: Context[F, String]
): Task[F, String, Unit] =
.mapF(fa =>
onStart(job) *> logger.fdebug("Starting task now") *> blocker.blockOn(fa)
.mapF(fa => onStart(job) *> logger.fdebug("Starting task now") *> fa)
case Right(()) =>
logger.info(s"Job execution successful: ${job.info}")
@ -252,11 +249,10 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
code: F[Unit],
onCancel: F[Unit],
ctx: Context[F, String]
): F[F[Unit]] = {
val bfa = blocker.blockOn(code)
): F[F[Unit]] =
logger.fdebug(s"Forking job ${job.info}") *>
.map(fiber =>
logger.fdebug(s"Cancelling job ${job.info}") *>
fiber.cancel *>
@ -271,11 +267,12 @@ final class SchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
ctx.logger.warn("Job has been cancelled.") *>
logger.fdebug(s"Job ${job.info} has been cancelled.")
object SchedulerImpl {
type CancelToken[F[_]] = F[Unit]
def emptyState[F[_]]: State[F] =
State(Map.empty, Set.empty, Map.empty, false)
@ -9,9 +9,9 @@ import docspell.common.syntax.all._
import docspell.common.{Ident, LenientUri}
import docspell.joexapi.model.BasicResult
import org.http4s.circe.CirceEntityDecoder._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.circe.CirceEntityDecoder
import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
import org.http4s.{Method, Request, Uri}
import org.log4s.getLogger
@ -29,8 +29,9 @@ object JoexClient {
private[this] val logger = getLogger
def apply[F[_]: Sync](client: Client[F]): JoexClient[F] =
new JoexClient[F] {
def apply[F[_]: Async](client: Client[F]): JoexClient[F] =
new JoexClient[F] with CirceEntityDecoder {
def notifyJoex(base: LenientUri): F[BasicResult] = {
val notifyUrl = base / "api" / "v1" / "notify"
val req = Request[F](Method.POST, uri(notifyUrl))
@ -62,6 +63,6 @@ object JoexClient {
def resource[F[_]: ConcurrentEffect](ec: ExecutionContext): Resource[F, JoexClient[F]] =
def resource[F[_]: Async](ec: ExecutionContext): Resource[F, JoexClient[F]] =
@ -1,12 +1,12 @@
package docspell.restserver
import java.net.InetAddress
import docspell.backend.auth.Login
import docspell.backend.{Config => BackendConfig}
import docspell.common._
import docspell.ftssolr.SolrConfig
import com.comcast.ip4s.IpAddress
case class Config(
appName: String,
appId: Ident,
@ -42,12 +42,14 @@ object Config {
case class HttpHeader(enabled: Boolean, headerName: String, headerValue: String)
case class AllowedIps(enabled: Boolean, ips: Set[String]) {
def containsAddress(inet: InetAddress): Boolean = {
val ip = inet.getHostAddress
def containsAddress(inet: IpAddress): Boolean = {
val ip = inet.fold(_.toUriString, _.toUriString) //.getHostAddress
lazy val ipParts = ip.split('.')
def checkSingle(pattern: String): Boolean =
pattern == ip || (inet.isLoopbackAddress && pattern == "") || (pattern
pattern == ip || (ip.contains(
) && pattern == "") || (pattern
.foldLeft(true) { case (r, (a, b)) =>
@ -52,9 +52,8 @@ object Main extends IOApp {
val pools = for {
cec <- connectEC
bec <- blockingEC
blocker = Blocker.liftExecutorService(bec)
rec <- restserverEC
} yield Pools(cec, bec, blocker, rec)
} yield Pools(cec, bec, rec)
if (EnvMode.current.isDev) {
@ -24,21 +24,20 @@ final class RestAppImpl[F[_]](val config: Config, val backend: BackendApp[F])
object RestAppImpl {
def create[F[_]: ConcurrentEffect: ContextShift](
def create[F[_]: Async](
cfg: Config,
connectEC: ExecutionContext,
httpClientEc: ExecutionContext,
blocker: Blocker
httpClientEc: ExecutionContext
): Resource[F, RestApp[F]] =
for {
backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker)(
backend <- BackendApp(cfg.backend, connectEC, httpClientEc)(
app = new RestAppImpl[F](cfg, backend)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR
private def createFtsClient[F[_]: ConcurrentEffect](
private def createFtsClient[F[_]: Async](
cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
@ -11,36 +11,33 @@ import docspell.restserver.routes._
import docspell.restserver.webapp._
import org.http4s._
import org.http4s.blaze.server.BlazeServerBuilder
import org.http4s.dsl.Http4sDsl
import org.http4s.headers.Location
import org.http4s.implicits._
import org.http4s.server.Router
import org.http4s.server.blaze.BlazeServerBuilder
import org.http4s.server.middleware.Logger
object RestServer {
def stream[F[_]: ConcurrentEffect](
cfg: Config,
pools: Pools
)(implicit T: Timer[F], CS: ContextShift[F]): Stream[F, Nothing] = {
def stream[F[_]: Async](cfg: Config, pools: Pools): Stream[F, Nothing] = {
val templates = TemplateRoutes[F](pools.blocker, cfg)
val templates = TemplateRoutes[F](cfg)
val app = for {
restApp <-
.create[F](cfg, pools.connectEC, pools.httpClientEC, pools.blocker)
.create[F](cfg, pools.connectEC, pools.httpClientEC)
httpApp = Router(
"/api/info" -> routes.InfoRoutes(),
"/api/v1/open/" -> openRoutes(cfg, restApp),
"/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
securedRoutes(cfg, pools, restApp, token)
securedRoutes(cfg, restApp, token)
"/api/v1/admin" -> AdminRoutes(cfg.adminEndpoint) {
adminRoutes(cfg, restApp)
"/api/doc" -> templates.doc,
"/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F](pools.blocker)),
"/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
"/app" -> EnvMiddleware(templates.app),
"/sw.js" -> EnvMiddleware(templates.serviceWorker),
"/" -> redirectTo("/app")
@ -61,9 +58,8 @@ object RestServer {
def securedRoutes[F[_]: Effect: ContextShift](
def securedRoutes[F[_]: Async](
cfg: Config,
pools: Pools,
restApp: RestApp[F],
token: AuthToken
): HttpRoutes[F] =
@ -77,9 +73,9 @@ object RestServer {
"user" -> UserRoutes(restApp.backend, token),
"collective" -> CollectiveRoutes(restApp.backend, token),
"queue" -> JobQueueRoutes(restApp.backend, token),
"item" -> ItemRoutes(cfg, pools.blocker, restApp.backend, token),
"item" -> ItemRoutes(cfg, restApp.backend, token),
"items" -> ItemMultiRoutes(restApp.backend, token),
"attachment" -> AttachmentRoutes(pools.blocker, restApp.backend, token),
"attachment" -> AttachmentRoutes(restApp.backend, token),
"attachments" -> AttachmentMultiRoutes(restApp.backend, token),
"upload" -> UploadRoutes.secured(restApp.backend, cfg, token),
"checkfile" -> CheckFileRoutes.secured(restApp.backend, token),
@ -95,7 +91,7 @@ object RestServer {
"clientSettings" -> ClientSettingsRoutes(restApp.backend, token)
def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
def openRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
"auth" -> LoginRoutes.login(restApp.backend.login, cfg),
"signup" -> RegisterRoutes(restApp.backend, cfg),
@ -104,14 +100,14 @@ object RestServer {
"integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg)
def adminRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
def adminRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
"fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend),
"user" -> UserRoutes.admin(restApp.backend),
"info" -> InfoRoutes.admin(cfg)
def redirectTo[F[_]: Effect](path: String): HttpRoutes[F] = {
def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = {
val dsl = new Http4sDsl[F] {}
import dsl._
@ -119,7 +115,7 @@ object RestServer {
body = Stream.empty,
headers = Headers.of(Location(Uri(path = path)))
headers = Headers(Location(Uri(path = Uri.Path.unsafeFromString(path))))
@ -5,7 +5,7 @@ import docspell.common.AccountId
import docspell.common.LenientUri
import org.http4s._
import org.http4s.util._
import org.typelevel.ci.CIString
case class CookieData(auth: AuthToken) {
def accountId: AccountId = auth.account
@ -37,7 +37,7 @@ object CookieData {
def fromCookie[F[_]](req: Request[F]): Either[String, String] =
for {
header <- headers.Cookie.from(req.headers).toRight("Cookie parsing error")
header <- req.headers.get[headers.Cookie].toRight("Cookie parsing error")
cookie <-
.find(_.name == cookieName)
@ -46,8 +46,8 @@ object CookieData {
def fromHeader[F[_]](req: Request[F]): Either[String, String] =
.toRight("Couldn't find an authenticator")
def deleteCookie(baseUrl: LenientUri): ResponseCookie =
@ -33,7 +33,7 @@ object RememberCookieData {
def fromCookie[F[_]](req: Request[F]): Option[String] =
for {
header <- headers.Cookie.from(req.headers)
header <- req.headers.get[headers.Cookie]
cookie <- header.values.toList.find(_.name == cookieName)
} yield cookie.content
@ -2,7 +2,7 @@ package docspell.restserver.conv
import java.time.{LocalDate, ZoneId}
import cats.effect.{Effect, Sync}
import cats.effect.{Async, Sync}
import cats.implicits._
import fs2.Stream
@ -294,7 +294,7 @@ trait Conversions {
JobLogEvent(jl.created, jl.level, jl.message)
// upload
def readMultipart[F[_]: Effect](
def readMultipart[F[_]: Async](
mp: Multipart[F],
sourceName: String,
logger: Logger,
@ -347,11 +347,11 @@ trait Conversions {
.filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
.map(p =>
.File(p.filename, p.headers.get(`Content-Type`).map(fromContentType), p.body)
.File(p.filename, p.headers.get[`Content-Type`].map(fromContentType), p.body)
for {
metaData <- meta
_ <- Effect[F].delay(logger.debug(s"Parsed upload meta data: $metaData"))
_ <- Async[F].delay(logger.debug(s"Parsed upload meta data: $metaData"))
tracker <- Ident.randomId[F]
} yield UploadData(metaData._1, metaData._2, files, prio, Some(tracker))
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user