Initial version.
Features: - Upload PDF files and have them analyzed - Manage metadata and items - See processing in the webapp
@ -1 +1,11 @@
<img align="right" src="./artwork/logo-only.svg" height="150px" style="padding-left: 20px"/>
# Docspell
# Docspell
Docspell is a personal document organizer. You'll need a scanner to
convert your papers into PDF files. Docspell can then assist in
organizing the resulting mess :wink:.
See the [microsite](https://eikek.github.io/docspell/) for more information.
Normal file
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.8 KiB |
@ -1,39 +1,43 @@
import com.github.eikek.sbt.openapi._
import com.github.eikek.sbt.openapi._
import scala.sys.process._
import scala.sys.process._
import com.typesafe.sbt.SbtGit.GitKeys._
import com.typesafe.sbt.SbtGit.GitKeys._
import docspell.build._
val sharedSettings = Seq(
val sharedSettings = Seq(
organization := "com.github.eikek",
organization := "com.github.eikek",
scalaVersion := "2.13.0",
scalaVersion := "2.13.1",
scalacOptions ++= Seq(
scalacOptions ++= Seq(
"-encoding", "UTF-8",
"-encoding", "UTF-8",
"-Xfatal-warnings", // fail when there are warnings
"-Werror", // fail when there are warnings
scalacOptions in (Compile, console) := Seq()
scalacOptions in (Compile, console) :=
(scalacOptions.value.filter(o => !o.contains("Xlint")) ++ Seq("-Xlint:_,-unused")),
scalacOptions in (Test, console) :=
(scalacOptions.value.filter(o => !o.contains("Xlint")) ++ Seq("-Xlint:_,-unused"))
val testSettings = Seq(
val testSettings = Seq(
testFrameworks += new TestFramework("minitest.runner.Framework"),
testFrameworks += new TestFramework("minitest.runner.Framework"),
libraryDependencies ++= Dependencies.miniTest
libraryDependencies ++= Dependencies.miniTest ++ Dependencies.logging.map(_ % Test)
val elmSettings = Seq(
val elmSettings = Seq(
Compile/resourceGenerators += (Def.task {
Compile/resourceGenerators += Def.task {
, (Compile/baseDirectory).value
, (Compile/baseDirectory).value
, (Compile/resourceManaged).value
, (Compile/resourceManaged).value
, name.value
, name.value
, version.value)
, version.value)
watchSources += Watched.WatchSource(
watchSources += Watched.WatchSource(
, FileFilter.globFilter("*.elm")
, FileFilter.globFilter("*.elm")
@ -42,14 +46,14 @@ val elmSettings = Seq(
val webjarSettings = Seq(
val webjarSettings = Seq(
Compile/resourceGenerators += (Def.task {
Compile/resourceGenerators += Def.task {
copyWebjarResources(Seq((sourceDirectory in Compile).value/"webjar")
copyWebjarResources(Seq((sourceDirectory in Compile).value/"webjar")
, (Compile/resourceManaged).value
, (Compile/resourceManaged).value
, name.value
, name.value
, version.value
, version.value
, streams.value.log
, streams.value.log
watchSources += Watched.WatchSource(
watchSources += Watched.WatchSource(
(Compile / sourceDirectory).value/"webjar"
(Compile / sourceDirectory).value/"webjar"
, FileFilter.globFilter("*.js") || FileFilter.globFilter("*.css")
, FileFilter.globFilter("*.js") || FileFilter.globFilter("*.css")
@ -57,7 +61,7 @@ val webjarSettings = Seq(
val debianSettings = Seq(
def debianSettings(cfgFile: String) = Seq(
maintainer := "Eike Kettner <eike.kettner@posteo.de>",
maintainer := "Eike Kettner <eike.kettner@posteo.de>",
packageSummary := description.value,
packageSummary := description.value,
packageDescription := description.value,
packageDescription := description.value,
@ -66,9 +70,9 @@ val debianSettings = Seq(
if (!conf.exists) {
if (!conf.exists) {
sys.error(s"File $conf not found")
sys.error(s"File $conf not found")
conf -> "conf/docspell.conf"
conf -> s"conf/$cfgFile.conf"
bashScriptExtraDefines += """addJava "-Dconfig.file=${app_home}/../conf/docspell.conf""""
bashScriptExtraDefines += s"""addJava "-Dconfig.file=$${app_home}/../conf/$cfgFile.conf""""
val buildInfoSettings = Seq(
val buildInfoSettings = Seq(
@ -77,7 +81,47 @@ val buildInfoSettings = Seq(
buildInfoOptions += BuildInfoOption.BuildTime
buildInfoOptions += BuildInfoOption.BuildTime
val openapiScalaSettings = Seq(
openapiScalaConfig := ScalaConfig().withJson(ScalaJson.circeSemiauto).
case TypeDef("LocalDateTime", _) =>
TypeDef("Timestamp", Imports("docspell.common.Timestamp"))
case "ident" => field =>
field.copy(typeDef = TypeDef("Ident", Imports("docspell.common.Ident")))
case "collectivestate" => field =>
field.copy(typeDef = TypeDef("CollectiveState", Imports("docspell.common.CollectiveState")))
case "userstate" => field =>
field.copy(typeDef = TypeDef("UserState", Imports("docspell.common.UserState")))
case "password" => field =>
field.copy(typeDef = TypeDef("Password", Imports("docspell.common.Password")))
case "contactkind" => field =>
field.copy(typeDef = TypeDef("ContactKind", Imports("docspell.common.ContactKind")))
case "direction" => field =>
field.copy(typeDef = TypeDef("Direction", Imports("docspell.common.Direction")))
case "priority" => field =>
field.copy(typeDef = TypeDef("Priority", Imports("docspell.common.Priority")))
case "jobstate" => field =>
field.copy(typeDef = TypeDef("JobState", Imports("docspell.common.JobState")))
case "loglevel" => field =>
field.copy(typeDef = TypeDef("LogLevel", Imports("docspell.common.LogLevel")))
case "mimetype" => field =>
field.copy(typeDef = TypeDef("MimeType", Imports("docspell.common.MimeType")))
case "itemstate" => field =>
field.copy(typeDef = TypeDef("ItemState", Imports("docspell.common.ItemState")))
case "nertag" => field =>
field.copy(typeDef = TypeDef("NerTag", Imports("docspell.common.NerTag")))
case "language" => field =>
field.copy(typeDef = TypeDef("Language", Imports("docspell.common.Language")))
val reStartSettings = Seq(
javaOptions in reStart ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"dev.conf"}")
// --- Modules
val common = project.in(file("modules/common")).
val common = project.in(file("modules/common")).
@ -85,7 +129,10 @@ val common = project.in(file("modules/common")).
name := "docspell-common",
name := "docspell-common",
libraryDependencies ++=
libraryDependencies ++=
Dependencies.fs2 ++
Dependencies.circe ++
Dependencies.loggingApi ++
Dependencies.pureconfig.map(_ % "optional")
val store = project.in(file("modules/store")).
val store = project.in(file("modules/store")).
@ -96,16 +143,31 @@ val store = project.in(file("modules/store")).
libraryDependencies ++=
libraryDependencies ++=
Dependencies.doobie ++
Dependencies.doobie ++
Dependencies.bitpeace ++
Dependencies.bitpeace ++
Dependencies.tika ++
Dependencies.fs2 ++
Dependencies.fs2 ++
Dependencies.databases ++
Dependencies.databases ++
Dependencies.flyway ++
Dependencies.flyway ++
val text = project.in(file("modules/text")).
name := "docspell-text",
libraryDependencies ++=
Dependencies.fs2 ++
Dependencies.tika ++
val restapi = project.in(file("modules/restapi")).
val restapi = project.in(file("modules/restapi")).
name := "docspell-restapi",
name := "docspell-restapi",
libraryDependencies ++=
libraryDependencies ++=
@ -113,22 +175,21 @@ val restapi = project.in(file("modules/restapi")).
openapiTargetLanguage := Language.Scala,
openapiTargetLanguage := Language.Scala,
openapiPackage := Pkg("docspell.restapi.model"),
openapiPackage := Pkg("docspell.restapi.model"),
openapiSpec := (Compile/resourceDirectory).value/"docspell-openapi.yml",
openapiSpec := (Compile/resourceDirectory).value/"docspell-openapi.yml",
openapiScalaConfig := ScalaConfig().withJson(ScalaJson.circeSemiauto)
val joexapi = project.in(file("modules/joexapi")).
val joexapi = project.in(file("modules/joexapi")).
name := "docspell-joexapi",
name := "docspell-joexapi",
libraryDependencies ++=
libraryDependencies ++=
openapiTargetLanguage := Language.Scala,
openapiTargetLanguage := Language.Scala,
openapiPackage := Pkg("docspell.joexapi.model"),
openapiPackage := Pkg("docspell.joexapi.model"),
openapiSpec := (Compile/resourceDirectory).value/"joex-openapi.yml",
openapiSpec := (Compile/resourceDirectory).value/"joex-openapi.yml"
openapiScalaConfig := ScalaConfig().withJson(ScalaJson.circeSemiauto)
val joex = project.in(file("modules/joex")).
val joex = project.in(file("modules/joex")).
@ -137,7 +198,7 @@ val joex = project.in(file("modules/joex")).
, SystemdPlugin).
, SystemdPlugin).
name := "docspell-joex",
name := "docspell-joex",
@ -147,11 +208,12 @@ val joex = project.in(file("modules/joex")).
Dependencies.circe ++
Dependencies.circe ++
Dependencies.pureconfig ++
Dependencies.pureconfig ++
Dependencies.loggingApi ++
Dependencies.loggingApi ++
Dependencies.logging.map(_ % Runtime),
buildInfoPackage := "docspell.joex"
buildInfoPackage := "docspell.joex",
).dependsOn(store, joexapi, restapi)
reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"dev.conf"}")
).dependsOn(store, text, joexapi, restapi)
val backend = project.in(file("modules/backend")).
val backend = project.in(file("modules/backend")).
@ -160,7 +222,9 @@ val backend = project.in(file("modules/backend")).
name := "docspell-backend",
name := "docspell-backend",
libraryDependencies ++=
libraryDependencies ++=
Dependencies.loggingApi ++
Dependencies.loggingApi ++
Dependencies.fs2 ++
Dependencies.bcrypt ++
val webapp = project.in(file("modules/webapp")).
val webapp = project.in(file("modules/webapp")).
@ -183,7 +247,7 @@ val restserver = project.in(file("modules/restserver")).
, SystemdPlugin).
, SystemdPlugin).
name := "docspell-restserver",
name := "docspell-restserver",
@ -194,39 +258,122 @@ val restserver = project.in(file("modules/restserver")).
Dependencies.yamusca ++
Dependencies.yamusca ++
Dependencies.webjars ++
Dependencies.webjars ++
Dependencies.loggingApi ++
Dependencies.loggingApi ++
Dependencies.logging.map(_ % Runtime),
buildInfoPackage := "docspell.restserver",
buildInfoPackage := "docspell.restserver",
Compile/sourceGenerators += (Def.task {
Compile/sourceGenerators += Def.task {
createWebjarSource(Dependencies.webjars, (Compile/sourceManaged).value)
createWebjarSource(Dependencies.webjars, (Compile/sourceManaged).value)
Compile/unmanagedResourceDirectories ++= Seq((Compile/resourceDirectory).value.getParentFile/"templates")
Compile/resourceGenerators += Def.task {
, (Compile/resourceManaged).value
, name.value
, version.value
, streams.value.log)
Compile/unmanagedResourceDirectories ++= Seq((Compile/resourceDirectory).value.getParentFile/"templates"),
reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"dev.conf"}")
).dependsOn(restapi, joexapi, backend, webapp)
).dependsOn(restapi, joexapi, backend, webapp)
val microsite = project.in(file("modules/microsite")).
name := "docspell-microsite",
publishArtifact := false,
scalacOptions -= "-Yno-imports",
scalacOptions ~= { _ filterNot (_ startsWith "-Ywarn") },
scalacOptions ~= { _ filterNot (_ startsWith "-Xlint") },
scalaVersion := "2.12.9",
skip in publish := true,
micrositeFooterText := Some(
|<p>© 2019 <a href="https://github.com/eikek/docspell">Docspell, v{{site.version}}</a></p>
micrositeName := "Docspell",
micrositeDescription := "Docspell – A Document Organizer",
micrositeBaseUrl := "/docspell",
micrositeAuthor := "eikek",
micrositeGithubOwner := "eikek",
micrositeGithubRepo := "docspell",
micrositeGitterChannel := false,
micrositeFavicons := Seq(microsites.MicrositeFavicon("favicon.png", "96x96")),
micrositeShareOnSocial := false,
micrositeHighlightLanguages ++= Seq("json", "javascript"),
micrositePalette := Map(
"brand-primary" -> "#5d000a", // link color
"brand-secondary" -> "#172651", //sidebar background
"brand-tertiary" -> "#495680", //main brand background
"gray-dark" -> "#050913", //header font color
"gray" -> "#131f43", //font color
"gray-light" -> "#E3E2E3",
"gray-lighter" -> "#f8fbff", //body background
"white-color" -> "#FFFFFF"),
fork in tut := true,
scalacOptions in Tut ~= (_.filterNot(Set("-Ywarn-unused-import", "-Ywarn-dead-code", "-Werror"))),
resourceGenerators in Tut += Def.task {
val conf1 = (resourceDirectory in (restserver, Compile)).value / "reference.conf"
val conf2 = (resourceDirectory in (joex, Compile)).value / "reference.conf"
val out1 = resourceManaged.value/"main"/"jekyll"/"_includes"/"server.conf"
val out2 = resourceManaged.value/"main"/"jekyll"/"_includes"/"joex.conf"
streams.value.log.info(s"Copying reference.conf: $conf1 -> $out1, $conf2 -> $out2")
IO.write(out1, "{% raw %}\n")
IO.append(out1, IO.readBytes(conf1))
IO.write(out1, "\n{% endraw %}", append = true)
IO.write(out2, "{% raw %}\n")
IO.append(out2, IO.readBytes(conf2))
IO.write(out2, "\n{% endraw %}", append = true)
val oa1 = (resourceDirectory in (restapi, Compile)).value/"docspell-openapi.yml"
val oaout = resourceManaged.value/"main"/"jekyll"/"openapi"/"docspell-openapi.yml"
IO.copy(Seq(oa1 -> oaout))
Seq(out1, out2, oaout)
resourceGenerators in Tut += Def.task {
val staticDoc = (restapi/Compile/openapiStaticDoc).value
val target = resourceManaged.value/"main"/"jekyll"/"openapi"/"docspell-openapi.html"
IO.copy(Seq(staticDoc -> target))
micrositeCompilingDocsTool := WithTut //WithMdoc
// mdocIn := sourceDirectory.value / "main" / "tut"
val root = project.in(file(".")).
val root = project.in(file(".")).
name := "docspell-root"
name := "docspell-root"
aggregate(common, store, joexapi, joex, backend, webapp, restapi, restserver)
, text
, store
, joexapi
, joex
, backend
, webapp
, restapi
, restserver
, microsite)
// --- helpers
def copyWebjarResources(src: Seq[File], base: File, artifact: String, version: String, logger: Logger): Seq[File] = {
def copyWebjarResources(src: Seq[File], base: File, artifact: String, version: String, logger: Logger): Seq[File] = {
val targetDir = base/"META-INF"/"resources"/"webjars"/artifact/version
val targetDir = base/"META-INF"/"resources"/"webjars"/artifact/version
logger.info(s"Copy webjar resources from ${src.size} files/directories.")
src.flatMap { dir =>
src.flatMap { dir =>
if (dir.isDirectory) {
if (dir.isDirectory) {
val files = (dir ** "*").filter(_.isFile).get pair Path.relativeTo(dir)
val files = (dir ** "*").filter(_.isFile).get pair Path.relativeTo(dir)
files.map { case (f, name) =>
files.map { case (f, name) =>
val target = targetDir/name
val target = targetDir/name
logger.info(s"Copy $f -> $target")
IO.copy(Seq(f -> target))
IO.copy(Seq(f -> target))
} else {
} else {
val target = targetDir/dir.name
val target = targetDir/dir.name
logger.info(s"Copy $dir -> $target")
IO.copy(Seq(dir -> target))
IO.copy(Seq(dir -> target))
@ -255,3 +402,9 @@ def createWebjarSource(wj: Seq[ModuleID], out: File): Seq[File] = {
IO.write(target, content)
IO.write(target, content)
// Command aliases bundling frequently used task sequences.
// "make": runs `root/openapiCodegen` followed by `root/test:compile`.
addCommandAlias("make", ";root/openapiCodegen ;root/test:compile")
// "make-zip": runs `universal:packageBin` for the restserver and joex projects.
addCommandAlias("make-zip", ";restserver/universal:packageBin ;joex/universal:packageBin")
// "make-deb": runs `debian:packageBin` for the restserver and joex projects.
addCommandAlias("make-deb", ";restserver/debian:packageBin ;joex/debian:packageBin")
// "make-pkg": runs the "make-zip" alias and then the "make-deb" alias.
addCommandAlias("make-pkg", ";make-zip ;make-deb")
@ -7,18 +7,26 @@
"elm-version": "0.19.0",
"elm-version": "0.19.0",
"dependencies": {
"dependencies": {
"direct": {
"direct": {
"CurrySoftware/elm-datepicker": "3.1.0",
"NoRedInk/elm-json-decode-pipeline": "1.0.0",
"NoRedInk/elm-json-decode-pipeline": "1.0.0",
"NoRedInk/elm-simple-fuzzy": "1.0.3",
"elm/browser": "1.0.1",
"elm/browser": "1.0.1",
"elm/core": "1.0.2",
"elm/core": "1.0.2",
"elm/file": "1.0.5",
"elm/html": "1.0.0",
"elm/html": "1.0.0",
"elm/http": "2.0.0",
"elm/http": "2.0.0",
"elm/json": "1.1.3",
"elm/json": "1.1.3",
"elm/url": "1.0.0"
"elm/time": "1.0.0",
"elm/url": "1.0.0",
"elm-explorations/markdown": "1.0.0",
"justinmimbs/date": "3.1.2",
"ryannhg/date-format": "2.3.0",
"truqu/elm-base64": "2.0.4"
"indirect": {
"indirect": {
"elm/bytes": "1.0.8",
"elm/bytes": "1.0.8",
"elm/file": "1.0.5",
"elm/parser": "1.1.0",
"elm/time": "1.0.0",
"elm/regex": "1.0.0",
"elm/virtual-dom": "1.0.2"
"elm/virtual-dom": "1.0.2"
@ -0,0 +1,66 @@
package docspell.backend
import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource}
import docspell.backend.auth.Login
import docspell.backend.ops._
import docspell.backend.signup.OSignup
import docspell.store.Store
import docspell.store.ops.ONode
import docspell.store.queue.JobQueue
import scala.concurrent.ExecutionContext
/** Entry point to the backend: exposes one accessor per group of
  * operations, all running in the effect type `F`.
  */
trait BackendApp[F[_]] {
// authentication and registration
def login: Login[F]
def signup: OSignup[F]
// operations on collectives and their metadata
def collective: OCollective[F]
def source: OSource[F]
def tag: OTag[F]
def equipment: OEquipment[F]
def organization: OOrganization[F]
// uploads, nodes, jobs and items
def upload: OUpload[F]
def node: ONode[F]
def job: OJob[F]
def item: OItem[F]
object BackendApp {
def create[F[_]: ConcurrentEffect](cfg: Config, store: Store[F], httpClientEc: ExecutionContext): Resource[F, BackendApp[F]] =
for {
queue <- JobQueue(store)
loginImpl <- Login[F](store)
signupImpl <- OSignup[F](store)
collImpl <- OCollective[F](store)
sourceImpl <- OSource[F](store)
tagImpl <- OTag[F](store)
equipImpl <- OEquipment[F](store)
orgImpl <- OOrganization(store)
uploadImpl <- OUpload(store, queue, cfg, httpClientEc)
nodeImpl <- ONode(store)
jobImpl <- OJob(store, httpClientEc)
itemImpl <- OItem(store)
} yield new BackendApp[F] {
val login: Login[F] = loginImpl
val signup: OSignup[F] = signupImpl
val collective: OCollective[F] = collImpl
val source = sourceImpl
val tag = tagImpl
val equipment = equipImpl
val organization = orgImpl
val upload = uploadImpl
val node = nodeImpl
val job = jobImpl
val item = itemImpl
/** Builds a [[BackendApp]] as a resource: first creates the `Store` from
  * `cfg.jdbc` (using `connectEC` and `blocker`), then hands that store to
  * `create` together with the configuration and the http client execution
  * context.
  */
def apply[F[_]: ConcurrentEffect: ContextShift](
    cfg: Config,
    connectEC: ExecutionContext,
    httpClientEc: ExecutionContext,
    blocker: Blocker
): Resource[F, BackendApp[F]] =
  Store
    .create(cfg.jdbc, connectEC, blocker)
    .flatMap(store => create(cfg, store, httpClientEc))
@ -0,0 +1,10 @@
package docspell.backend
import cats.effect._
import org.mindrot.jbcrypt.BCrypt
object Common {
def genSaltString[F[_]: Sync]: F[String] =
Normal file
package docspell.backend
import docspell.backend.signup.{Config => SignupConfig}
import docspell.common.MimeType
import docspell.store.JdbcConfig
case class Config( jdbc: JdbcConfig
, signup: SignupConfig
, files: Config.Files) {
object Config {
case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType])
@ -0,0 +1,13 @@
package docspell.backend
import docspell.common.Password
import org.mindrot.jbcrypt.BCrypt
object PasswordCrypt {
/** Hashes `pass` with bcrypt using a newly generated salt and wraps the
  * resulting hash in a [[Password]].
  */
def crypt(pass: Password): Password = {
  val plain  = pass.pass
  val salt   = BCrypt.gensalt()
  val hashed = BCrypt.hashpw(plain, salt)
  Password(hashed)
}
/** Checks the plain-text password `plain` against the bcrypt hash held in
  * `hashed`.
  */
def check(plain: Password, hashed: Password): Boolean = {
  val candidate = plain.pass
  val reference = hashed.pass
  BCrypt.checkpw(candidate, reference)
}
package docspell.backend.auth
import cats.effect._
import cats.implicits._
import java.time.Instant
import javax.crypto.Mac
import javax.crypto.spec.SecretKeySpec
import scodec.bits.ByteVector
import docspell.backend.Common
import AuthToken._
import docspell.common._
case class AuthToken(millis: Long, account: AccountId, salt: String, sig: String) {
def asString = s"$millis-${b64enc(account.asString)}-$salt-$sig"
def sigValid(key: ByteVector): Boolean = {
val newSig = AuthToken.sign(this, key)
AuthToken.constTimeEq(sig, newSig)
def sigInvalid(key: ByteVector): Boolean =
def notExpired(validity: Duration): Boolean =
def isExpired(validity: Duration): Boolean = {
val ends = Instant.ofEpochMilli(millis).plusMillis(validity.millis)
/** Accepts the token only if `sigValid(key)` holds and, in that case,
  * `notExpired(validity)` holds as well. The expiry check is not run when
  * the signature check already failed.
  */
def validate(key: ByteVector, validity: Duration): Boolean = {
  val signatureOk = sigValid(key)
  signatureOk && notExpired(validity)
}
object AuthToken {
private val utf8 = java.nio.charset.StandardCharsets.UTF_8
/** Parses the textual token form `millis-account-salt-sig`.
  *
  * The account part is expected to be base64 encoded (it is decoded via
  * `b64dec` and then parsed with `AccountId.parse`). Returns a `Left` with
  * an error message if the input does not split into four parts or one of
  * the parts cannot be read.
  */
def fromString(s: String): Either[String, AuthToken] =
// limit of 4: any further dashes stay inside the last (signature) part
s.split("\\-", 4) match {
case Array(ms, as, salt, sig) =>
for {
// note: `asInt` yields a Long, despite its name
millis <- asInt(ms).toRight("Cannot read authenticator data")
acc <- b64dec(as).toRight("Cannot read authenticator data")
accId <- AccountId.parse(acc)
} yield AuthToken(millis, accId, salt, sig)
case _ =>
Left("Invalid authenticator")
/** Creates a new signed token for `accountId`.
  *
  * A salt is obtained from `Common.genSaltString`, the token is stamped
  * with the current epoch milliseconds and then signed with `key`.
  */
def user[F[_]: Sync](accountId: AccountId, key: ByteVector): F[AuthToken] = {
for {
salt <- Common.genSaltString[F]
millis = Instant.now.toEpochMilli
// build the token with an empty signature first; `sign` does not read `sig`
cd = AuthToken(millis, accountId, salt, "")
sig = sign(cd, key)
} yield cd.copy(sig = sig)
private def sign(cd: AuthToken, key: ByteVector): String = {
val raw = cd.millis.toString + cd.account.asString + cd.salt
val mac = Mac.getInstance("HmacSHA1")
mac.init(new SecretKeySpec(key.toArray, "HmacSHA1"))
private def b64enc(s: String): String =
private def b64dec(s: String): Option[String] =
private def asInt(s: String): Option[Long] =
/** Compares two strings without bailing out at the first difference.
  *
  * All zipped character pairs are visited and the results are combined
  * with the non-short-circuit `&`; the length comparison is combined the
  * same way. Returns `true` only if both strings have the same length and
  * the same characters.
  */
private def constTimeEq(s1: String, s2: String): Boolean = {
  val charsMatch = s1.zip(s2).foldLeft(true) { (acc, pair) =>
    acc & (pair._1 == pair._2)
  }
  val lengthsMatch = s1.length == s2.length
  charsMatch & lengthsMatch
}
package docspell.backend.auth
import cats.effect._
import cats.implicits._
import Login._
import docspell.common._
import docspell.store.Store
import docspell.store.queries.QLogin
import docspell.store.records.RUser
import org.mindrot.jbcrypt.BCrypt
import scodec.bits.ByteVector
import org.log4s._
trait Login[F[_]] {
def loginSession(config: Config)(sessionKey: String): F[Result]
def loginUserPass(config: Config)(up: UserPass): F[Result]
object Login {
private[this] val logger = getLogger
case class Config(serverSecret: ByteVector, sessionValid: Duration)
case class UserPass(user: String, pass: String) {
/** Returns a copy whose password is replaced by a placeholder:
  * `<none>` when the password is empty, `***` otherwise.
  */
def hidePass: UserPass = {
  val masked = if (pass.isEmpty) "<none>" else "***"
  copy(pass = masked)
}
sealed trait Result {
def toEither: Either[String, AuthToken]
object Result {
case class Ok(session: AuthToken) extends Result {
val toEither = Right(session)
case object InvalidAuth extends Result {
val toEither = Left("Authentication failed.")
case object InvalidTime extends Result {
val toEither = Left("Authentication failed.")
def ok(session: AuthToken): Result = Ok(session)
def invalidAuth: Result = InvalidAuth
def invalidTime: Result = InvalidTime
def apply[F[_]: Effect](store: Store[F]): Resource[F, Login[F]] = Resource.pure(new Login[F] {
def loginSession(config: Config)(sessionKey: String): F[Result] =
AuthToken.fromString(sessionKey) match {
case Right(at) =>
if (at.sigInvalid(config.serverSecret)) Result.invalidAuth.pure[F]
else if (at.isExpired(config.sessionValid)) Result.invalidTime.pure[F]
else Result.ok(at).pure[F]
case Left(err) =>
def loginUserPass(config: Config)(up: UserPass): F[Result] = {
AccountId.parse(up.user) match {
case Right(acc) =>
val okResult=
store.transact(RUser.updateLogin(acc)) *>
AuthToken.user(acc, config.serverSecret).map(Result.ok)
for {
data <- store.transact(QLogin.findUser(acc))
_ <- Sync[F].delay(logger.trace(s"Account lookup: $data"))
res <- if (data.exists(check(up.pass))) okResult
else Result.invalidAuth.pure[F]
} yield res
case Left(err) =>
/** Decides whether a login attempt with password `given` is acceptable
  * for the account described by `data`.
  *
  * All three conditions must hold: the collective is `Active` or
  * `ReadOnly`, the user is `Active`, and `given` matches the stored
  * bcrypt hash.
  */
private def check(given: String)(data: QLogin.Data): Boolean = {
val collOk = data.collectiveState == CollectiveState.Active ||
data.collectiveState == CollectiveState.ReadOnly
val userOk = data.userState == UserState.Active
// computed as a val, so the bcrypt check runs regardless of the state checks above
val passOk = BCrypt.checkpw(given, data.password.pass)
collOk && userOk && passOk
package docspell.backend.ops
import cats.implicits._
import cats.effect.{Effect, Resource}
import docspell.common._
import docspell.store.{AddResult, Store}
import docspell.store.records.{RCollective, RUser}
import OCollective._
import docspell.backend.PasswordCrypt
import docspell.store.queries.QCollective
trait OCollective[F[_]] {
def find(name: Ident): F[Option[RCollective]]
def updateLanguage(collective: Ident, lang: Language): F[AddResult]
def listUser(collective: Ident): F[Vector[RUser]]
def add(s: RUser): F[AddResult]
def update(s: RUser): F[AddResult]
def deleteUser(login: Ident, collective: Ident): F[AddResult]
def insights(collective: Ident): F[InsightData]
def changePassword(accountId: AccountId, current: Password, newPass: Password): F[PassChangeResult]
object OCollective {
type InsightData = QCollective.InsightData
val insightData = QCollective.InsightData
sealed trait PassChangeResult
object PassChangeResult {
case object UserNotFound extends PassChangeResult
case object PasswordMismatch extends PassChangeResult
case object UpdateFailed extends PassChangeResult
case object Success extends PassChangeResult
def userNotFound: PassChangeResult = UserNotFound
def passwordMismatch: PassChangeResult = PasswordMismatch
def success: PassChangeResult = Success
def updateFailed: PassChangeResult = UpdateFailed
case class RegisterData(collName: Ident, login: Ident, password: Password, invite: Option[Ident])
sealed trait RegisterResult {
def toEither: Either[Throwable, Unit]
object RegisterResult {
case object Success extends RegisterResult {
val toEither = Right(())
case class CollectiveExists(id: Ident) extends RegisterResult {
val toEither = Left(new Exception())
case class Error(ex: Throwable) extends RegisterResult {
val toEither = Left(ex)
def apply[F[_]:Effect](store: Store[F]): Resource[F, OCollective[F]] =
Resource.pure(new OCollective[F] {
def find(name: Ident): F[Option[RCollective]] =
def updateLanguage(collective: Ident, lang: Language): F[AddResult] =
store.transact(RCollective.updateLanguage(collective, lang)).
def listUser(collective: Ident): F[Vector[RUser]] = {
store.transact(RUser.findAll(collective, _.login))
/** Adds the user `s`, storing the bcrypt hash of its password instead of
  * the given value. `RUser.exists(s.login)` is passed to `store.add` as
  * the existence check.
  */
def add(s: RUser): F[AddResult] = {
  val withHashedPassword = s.copy(password = PasswordCrypt.crypt(s.password))
  store.add(RUser.insert(withHashedPassword), RUser.exists(s.login))
}
def update(s: RUser): F[AddResult] =
store.add(RUser.update(s), RUser.exists(s.login))
def deleteUser(login: Ident, collective: Ident): F[AddResult] =
store.transact(RUser.delete(login, collective)).
def insights(collective: Ident): F[InsightData] =
def changePassword(accountId: AccountId, current: Password, newPass: Password): F[PassChangeResult] = {
val q = for {
optUser <- RUser.findByAccount(accountId)
check = optUser.map(_.password).map(p => PasswordCrypt.check(current, p))
n <- check.filter(identity).traverse(_ => RUser.updatePassword(accountId, PasswordCrypt.crypt(newPass)))
res = check match {
case Some(true) =>
if (n.getOrElse(0) > 0) PassChangeResult.success else PassChangeResult.updateFailed
case Some(false) =>
case None =>
} yield res
package docspell.backend.ops
import cats.implicits._
import cats.effect.{Effect, Resource}
import docspell.common.{AccountId, Ident}
import docspell.store.{AddResult, Store}
import docspell.store.records.{REquipment, RItem}
trait OEquipment[F[_]] {
def findAll(account: AccountId): F[Vector[REquipment]]
def add(s: REquipment): F[AddResult]
def update(s: REquipment): F[AddResult]
def delete(id: Ident, collective: Ident): F[AddResult]
object OEquipment {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OEquipment[F]] =
Resource.pure(new OEquipment[F] {
def findAll(account: AccountId): F[Vector[REquipment]] =
store.transact(REquipment.findAll(account.collective, _.name))
def add(e: REquipment): F[AddResult] = {
def insert = REquipment.insert(e)
def exists = REquipment.existsByName(e.cid, e.name)
val msg = s"An equipment '${e.name}' already exists"
store.add(insert, exists).map(_.fold(identity, _.withMsg(msg), identity))
def update(e: REquipment): F[AddResult] = {
def insert = REquipment.update(e)
def exists = REquipment.existsByName(e.cid, e.name)
val msg = s"An equipment '${e.name}' already exists"
store.add(insert, exists).map(_.fold(identity, _.withMsg(msg), identity))
def delete(id: Ident, collective: Ident): F[AddResult] = {
val io = for {
n0 <- RItem.removeConcEquip(collective, id)
n1 <- REquipment.delete(id, collective)
} yield n0 + n1
@ -0,0 +1,159 @@
package docspell.backend.ops
import fs2.Stream
import cats.implicits._
import cats.effect.{Effect, Resource}
import doobie._
import doobie.implicits._
import docspell.store.{AddResult, Store}
import docspell.store.queries.{QAttachment, QItem}
import OItem.{AttachmentData, ItemData, ListItem, Query}
import bitpeace.{FileMeta, RangeDef}
import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp}
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem, RTagItem}
/** Operations for looking up and modifying items and their attachments.
  *
  * Apart from `findItems`, every operation takes the id of the collective
  * in addition to the item or attachment id.
  */
trait OItem[F[_]] {
def findItem(id: Ident, collective: Ident): F[Option[ItemData]]
def findItems(q: Query, maxResults: Int): F[Vector[ListItem]]
def findAttachment(id: Ident, collective: Ident): F[Option[AttachmentData[F]]]
def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[AddResult]
def setDirection(item: Ident, direction: Direction, collective: Ident): F[AddResult]
def setCorrOrg(item: Ident, org: Option[Ident], collective: Ident): F[AddResult]
def setCorrPerson(item: Ident, person: Option[Ident], collective: Ident): F[AddResult]
def setConcPerson(item: Ident, person: Option[Ident], collective: Ident): F[AddResult]
def setConcEquip(item: Ident, equip: Option[Ident], collective: Ident): F[AddResult]
def setNotes(item: Ident, notes: Option[String], collective: Ident): F[AddResult]
// NOTE(review): the second parameter is called `notes` here but `name` in the
// implementation inside `OItem.apply`; it is passed to `RItem.updateName` there.
def setName(item: Ident, notes: String, collective: Ident): F[AddResult]
def setState(item: Ident, state: ItemState, collective: Ident): F[AddResult]
def setItemDate(item: Ident, date: Option[Timestamp], collective: Ident): F[AddResult]
def setItemDueDate(item: Ident, date: Option[Timestamp], collective: Ident): F[AddResult]
def getProposals(item: Ident, collective: Ident): F[MetaProposalList]
def delete(itemId: Ident, collective: Ident): F[Int]
def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]]
object OItem {
type Query = QItem.Query
val Query = QItem.Query
type ListItem = QItem.ListItem
val ListItem = QItem.ListItem
type ItemData = QItem.ItemData
val ItemData = QItem.ItemData
case class AttachmentData[F[_]](ra: RAttachment, meta: FileMeta, data: Stream[F, Byte])
def apply[F[_]: Effect](store: Store[F]): Resource[F, OItem[F]] =
Resource.pure(new OItem[F] {
def findItem(id: Ident, collective: Ident): F[Option[ItemData]] =
map(opt => opt.flatMap(_.filterCollective(collective)))
def findItems(q: Query, maxResults: Int): F[Vector[ListItem]] = {
def findAttachment(id: Ident, collective: Ident): F[Option[AttachmentData[F]]] = {
store.transact(RAttachment.findByIdAndCollective(id, collective)).
case Some(ra) =>
map(_.map(m => AttachmentData[F](ra, m, store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)))))
case None =>
(None: Option[AttachmentData[F]]).pure[F]
def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[AddResult] = {
val db = for {
cid <- RItem.getCollective(item)
nd <- if (cid.contains(collective)) RTagItem.deleteItemTags(item) else 0.pure[ConnectionIO]
ni <- if (tagIds.nonEmpty && cid.contains(collective)) RTagItem.insertItemTags(item, tagIds) else 0.pure[ConnectionIO]
} yield nd + ni
def setDirection(item: Ident, direction: Direction, collective: Ident): F[AddResult] =
store.transact(RItem.updateDirection(item, collective, direction)).
def setCorrOrg(item: Ident, org: Option[Ident], collective: Ident): F[AddResult] =
store.transact(RItem.updateCorrOrg(item, collective, org)).
def setCorrPerson(item: Ident, person: Option[Ident], collective: Ident): F[AddResult] =
store.transact(RItem.updateCorrPerson(item, collective, person)).
def setConcPerson(item: Ident, person: Option[Ident], collective: Ident): F[AddResult] =
store.transact(RItem.updateConcPerson(item, collective, person)).
def setConcEquip(item: Ident, equip: Option[Ident], collective: Ident): F[AddResult] =
store.transact(RItem.updateConcEquip(item, collective, equip)).
def setNotes(item: Ident, notes: Option[String], collective: Ident): F[AddResult] =
store.transact(RItem.updateNotes(item, collective, notes)).
def setName(item: Ident, name: String, collective: Ident): F[AddResult] =
store.transact(RItem.updateName(item, collective, name)).
def setState(item: Ident, state: ItemState, collective: Ident): F[AddResult] =
store.transact(RItem.updateStateForCollective(item, state, collective)).
def setItemDate(item: Ident, date: Option[Timestamp], collective: Ident): F[AddResult] =
store.transact(RItem.updateDate(item, collective, date)).
def setItemDueDate(item: Ident, date: Option[Timestamp], collective: Ident): F[AddResult] =
store.transact(RItem.updateDueDate(item, collective, date)).
def delete(itemId: Ident, collective: Ident): F[Int] =
QItem.delete(store)(itemId, collective)
def getProposals(item: Ident, collective: Ident): F[MetaProposalList] =
store.transact(QAttachment.getMetaProposals(item, collective))
def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]] =
store.transact(QAttachment.getAttachmentMeta(id, collective))
package docspell.backend.ops
import cats.implicits._
import cats.effect.{ConcurrentEffect, Resource}
import docspell.backend.ops.OJob.{CollectiveQueueState, JobCancelResult}
import docspell.common.{Ident, JobState}
import docspell.store.Store
import docspell.store.queries.QJob
import docspell.store.records.{RJob, RJobLog}
import scala.concurrent.ExecutionContext
/** Job-queue operations scoped to a single collective. */
trait OJob[F[_]] {
/** Loads the jobs of `collective` together with their logs.
  * `maxResults` presumably caps the number of returned jobs — confirm against the implementation. */
def queueState(collective: Ident, maxResults: Int): F[CollectiveQueueState]
/** Tries to stop the job `id` belonging to `collective`; the outcome is one of the `JobCancelResult` cases. */
def cancelJob(id: Ident, collective: Ident): F[JobCancelResult]
object OJob {
sealed trait JobCancelResult
object JobCancelResult {
case object Removed extends JobCancelResult
case object CancelRequested extends JobCancelResult
case object JobNotFound extends JobCancelResult
case class JobDetail(job: RJob, logs: Vector[RJobLog])
/** The jobs of one collective, with convenience views grouped by job state. */
case class CollectiveQueueState(jobs: Vector[JobDetail]) {

  /** Jobs whose state is a member of `JobState.queued`. */
  def queued: Vector[JobDetail] =
    havingState(JobState.queued)

  /** Jobs whose state is a member of `JobState.done`. */
  def done: Vector[JobDetail] =
    havingState(JobState.done)

  /** Jobs that are currently in state `JobState.Running`. */
  def running: Vector[JobDetail] =
    jobs.filter(detail => detail.job.state == JobState.Running)

  /** Keeps only the jobs whose state is contained in `states`. */
  private def havingState(states: Set[JobState]): Vector[JobDetail] =
    jobs.filter(detail => states.contains(detail.job.state))
}
def apply[F[_]: ConcurrentEffect](store: Store[F], clientEC: ExecutionContext): Resource[F, OJob[F]] =
Resource.pure(new OJob[F] {
def queueState(collective: Ident, maxResults: Int): F[CollectiveQueueState] = {
map(t => JobDetail(t._1, t._2)).
/** Cancels the job `id` of `collective`.
  *
  * First tries to delete the job inside a transaction (only jobs accepted by
  * `canDelete` are removed). If nothing was deleted and the job is scheduled or
  * running on a known worker, a cancel request is sent to that worker via
  * `OJoex.cancelJob`. Otherwise the result is `JobNotFound`.
  */
def cancelJob(id: Ident, collective: Ident): F[JobCancelResult] = {
// Yields the job and its worker only when a worker is assigned AND the job
// is in state Scheduled or Running; otherwise None.
def mustCancel(job: Option[RJob]): Option[(RJob, Ident)] =
for {
worker <- job.flatMap(_.worker)
job <- job.filter(j => j.state == JobState.Scheduled || j.state == JobState.Running)
} yield (job, worker)
// NOTE(review): the body of canDelete is not visible in this view.
def canDelete(j: RJob): Boolean =
// Database part: find the job, delete it if allowed. Right = removed,
// Left = not removed, carrying the optional (job, worker) pair to cancel.
val tryDelete = for {
job <- RJob.findByIdAndGroup(id, collective)
jobm = job.filter(canDelete)
del <- jobm.traverse(j => RJob.delete(j.id))
} yield del match {
case Some(n) => Right(JobCancelResult.Removed: JobCancelResult)
case None => Left(mustCancel(job))
// Asks the worker node to cancel; a `false` flag from OJoex maps to JobNotFound.
def tryCancel(job: RJob, worker: Ident): F[JobCancelResult] =
OJoex.cancelJob(job.id, worker, store, clientEC).
map(flag => if (flag) JobCancelResult.CancelRequested else JobCancelResult.JobNotFound)
for {
tryDel <- store.transact(tryDelete)
result <- tryDel match {
case Right(r) => r.pure[F]
case Left(Some((job, worker))) =>
tryCancel(job, worker)
case Left(None) =>
(JobCancelResult.JobNotFound: OJob.JobCancelResult).pure[F]
} yield result
package docspell.backend.ops
import cats.implicits._
import cats.effect.ConcurrentEffect
import docspell.common.{Ident, NodeType}
import docspell.store.Store
import docspell.store.records.RNode
import org.http4s.client.blaze.BlazeClientBuilder
import org.http4s.Method._
import org.http4s.{Request, Uri}
import scala.concurrent.ExecutionContext
import org.log4s._
object OJoex {
private [this] val logger = getLogger
/** Loads all nodes of type `NodeType.Joex` from the store and calls `notifyJoex`
  * on each of them, one after another. The individual results are discarded. */
def notifyAll[F[_]: ConcurrentEffect](store: Store[F], clientExecutionContext: ExecutionContext): F[Unit] = {
for {
nodes <- store.transact(RNode.findAll(NodeType.Joex))
_ <- nodes.toList.traverse(notifyJoex[F](clientExecutionContext))
} yield ()
/** Sends a cancel request for `jobId` to the node registered under `worker`.
  *
  * @return the result of the cancel call when the worker node is known to the
  *         store; `false` when no such node exists
  */
def cancelJob[F[_]: ConcurrentEffect](jobId: Ident, worker: Ident, store: Store[F], clientEc: ExecutionContext): F[Boolean] =
  store.transact(RNode.findById(worker)).flatMap {
    case Some(workerNode) =>
      joexCancel[F](clientEc)(workerNode, jobId)
    case None =>
      false.pure[F]
  }
/** POSTs to `<node.url>/api/v1/job/<job id>/cancel` and yields `true` once a
  * response body could be read. A new Blaze client is built for every call.
  * NOTE(review): failures of the request are not handled here and surface in `F`. */
private def joexCancel[F[_]: ConcurrentEffect](ec: ExecutionContext)(node: RNode, job: Ident): F[Boolean] = {
val notifyUrl = node.url/"api"/"v1"/"job"/job.id/"cancel"
BlazeClientBuilder[F](ec).resource.use { client =>
// NOTE(review): unsafeFromString presumably throws on an unparsable URL — confirm.
val req = Request[F](POST, Uri.unsafeFromString(notifyUrl.asString))
client.expect[String](req).map(_ => true)
/** POSTs to `<node.url>/api/v1/notify`. A new Blaze client is built for every
  * call. Errors are caught via `attempt` and only logged as a warning, so a
  * failing node does not fail the caller. */
private def notifyJoex[F[_]: ConcurrentEffect](ec: ExecutionContext)(node: RNode): F[Unit] = {
val notifyUrl = node.url/"api"/"v1"/"notify"
val execute = BlazeClientBuilder[F](ec).resource.use { client =>
val req = Request[F](POST, Uri.unsafeFromString(notifyUrl.asString))
client.expect[String](req).map(_ => ())
// Best effort: turn a failed request into a log line instead of an error.
execute.attempt.map {
case Right(_) =>
case Left(_) =>
logger.warn(s"Notifying Joex instance '${node.id.id}/${node.url.asString}' failed.")
package docspell.backend.ops
import cats.implicits._
import cats.effect.{Effect, Resource}
import docspell.common._
import docspell.store._
import docspell.store.records._
import OOrganization._
import docspell.store.queries.QOrganization
trait OOrganization[F[_]] {
def findAllOrg(account: AccountId): F[Vector[OrgAndContacts]]
def findAllOrgRefs(account: AccountId): F[Vector[IdRef]]
def addOrg(s: OrgAndContacts): F[AddResult]
def updateOrg(s: OrgAndContacts): F[AddResult]
def findAllPerson(account: AccountId): F[Vector[PersonAndContacts]]
def findAllPersonRefs(account: AccountId): F[Vector[IdRef]]
def addPerson(s: PersonAndContacts): F[AddResult]
def updatePerson(s: PersonAndContacts): F[AddResult]
def deleteOrg(orgId: Ident, collective: Ident): F[AddResult]
def deletePerson(personId: Ident, collective: Ident): F[AddResult]
object OOrganization {
case class OrgAndContacts(org: ROrganization, contacts: Seq[RContact])
case class PersonAndContacts(person: RPerson, contacts: Seq[RContact])
def apply[F[_] : Effect](store: Store[F]): Resource[F, OOrganization[F]] =
Resource.pure(new OOrganization[F] {
def findAllOrg(account: AccountId): F[Vector[OrgAndContacts]] =
store.transact(QOrganization.findOrgAndContact(account.collective, _.name)).
map({ case (org, cont) => OrgAndContacts(org, cont) }).
def findAllOrgRefs(account: AccountId): F[Vector[IdRef]] =
store.transact(ROrganization.findAllRef(account.collective, _.name))
def addOrg(s: OrgAndContacts): F[AddResult] =
QOrganization.addOrg(s.org, s.contacts, s.org.cid)(store)
def updateOrg(s: OrgAndContacts): F[AddResult] =
QOrganization.updateOrg(s.org, s.contacts, s.org.cid)(store)
def findAllPerson(account: AccountId): F[Vector[PersonAndContacts]] =
store.transact(QOrganization.findPersonAndContact(account.collective, _.name)).
map({ case (person, cont) => PersonAndContacts(person, cont) }).
def findAllPersonRefs(account: AccountId): F[Vector[IdRef]] =
store.transact(RPerson.findAllRef(account.collective, _.name))
def addPerson(s: PersonAndContacts): F[AddResult] =
QOrganization.addPerson(s.person, s.contacts, s.person.cid)(store)
def updatePerson(s: PersonAndContacts): F[AddResult] =
QOrganization.updatePerson(s.person, s.contacts, s.person.cid)(store)
def deleteOrg(orgId: Ident, collective: Ident): F[AddResult] =
store.transact(QOrganization.deleteOrg(orgId, collective)).
def deletePerson(personId: Ident, collective: Ident): F[AddResult] =
@ -0,0 +1,48 @@
package docspell.backend.ops
import cats.implicits._
import cats.effect.{Effect, Resource}
import docspell.common.{AccountId, Ident}
import docspell.store.{AddResult, Store}
import docspell.store.records.RSource
trait OSource[F[_]] {
def findAll(account: AccountId): F[Vector[RSource]]
def add(s: RSource): F[AddResult]
def update(s: RSource): F[AddResult]
def delete(id: Ident, collective: Ident): F[AddResult]
object OSource {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OSource[F]] =
Resource.pure(new OSource[F] {
def findAll(account: AccountId): F[Vector[RSource]] =
store.transact(RSource.findAll(account.collective, _.abbrev))
/** Inserts the source `s` through `store.add`, passing an existence check on
  * the (collective, abbrev) pair. The middle case of the result's `fold` gets
  * the "already exists" message attached; the other two cases pass through. */
def add(s: RSource): F[AddResult] = {
def insert = RSource.insert(s)
def exists = RSource.existsByAbbrev(s.cid, s.abbrev)
val msg = s"A source with abbrev '${s.abbrev}' already exists"
store.add(insert, exists).map(_.fold(identity, _.withMsg(msg), identity))
/** Updates the source `s` with `RSource.updateNoCounter`, using the same
  * `store.add` / existence-check pattern as `add`.
  * NOTE(review): `exists` checks the abbrev of `s` itself; how `store.add`
  * combines it with a successful update is not visible here — confirm. */
def update(s: RSource): F[AddResult] = {
def insert = RSource.updateNoCounter(s)
def exists = RSource.existsByAbbrev(s.cid, s.abbrev)
val msg = s"A source with abbrev '${s.abbrev}' already exists"
store.add(insert, exists).map(_.fold(identity, _.withMsg(msg), identity))
def delete(id: Ident, collective: Ident): F[AddResult] =
store.transact(RSource.delete(id, collective)).
package docspell.backend.ops
import cats.implicits._
import cats.effect.{Effect, Resource}
import docspell.common.{AccountId, Ident}
import docspell.store.{AddResult, Store}
import docspell.store.records.{RTag, RTagItem}
trait OTag[F[_]] {
def findAll(account: AccountId): F[Vector[RTag]]
def add(s: RTag): F[AddResult]
def update(s: RTag): F[AddResult]
def delete(id: Ident, collective: Ident): F[AddResult]
object OTag {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OTag[F]] =
Resource.pure(new OTag[F] {
def findAll(account: AccountId): F[Vector[RTag]] =
store.transact(RTag.findAll(account.collective, _.name))
def add(t: RTag): F[AddResult] = {
def insert = RTag.insert(t)
def exists = RTag.existsByName(t)
val msg = s"A tag '${t.name}' already exists"
store.add(insert, exists).map(_.fold(identity, _.withMsg(msg), identity))
def update(t: RTag): F[AddResult] = {
def insert = RTag.update(t)
def exists = RTag.existsByName(t)
val msg = s"A tag '${t.name}' already exists"
store.add(insert, exists).map(_.fold(identity, _.withMsg(msg), identity))
/** Deletes the tag `id` of `collective`.
  * The tag is looked up first; only when it exists are its item associations
  * (`RTagItem.deleteTag`) and then the tag itself removed. `io` yields the sum
  * of both counts, or 0 when the tag was not found.
  * NOTE(review): how `io` is executed and mapped to `AddResult` is not visible in this view. */
def delete(id: Ident, collective: Ident): F[AddResult] = {
val io = for {
optTag <- RTag.findByIdAndCollective(id, collective)
n0 <- optTag.traverse(t => RTagItem.deleteTag(t.tagId))
n1 <- optTag.traverse(t => RTag.delete(t.tagId, collective))
} yield n0.getOrElse(0) + n1.getOrElse(0)
package docspell.backend.ops
import bitpeace.MimetypeHint
import cats.implicits._
import cats.effect.{ConcurrentEffect, Effect, Resource}
import docspell.backend.Config
import fs2.Stream
import docspell.common._
import docspell.common.syntax.all._
import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.records.{RCollective, RJob, RSource}
import org.log4s._
import scala.concurrent.ExecutionContext
trait OUpload[F[_]] {
def submit(data: OUpload.UploadData[F], account: AccountId): F[OUpload.UploadResult]
def submit(data: OUpload.UploadData[F], sourceId: Ident): F[OUpload.UploadResult]
object OUpload {
private [this] val logger = getLogger
case class File[F[_]](name: Option[String], advertisedMime: Option[MimeType], data: Stream[F, Byte])
case class UploadMeta( direction: Option[Direction]
, sourceAbbrev: String
, validFileTypes: Seq[MimeType])
case class UploadData[F[_]]( multiple: Boolean
, meta: UploadMeta
, files: Vector[File[F]], priority: Priority, tracker: Option[Ident])
sealed trait UploadResult
object UploadResult {
case object Success extends UploadResult
case object NoFiles extends UploadResult
case object NoSource extends UploadResult
def apply[F[_]: ConcurrentEffect](store: Store[F], queue: JobQueue[F], cfg: Config, httpClientEC: ExecutionContext): Resource[F, OUpload[F]] =
Resource.pure(new OUpload[F] {
/** Stores the uploaded files and enqueues processing jobs for `account`.
  *
  * Files that could not be saved are dropped (`saveFile` yields `None` for
  * them). If no file remains, the result is `UploadResult.NoFiles` and no job
  * is created. With `data.multiple` one job per file is created, otherwise a
  * single job containing all files.
  */
def submit(data: OUpload.UploadData[F], account: AccountId): F[OUpload.UploadResult] = {
for {
files <- data.files.traverse(saveFile).map(_.flatten)
pred <- checkFileList(files)
// Falls back to German when the collective has no language configured.
lang <- store.transact(RCollective.findLanguage(account.collective))
meta = ProcessItemArgs.ProcessMeta(account.collective, lang.getOrElse(Language.German), data.meta.direction, data.meta.sourceAbbrev, data.meta.validFileTypes)
args = if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f))) else Vector(ProcessItemArgs(meta, files.toList))
// Jobs are only built when the file check passed (pred is a Right).
job <- pred.traverse(_ => makeJobs(args, account, data.priority, data.tracker))
// NOTE(review): submitJobs logs the same message again for the Right case.
_ <- logger.fdebug(s"Storing jobs: $job")
res <- job.traverse(submitJobs)
// NOTE(review): the counter is incremented even when `res` is a Left
// (i.e. nothing was submitted) — confirm this is intended.
_ <- store.transact(RSource.incrementCounter(data.meta.sourceAbbrev, account.collective))
} yield res.fold(identity, identity)
/** Submits an upload on behalf of the source `sourceId`.
  *
  * When the source exists, its abbreviation replaces `data.meta.sourceAbbrev`
  * and the upload is submitted for the account made of the source's `cid` and
  * `sid`. When it does not exist, the result is `UploadResult.NoSource` and
  * nothing is submitted.
  */
def submit(data: OUpload.UploadData[F], sourceId: Ident): F[OUpload.UploadResult] =
  store.transact(RSource.find(sourceId)).flatMap {
    case Some(source) =>
      val forSource = data.copy(meta = data.meta.copy(sourceAbbrev = source.abbrev))
      submit(forSource, AccountId(source.cid, source.sid))
    case None =>
      (UploadResult.NoSource: OUpload.UploadResult).pure[F]
  }
/** Inserts `jobs` into the queue, then notifies all Joex nodes via
  * `OJoex.notifyAll`, and yields `UploadResult.Success`. */
private def submitJobs(jobs: Vector[RJob]): F[OUpload.UploadResult] = {
for {
_ <- logger.fdebug(s"Storing jobs: $jobs")
_ <- queue.insertAll(jobs)
_ <- OJoex.notifyAll(store, httpClientEC)
} yield UploadResult.Success
/** Saves the file's bytes via `store.bitpeace.saveNew` using the configured
  * chunk size and wraps the resulting id in a `ProcessItemArgs.File`.
  * A failure while storing is caught with `attempt` and logged as a warning.
  * NOTE(review): only `file.name` is passed as mimetype hint; `file.advertisedMime`
  * is not used here. The value returned from the error branch is not visible in this view. */
private def saveFile(file: File[F]): F[Option[ProcessItemArgs.File]] = {
logger.finfo(s"Receiving file $file") *>
store.bitpeace.saveNew(file.data, cfg.files.chunkSize, MimetypeHint(file.name, None), None).
compile.lastOrError.map(fm => Ident.unsafe(fm.id)).attempt.
map(_.fold(ex => {
logger.warn(ex)(s"Could not store file for processing!")
}, id => Some(ProcessItemArgs.File(file.name, id))))
/** Validates the list of stored files: `Left(UploadResult.NoFiles)` for an
  * empty list, `Right(())` otherwise. The check itself is pure and only
  * lifted into `F`. */
private def checkFileList(files: Seq[ProcessItemArgs.File]): F[Either[UploadResult, Unit]] =
  Either.cond[UploadResult, Unit](files.nonEmpty, (), UploadResult.NoFiles).pure[F]
/** Creates one processing job per element of `args`.
  *
  * Every job gets its OWN freshly generated random id. Previously a single id
  * was generated once and reused for all jobs, so an upload with
  * `multiple = true` and several files produced jobs with identical ids.
  * All jobs of one call share the same submission timestamp.
  *
  * @param args    the arguments of each job to create
  * @param account the submitting account; supplies the job's collective and user
  * @param prio    priority assigned to every job
  * @param tracker optional tracker id passed on to every job
  * @return the new jobs, in the order of `args`; empty when `args` is empty
  */
private def makeJobs(args: Vector[ProcessItemArgs], account: AccountId, prio: Priority, tracker: Option[Ident]): F[Vector[RJob]] =
  Timestamp.current[F].flatMap { now =>
    args.traverse { arg =>
      // A new id per job keeps the job ids unique.
      Ident.randomId[F].map { id =>
        RJob.newJob(id, ProcessItemArgs.taskName, account.collective, arg, arg.makeSubject, now, account.user, prio, tracker)
      }
    }
  }
@ -0,0 +1,41 @@
package docspell.backend.signup
import docspell.common.{Duration, Password}
import io.circe._
case class Config(mode: Config.Mode, newInvitePassword: Password, inviteTime: Duration)
object Config {
sealed trait Mode { self: Product =>
final def name: String =
object Mode {
case object Open extends Mode
case object Invite extends Mode
case object Closed extends Mode
def fromString(str: String): Either[String, Mode] =
str.toLowerCase match {
case "open" => Right(Open)
case "invite" => Right(Invite)
case "closed" => Right(Closed)
case _ => Left(s"Invalid signup mode: $str")
def unsafe(str: String): Mode =
fromString(str).fold(sys.error, identity)
implicit val jsonEncoder: Encoder[Mode] =
implicit val jsonDecoder: Decoder[Mode] =
def open: Mode = Mode.Open
def invite: Mode = Mode.Invite
def closed: Mode = Mode.Closed
@ -0,0 +1,19 @@
package docspell.backend.signup
import docspell.common.Ident
sealed trait NewInviteResult { self: Product =>
final def name: String =
object NewInviteResult {
case class Success(id: Ident) extends NewInviteResult
case object InvitationDisabled extends NewInviteResult
case object PasswordMismatch extends NewInviteResult
def passwordMismatch: NewInviteResult = PasswordMismatch
def invitationClosed: NewInviteResult = InvitationDisabled
def success(id: Ident): NewInviteResult = Success(id)
@ -0,0 +1,83 @@
package docspell.backend.signup
import cats.implicits._
import cats.effect.{Effect, Resource}
import docspell.backend.ops.OCollective.RegisterData
import docspell.common._
import docspell.store.{AddResult, Store}
import docspell.store.records.{RCollective, RInvitation, RUser}
import doobie.free.connection.ConnectionIO
trait OSignup[F[_]] {
def register(cfg: Config)(data: RegisterData): F[SignupResult]
def newInvite(cfg: Config)(password: Password): F[NewInviteResult]
object OSignup {
def apply[F[_]:Effect](store: Store[F]): Resource[F, OSignup[F]] =
Resource.pure(new OSignup[F] {
/** Creates a new invitation when signup mode is `Invite`.
  * The request is rejected with `passwordMismatch` when the configured
  * invite password is empty or differs from `password`; otherwise a new
  * `RInvitation` is inserted and its id returned.
  * NOTE(review): the branch for the other signup modes is not visible in this view. */
def newInvite(cfg: Config)(password: Password): F[NewInviteResult] = {
if (cfg.mode == Config.Mode.Invite) {
if (cfg.newInvitePassword.isEmpty || cfg.newInvitePassword != password) NewInviteResult.passwordMismatch.pure[F]
else store.transact(RInvitation.insertNew).map(ri => NewInviteResult.success(ri.id))
} else {
/** Registers a new collective/user according to the configured signup mode.
  * NOTE(review): the bodies of the `Open`, `Closed` and `None` cases are not
  * visible in this view; only the invite flow is documented here. */
def register(cfg: Config)(data: RegisterData): F[SignupResult] = {
cfg.mode match {
case Config.Mode.Open =>
case Config.Mode.Closed =>
case Config.Mode.Invite =>
data.invite match {
case Some(inv) =>
for {
now <- Timestamp.current[F]
// Timestamp `cfg.inviteTime` before now; handed to `useInvite` together with the key.
min = now.minus(cfg.inviteTime)
ok <- store.transact(RInvitation.useInvite(inv, min))
// The user is only added when the invitation key was accepted.
res <- if (ok) addUser(data).map(SignupResult.fromAddResult)
else SignupResult.invalidInvitationKey.pure[F]
} yield res
case None =>
/** Creates the collective `data.collName` together with its first user.
  * Both records are inserted through `store.add`, guarded by
  * `collectiveExists`; the middle case of the result's `fold` gets the
  * "already exists" message attached. */
private def addUser(data: RegisterData): F[AddResult] = {
// Builds the two records: an active collective (language is fixed to German
// here) and an active user with a random id and the crypted password.
def toRecords: F[(RCollective, RUser)] =
for {
id2 <- Ident.randomId[F]
now <- Timestamp.current[F]
c = RCollective(data.collName, CollectiveState.Active, Language.German, now)
u = RUser(id2, data.login, data.collName, PasswordCrypt.crypt(data.password), UserState.Active, None, 0, None, now)
} yield (c, u)
// Inserts collective first, then user; yields the sum of both insert counts.
def insert(coll: RCollective, user: RUser): ConnectionIO[Int] = {
for {
n1 <- RCollective.insert(coll)
n2 <- RUser.insert(user)
} yield n1 + n2
// NOTE(review): the body of collectiveExists is not visible in this view.
def collectiveExists: ConnectionIO[Boolean] =
val msg = s"The collective '${data.collName}' already exists."
for {
cu <- toRecords
save <- store.add(insert(cu._1, cu._2), collectiveExists)
} yield save.fold(identity, _.withMsg(msg), identity)
package docspell.backend.signup
import docspell.store.AddResult
sealed trait SignupResult {
object SignupResult {
case object CollectiveExists extends SignupResult
case object InvalidInvitationKey extends SignupResult
case object SignupClosed extends SignupResult
case class Failure(ex: Throwable) extends SignupResult
case object Success extends SignupResult
def collectiveExists: SignupResult = CollectiveExists
def invalidInvitationKey: SignupResult = InvalidInvitationKey
def signupClosed: SignupResult = SignupClosed
def failure(ex: Throwable): SignupResult = Failure(ex)
def success: SignupResult = Success
/** Translates a store-level `AddResult` into a `SignupResult`: success and
  * failure map one-to-one; an existing entity is reported as `CollectiveExists`
  * (the entity's message is dropped). */
def fromAddResult(ar: AddResult): SignupResult = ar match {
case AddResult.Success => Success
case AddResult.Failure(ex) => Failure(ex)
case AddResult.EntityExists(_) => CollectiveExists
package docspell.common
case class AccountId(collective: Ident, user: Ident) {
def asString =
object AccountId {
private[this] val sepearatorChars: String = "/\\:"
/** Parses an account id of the form `<collective><sep><user>`, where `<sep>`
  * is one of the characters in `sepearatorChars`. If no separator variant
  * parses, the input is tried as a single identifier that is used for both the
  * collective and the user.
  * NOTE(review): several lines of this method are not visible in this view. */
def parse(str: String): Either[String, AccountId] = {
// All whitespace is removed before parsing (the trailing `trim` is then a no-op).
val input = str.replaceAll("\\s+", "").trim
val invalid: Either[String, AccountId] =
Left(s"Cannot parse account id: $str")
// Splits `input` at the first occurrence of `sep`; the separator must not be
// the first character and the input must be longer than two characters.
def parse0(sep: Char): Either[String, AccountId] =
input.indexOf(sep.toInt) match {
case n if n > 0 && input.length > 2 =>
val coll = input.substring(0, n)
val user = input.substring(n + 1)
flatMap(collId => Ident.fromString(user).
map(userId => AccountId(collId, userId)))
case _ =>
val separated = sepearatorChars.foldRight(invalid) { (c, v) =>
// NOTE(review): the fallback parses the raw `str`, not the whitespace-stripped
// `input`, so surrounding whitespace makes the fallback fail — confirm intent.
separated.orElse(Ident.fromString(str).map(id => AccountId(id, id)))
@ -0,0 +1,34 @@
package docspell.common
case class Banner( component: String
, version: String
, gitHash: Option[String]
, jdbcUrl: LenientUri
, configFile: Option[String]
, appId: Ident
, baseUrl: LenientUri) {
private val banner =
"""______ _ _
|| _ \ | | |
|| | | |___ ___ ___ _ __ ___| | |
|| | | / _ \ / __/ __| '_ \ / _ \ | |
|| |/ / (_) | (__\__ \ |_) | __/ | |
||___/ \___/ \___|___/ .__/ \___|_|_|
| | |
|""".stripMargin +
s""" |_| v$version (#${gitHash.map(_.take(8)).getOrElse("")})"""
/** Renders the ASCII banner followed by component name, application id,
  * base url, database url and config file, one item per line. Every line is
  * prefixed with `prefix` and a single space; lines are joined with "\n".
  * A missing config file is rendered as an empty value. */
def render(prefix: String): String = {
val text = banner.split('\n').toList ++ List(
s"<< $component >>"
, s"Id: ${appId.id}"
, s"Base-Url: ${baseUrl.asString}"
, s"Database: ${jdbcUrl.asString}"
, s"Config: ${configFile.getOrElse("")}"
, ""
text.map(line => s"$prefix $line").mkString("\n")
package docspell.common
import java.time.Instant
import io.circe._
object BaseJsonCodecs {
implicit val encodeInstantEpoch: Encoder[Instant] =
implicit val decodeInstantEpoch: Decoder[Instant] =
@ -0,0 +1,52 @@
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait CollectiveState
object CollectiveState {
val all = List(Active, ReadOnly, Closed, Blocked)
/** A normal active collective */
case object Active extends CollectiveState
/** A collective may be readonly in cases it is implicitly closed
* (e.g. no payment). Users can still see their data and
* download, but have no write access. */
case object ReadOnly extends CollectiveState
/** A collective that has been explicitly closed. */
case object Closed extends CollectiveState
/** A collective blocked by a super user, usually some emergency
* action. */
case object Blocked extends CollectiveState
def fromString(s: String): Either[String, CollectiveState] =
s.toLowerCase match {
case "active" => Right(Active)
case "readonly" => Right(ReadOnly)
case "closed" => Right(Closed)
case "blocked" => Right(Blocked)
case _ => Left(s"Unknown state: $s")
def unsafe(str: String): CollectiveState =
fromString(str).fold(sys.error, identity)
def asString(state: CollectiveState): String = state match {
case Active => "active"
case Blocked => "blocked"
case Closed => "closed"
case ReadOnly => "readonly"
implicit val collectiveStateEncoder: Encoder[CollectiveState] =
implicit val collectiveStateDecoder: Decoder[CollectiveState] =
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait ContactKind { self: Product =>
def asString: String = self.productPrefix
object ContactKind {
/** All contact kinds. Previously this was an empty `List()`, so code iterating over `all` saw no kinds at all. */
val all: List[ContactKind] = List(Phone, Mobile, Fax, Email, Docspell, Website)
case object Phone extends ContactKind
case object Mobile extends ContactKind
case object Fax extends ContactKind
case object Email extends ContactKind
case object Docspell extends ContactKind
case object Website extends ContactKind
/** Parses a contact kind from its name, ignoring case.
  *
  * @param s one of "phone", "mobile", "fax", "email", "docspell" or "website"
  * @return the matching kind, or a `Left` with a message naming the rejected
  *         input (the message used to say "state value", copied from another type)
  */
def fromString(s: String): Either[String, ContactKind] =
  s.toLowerCase match {
    case "phone" => Right(Phone)
    case "mobile" => Right(Mobile)
    case "fax" => Right(Fax)
    case "email" => Right(Email)
    case "docspell" => Right(Docspell)
    case "website" => Right(Website)
    case _ => Left(s"Not a contact kind: $s")
  }
def unsafe(str: String): ContactKind =
fromString(str).fold(sys.error, identity)
def asString(s: ContactKind): String =
implicit val contactKindEncoder: Encoder[ContactKind] =
implicit val contactKindDecoder: Decoder[ContactKind] =
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait Direction {
self: Product =>
def name: String =
object Direction {
case object Incoming extends Direction
case object Outgoing extends Direction
def incoming: Direction = Incoming
def outgoing: Direction = Outgoing
def parse(str: String): Either[String, Direction] =
str.toLowerCase match {
case "incoming" => Right(Incoming)
case "outgoing" => Right(Outgoing)
case _ => Left(s"No direction: $str")
def unsafe(str: String): Direction =
parse(str).fold(sys.error, identity)
def isIncoming(dir: Direction): Boolean =
dir == Direction.Incoming
def isOutgoing(dir: Direction): Boolean =
dir == Direction.Outgoing
implicit val directionEncoder: Encoder[Direction] =
implicit val directionDecoder: Decoder[Direction] =
@ -0,0 +1,54 @@
package docspell.common
import cats.implicits._
import scala.concurrent.duration.{FiniteDuration, Duration => SDur}
import java.time.{Duration => JDur}
import java.util.concurrent.TimeUnit
import cats.effect.Sync
case class Duration(nanos: Long) {
def millis: Long = nanos / 1000000
def seconds: Long = millis / 1000
def toScala: FiniteDuration =
FiniteDuration(nanos, TimeUnit.NANOSECONDS)
def toJava: JDur =
def formatExact: String =
s"$millis ms"
object Duration {
def apply(d: SDur): Duration =
def apply(d: JDur): Duration =
def seconds(n: Long): Duration =
def millis(n: Long): Duration =
def minutes(n: Long): Duration =
def hours(n: Long): Duration =
def nanos(n: Long): Duration =
/** A stop watch in two stages: running the outer effect records the start
  * time; running the inner effect reads the clock again and yields the
  * elapsed time (millisecond resolution) since the start. */
def stopTime[F[_]: Sync]: F[F[Duration]] =
  Timestamp.current[F].map { started =>
    // Not evaluated here: the caller decides when to take the second reading.
    Timestamp.current[F].map(finished => Duration.millis(finished.toMillis - started.toMillis))
  }
@ -0,0 +1,16 @@
package docspell.common
import io.circe._
import io.circe.generic.semiauto._
case class IdRef(id: Ident, name: String) {
object IdRef {
implicit val jsonEncoder: Encoder[IdRef] =
implicit val jsonDecoder: Decoder[IdRef] =
@ -0,0 +1,57 @@
package docspell.common
import java.security.SecureRandom
import java.util.UUID
import cats.effect.Sync
import io.circe.{Decoder, Encoder}
import scodec.bits.ByteVector
case class Ident(id: String) {
object Ident {
val chars: Set[Char] = (('A' to 'Z') ++ ('a' to 'z') ++ ('0' to '9') ++ "-_").toSet
def randomUUID[F[_]: Sync]: F[Ident] =
def randomId[F[_]: Sync]: F[Ident] = Sync[F].delay {
val random = new SecureRandom()
val buffer = new Array[Byte](32)
def apply(str: String): Either[String, Ident] =
/** Creates an identifier from `s` if every character is in `chars`.
  *
  * Note that the empty string passes this check.
  *
  * @return the identifier, or a `Left` with a message listing the allowed
  *         characters. The characters are sorted so the message is stable and
  *         readable instead of following the set's internal iteration order.
  */
def fromString(s: String): Either[String, Ident] =
  if (s.forall(chars.contains)) Right(new Ident(s))
  else Left(s"Invalid identifier: $s. Allowed chars: ${chars.toList.sorted.mkString}")
def fromBytes(bytes: ByteVector): Ident =
def fromByteArray(bytes: Array[Byte]): Ident =
/** Like `fromString`, but throws (via `sys.error`) with the validation
  * message when `s` is not a valid identifier. */
def unsafe(s: String): Ident =
  fromString(s).fold(sys.error, identity)
def unapply(arg: String): Option[Ident] =
implicit val encodeIdent: Encoder[Ident] =
implicit val decodeIdent: Decoder[Ident] =
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait ItemState { self: Product =>
final def name: String =
object ItemState {
case object Premature extends ItemState
case object Processing extends ItemState
case object Created extends ItemState
case object Confirmed extends ItemState
def fromString(str: String): Either[String, ItemState] =
str.toLowerCase match {
case "premature" => Right(Premature)
case "processing" => Right(Processing)
case "created" => Right(Created)
case "confirmed" => Right(Confirmed)
case _ => Left(s"Invalid item state: $str")
def unsafe(str: String): ItemState =
fromString(str).fold(sys.error, identity)
implicit val jsonDecoder: Decoder[ItemState] =
implicit val jsonEncoder: Encoder[ItemState] =
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait JobState { self: Product =>
def name: String =
object JobState {
/** Waiting for being executed. */
case object Waiting extends JobState {
/** A scheduler has picked up this job and will pass it to the next
* free slot. */
case object Scheduled extends JobState {
/** Is currently executing */
case object Running extends JobState {
/** Finished with failure and is being retried. */
case object Stuck extends JobState {
/** Finished finally with a failure */
case object Failed extends JobState {
/** Finished by cancellation. */
case object Cancelled extends JobState {
/** Finished with success */
case object Success extends JobState {
val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success)
val queued: Set[JobState] = Set(Waiting, Scheduled, Stuck)
val done: Set[JobState] = Set(Failed, Cancelled, Success)
def parse(str: String): Either[String, JobState] =
str.toLowerCase match {
case "waiting" => Right(Waiting)
case "scheduled" => Right(Scheduled)
case "running" => Right(Running)
case "stuck" => Right(Stuck)
case "failed" => Right(Failed)
case "cancelled" => Right(Cancelled)
case "success" => Right(Success)
case _ => Left(s"Not a job state: $str")
def unsafe(str: String): JobState =
parse(str).fold(sys.error, identity)
def asString(state: JobState): String =
implicit val jobStateEncoder: Encoder[JobState] =
implicit val jobStateDecoder: Decoder[JobState] =
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait Language { self: Product =>
final def name: String =
def iso2: String
def iso3: String
private[common] def allNames =
Set(name, iso3, iso2)
object Language {
case object German extends Language {
val iso2 = "de"
val iso3 = "deu"
case object English extends Language {
val iso2 = "en"
val iso3 = "eng"
val all: List[Language] = List(German, English)
def fromString(str: String): Either[String, Language] = {
val lang = str.toLowerCase
toRight(s"Unsupported or invalid language: $str")
def unsafe(str: String): Language =
fromString(str).fold(sys.error, identity)
implicit val jsonDecoder: Decoder[Language] =
implicit val jsonEncoder: Encoder[Language] =
package docspell.common
import java.net.URL
import fs2.Stream
import cats.implicits._
import cats.data.NonEmptyList
import cats.effect.{Blocker, ContextShift, Sync}
import docspell.common.LenientUri.Path
import io.circe.{Decoder, Encoder}
/** A URI.
* It is not compliant with RFC 3986, but covers most use cases in a convenient way.
case class LenientUri(scheme: NonEmptyList[String]
, authority: Option[String]
, path: LenientUri.Path
, query: Option[String]
, fragment: Option[String]) {
def /(segment: String): LenientUri =
copy(path = path / segment)
def ++ (np: Path): LenientUri =
copy(path = np.segments.foldLeft(path)(_ / _))
def ++ (np: String): LenientUri = {
val rel = LenientUri.stripLeading(np, '/')
def toJavaUrl: Either[String, URL] =
Either.catchNonFatal(new URL(asString)).left.map(_.getMessage)
def readURL[F[_]: Sync : ContextShift](chunkSize: Int, blocker: Blocker): Stream[F, Byte] =
Stream.emit(Either.catchNonFatal(new URL(asString))).
flatMap(url => fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true))
def asString: String = {
val schemePart = scheme.toList.mkString(":")
val authPart = authority.map(a => s"//$a").getOrElse("")
val pathPart = path.asString
val queryPart = query.map(q => s"?$q").getOrElse("")
val fragPart = fragment.map(f => s"#$f").getOrElse("")
object LenientUri {
sealed trait Path {
def segments: List[String]
def isRoot: Boolean
def isEmpty: Boolean
def /(segment: String): Path
def asString: String
case object RootPath extends Path {
val segments = Nil
val isRoot = true
val isEmpty = false
def /(seg: String): Path =
def asString = "/"
case object EmptyPath extends Path {
val segments = Nil
val isRoot = false
val isEmpty = true
def /(seg: String): Path =
def asString = ""
/** A path made of at least one segment. */
case class NonEmptyPath(segs: NonEmptyList[String]) extends Path {
  val isRoot = false
  val isEmpty = false

  def segments = segs.toList

  /** Appends `seg` as the last segment. */
  def /(seg: String): Path =
    NonEmptyPath(segs.append(seg))

  /** Joins the percent-encoded segments with '/'. Paths starting with "." or
    * ".." are relative and get no leading slash; all others do. */
  def asString = {
    val joined = segments.map(percentEncode).mkString("/")
    segs.head match {
      case "." | ".." => joined
      case _ => "/" + joined
    }
  }
}
def unsafe(str: String): LenientUri =
parse(str).fold(sys.error, identity)
def fromJava(u: URL): LenientUri =
def parse(str: String): Either[String, LenientUri] = {
// Turns the path part of a URL into a Path: "/" is the root path, "" the empty
// path. Anything else is stripped of one leading '/', split at '/', and each
// segment is percent-decoded.
// NOTE(review): the match is on the trimmed string, but the split uses the
// untrimmed `str`.
def makePath(str: String): Path = str.trim match {
case "/" => RootPath
case "" => EmptyPath
case _ => NonEmptyList.fromList(stripLeading(str, '/').split('/').toList.map(percentDecode)) match {
case Some(nl) => NonEmptyPath(nl)
case None => sys.error(s"Invalid url: $str")
def makeNonEmpty(str: String): Option[String] =
def makeScheme(s: String): Option[NonEmptyList[String]] =
// Splits "path?query#fragment" into its three parts. The first '?' separates
// path and query. The fragment starts at the first '#' (searched from the '?'
// position when a query exists). Empty query/fragment strings go through
// makeNonEmpty.
// NOTE(review): a '#' that appears BEFORE the '?' is not treated as fragment
// start; it stays part of the path.
def splitPathQF(pqf: String): (Path, Option[String], Option[String]) =
pqf.indexOf('?') match {
case -1 =>
pqf.indexOf('#') match {
case -1 =>
(makePath(pqf), None, None)
case n =>
(makePath(pqf.substring(0, n)), None, makeNonEmpty(pqf.substring(n + 1)))
case n =>
pqf.indexOf('#', n) match {
case -1 =>
(makePath(pqf.substring(0, n)), makeNonEmpty(pqf.substring(n+1)), None)
case k =>
(makePath(pqf.substring(0, n)), makeNonEmpty(pqf.substring(n+1, k)), makeNonEmpty(pqf.substring(k+1)))
str.split("//", 2) match {
case Array(p0, p1) =>
// scheme:scheme:authority/path
val scheme = makeScheme(p0)
val (auth, pathQF) = p1.indexOf('/') match {
case -1 => (Some(p1), "")
case n => (Some(p1.substring(0, n)), p1.substring(n))
val (path, query, frag) = splitPathQF(pathQF)
scheme match {
case None =>
Left(s"No scheme found: $str")
case Some(nl) =>
Right(LenientUri(nl, auth, path, query, frag))
case Array(p0) =>
// scheme:scheme:path
p0.lastIndexOf(':') match {
case -1 =>
Left(s"No scheme found: $str")
case n =>
val scheme = makeScheme(p0.substring(0, n))
val (path, query, frag) = splitPathQF(p0.substring(n + 1))
scheme match {
case None =>
Left(s"No scheme found: $str")
case Some(nl) =>
Right(LenientUri(nl, None, path, query, frag))
private[this] val delims: Set[Char] = ":/?#[]@".toSet
/** Replaces every character contained in `delims` by '%' followed by its
  * lower-case hex code; all other characters are kept as they are.
  * NOTE(review): '%' itself is not in `delims`, so a literal '%' produced by
  * `percentDecode` (from "%25") is emitted unencoded — confirm this is intended. */
private def percentEncode(s: String): String =
s.flatMap(c => if (delims.contains(c)) s"%${c.toInt.toHexString}" else c.toString)
/** Decodes "%XX" escapes, where XX is a two-digit hex code, into the
  * corresponding character. Strings without '%' are returned unchanged.
  * The fold state is (pending escape text, decoded output so far).
  * NOTE(review): the end of the fold is not visible in this view; an
  * incomplete escape at the end of `s` appears to be left in the pending
  * part — confirm how it is handled. */
private def percentDecode(s: String): String =
if (!s.contains("%")) s
else s.foldLeft(("", "")) { case ((acc, res), c) =>
// acc holds '%' plus one hex digit: `c` completes the escape.
if (acc.length == 2) ("", res :+ Integer.parseInt(acc.drop(1) :+ c, 16).toChar)
// acc holds only '%': collect the first hex digit.
else if (acc.startsWith("%")) (acc :+ c, res)
else if (c == '%') ("%", res)
else (acc, res :+ c)
/** Removes a single leading `c` from `s`, if present; at most one character
  * is dropped. Strings not starting with `c` (including the empty string)
  * are returned unchanged. */
private def stripLeading(s: String, c: Char): String =
  if (s.startsWith(c.toString)) s.drop(1) else s
implicit val encodeLenientUri: Encoder[LenientUri] =
implicit val decodeLenientUri: Decoder[LenientUri] =
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait LogLevel { self: Product =>
def toInt: Int
final def name: String =
object LogLevel {
case object Debug extends LogLevel { val toInt = 0 }
case object Info extends LogLevel { val toInt = 1 }
case object Warn extends LogLevel { val toInt = 2 }
case object Error extends LogLevel { val toInt = 3 }
/** Returns the level whose `toInt` equals `n`; any other number
  * falls back to `Debug`.
  */
def fromInt(n: Int): LogLevel =
  List[LogLevel](Debug, Info, Warn, Error).find(_.toInt == n).getOrElse(Debug)
/** Parses a log level from its name, ignoring case.
  *
  * Lower-casing uses `Locale.ROOT` so the result does not depend on
  * the JVM default locale (with a Turkish locale "INFO" would become
  * "ınfo" and be rejected).
  *
  * @return the level, or `Left` with an error message for unknown names
  */
def fromString(str: String): Either[String, LogLevel] =
  str.toLowerCase(java.util.Locale.ROOT) match {
    case "debug"   => Right(Debug)
    case "info"    => Right(Info)
    case "warn"    => Right(Warn)
    case "warning" => Right(Warn)
    case "error"   => Right(Error)
    case _         => Left(s"Invalid log-level: $str")
  }
/** Like `fromString`, but raises an error via `sys.error` when the
  * name is not a known log level.
  */
def unsafeString(str: String): LogLevel =
  fromString(str) match {
    case Right(level) => level
    case Left(msg)    => sys.error(msg)
  }
implicit val jsonDecoder: Decoder[LogLevel] =
implicit val jsonEncoder: Encoder[LogLevel] =
@ -0,0 +1,44 @@
package docspell.common
import cats.data.NonEmptyList
import docspell.common.MetaProposal.Candidate
import io.circe._
import io.circe.generic.semiauto._
case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) {
def addIdRef(refs: Seq[Candidate]): MetaProposal =
copy(values = MetaProposal.flatten(values ++ refs.toList))
def isSingleValue: Boolean =
def isMultiValue: Boolean =
def size: Int =
object MetaProposal {
case class Candidate(ref: IdRef, origin: Set[NerLabel])
object Candidate {
implicit val jsonEncoder: Encoder[Candidate] =
implicit val jsonDecoder: Decoder[Candidate] =
def flatten(s: NonEmptyList[Candidate]): NonEmptyList[Candidate] = {
def append(list: List[Candidate]): Candidate =
list.reduce((l0, l1) => l0.copy(origin = l0.origin ++ l1.origin))
val grouped = s.toList.groupBy(_.ref.id)
implicit val jsonDecoder: Decoder[MetaProposal] =
implicit val jsonEncoder: Encoder[MetaProposal] =
@ -0,0 +1,82 @@
package docspell.common
import cats.data.NonEmptyList
import cats.kernel.Monoid
import docspell.common.MetaProposal.Candidate
import io.circe._
import io.circe.generic.semiauto._
case class MetaProposalList private (proposals: List[MetaProposal]) {
def isEmpty: Boolean = proposals.isEmpty
def nonEmpty: Boolean = proposals.nonEmpty
/** True if this list holds a proposal for `mt` and for every type in `mts`. */
def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean =
  (mts :+ mt).forall(wanted => proposals.exists(_.proposalType == wanted))
/** True if there is a proposal for every type in `MetaProposalType.all`.
  *
  * This checks containment rather than set equality: a list that
  * additionally holds proposals of a type not in `all` (for example
  * `DocDate` or `DueDate`) still counts as complete.
  */
def hasResultsAll: Boolean =
  MetaProposalType.all.forall(mt => proposals.exists(_.proposalType == mt))
/** The set of proposal types present in this list. */
def getTypes: Set[MetaProposalType] =
  proposals.map(_.proposalType).toSet
/** Adds those proposals of `ml` whose type has no proposal in this list.
  *
  * Existing proposals are never replaced. The added ones are put in
  * front of the current proposals, in reverse order of `ml`.
  */
def fillEmptyFrom(ml: MetaProposalList): MetaProposalList = {
  val missing = ml.proposals.filterNot(mp => hasResults(mp.proposalType))
  new MetaProposalList(missing.reverse ::: proposals)
}
/** Returns the first proposal of type `mpt`, if there is one. */
def find(mpt: MetaProposalType): Option[MetaProposal] =
  proposals.collectFirst { case mp if mp.proposalType == mpt => mp }
object MetaProposalList {
val empty = MetaProposalList(Nil)
def apply(lmp: List[MetaProposal]): MetaProposalList =
flatten(lmp.map(m => new MetaProposalList(List(m))))
def of(mps: MetaProposal*): MetaProposalList =
flatten(mps.toList.map(mp => MetaProposalList(List(mp))))
def from(mt: MetaProposalType, label: NerLabel)(refs: Seq[IdRef]): MetaProposalList =
fromSeq1(mt, refs.map(ref => Candidate(ref, Set(label))))
def fromSeq1(mt: MetaProposalType, refs: Seq[Candidate]): MetaProposalList =
map(nl => MetaProposalList.of(MetaProposal(mt, nl))).
def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList = {
new MetaProposalList(m.toList.map({ case (k, v) => v.copy(proposalType = k) }))
def flatten(ml: Seq[MetaProposalList]): MetaProposalList = {
val init: Map[MetaProposalType, MetaProposal] = Map.empty
def updateMap(map: Map[MetaProposalType, MetaProposal], mp: MetaProposal): Map[MetaProposalType, MetaProposal] =
map.get(mp.proposalType) match {
case Some(mp0) => map.updated(mp.proposalType, mp0.addIdRef(mp.values.toList))
case None => map.updated(mp.proposalType, mp)
val merged = ml.foldLeft(init) { (map, el) =>
implicit val jsonEncoder: Encoder[MetaProposalList] =
implicit val jsonDecoder: Decoder[MetaProposalList] =
implicit val metaProposalListMonoid: Monoid[MetaProposalList] =
Monoid.instance(empty, (m0, m1) => flatten(Seq(m0, m1)))
@ -0,0 +1,41 @@
package docspell.common
import io.circe._
sealed trait MetaProposalType { self: Product =>
final def name: String =
object MetaProposalType {
case object CorrOrg extends MetaProposalType
case object CorrPerson extends MetaProposalType
case object ConcPerson extends MetaProposalType
case object ConcEquip extends MetaProposalType
case object DocDate extends MetaProposalType
case object DueDate extends MetaProposalType
/** The proposal types that `MetaProposalList.hasResultsAll` checks for.
  *
  * NOTE(review): `DocDate` and `DueDate` are not part of this list;
  * confirm that leaving them out is intentional.
  */
val all: List[MetaProposalType] =
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip)
/** Parses a proposal type from its name, ignoring case.
  *
  * Lower-casing uses `Locale.ROOT` so that parsing does not depend on
  * the JVM default locale (a Turkish locale would turn "CONCEQUIP"
  * into "concequıp", which is then rejected).
  *
  * @return the type, or `Left` with an error message for unknown names
  */
def fromString(str: String): Either[String, MetaProposalType] =
  str.toLowerCase(java.util.Locale.ROOT) match {
    case "corrorg"    => Right(CorrOrg)
    case "corrperson" => Right(CorrPerson)
    case "concperson" => Right(ConcPerson)
    case "concequip"  => Right(ConcEquip)
    case "docdate"    => Right(DocDate)
    case "duedate"    => Right(DueDate)
    case _            => Left(s"Invalid item-proposal-type: $str")
  }
/** Like `fromString`, but raises an error via `sys.error` for an
  * unknown name.
  */
def unsafe(str: String): MetaProposalType =
  fromString(str) match {
    case Right(tpe) => tpe
    case Left(msg)  => sys.error(msg)
  }
implicit val jsonDecoder: Decoder[MetaProposalType] =
implicit val jsonEncoder: Encoder[MetaProposalType] =
@ -0,0 +1,62 @@
package docspell.common
import docspell.common.syntax.all._
import io.circe.{Decoder, Encoder}
/** A MIME Type impl with just enough features for the use here.
case class MimeType(primary: String, sub: String) {
def asString: String =
/** Checks whether `other` is covered by this MIME type.
  *
  * The primary parts must be equal. The sub parts must be equal
  * unless this type's sub part is the wildcard `*`.
  */
def matches(other: MimeType): Boolean = {
  val subMatches = sub == "*" || sub == other.sub
  primary == other.primary && subMatches
}
object MimeType {
def application(sub: String): MimeType =
MimeType("application", partFromString(sub).throwLeft)
def text(sub: String): MimeType =
MimeType("text", partFromString(sub).throwLeft)
def image(sub: String): MimeType =
MimeType("image", partFromString(sub).throwLeft)
/** Characters allowed in the primary and sub part of a MIME type.
  *
  * Besides letters, digits, `-` and the wildcard `*`, this includes
  * `.` and `+`, so that vendor types and structured suffixes such as
  * `application/vnd.ms-excel` or `image/svg+xml` are accepted.
  */
private[this] val validChars: Set[Char] =
  (('A' to 'Z') ++ ('a' to 'z') ++ ('0' to '9') ++ "*-.+").toSet
/** Parses a string of the form `primary/sub` into a [[MimeType]].
  *
  * The string is split at the first `/`; both parts are validated with
  * `partFromString` and stored in lower case. Lower-casing uses
  * `Locale.ROOT` so the result does not depend on the JVM default
  * locale (a Turkish locale would turn "TIFF" into "tıff").
  *
  * @return the MIME type, or `Left` with an error message if there is
  *         no `/` or a part contains invalid characters
  */
def parse(str: String): Either[String, MimeType] =
  str.indexOf('/') match {
    case -1 =>
      Left(s"Invalid MIME type: $str")
    case n =>
      for {
        prim <- partFromString(str.substring(0, n))
        sub  <- partFromString(str.substring(n + 1))
      } yield MimeType(
        prim.toLowerCase(java.util.Locale.ROOT),
        sub.toLowerCase(java.util.Locale.ROOT)
      )
  }
def unsafe(str: String): MimeType =
/** Validates one part (primary or sub) of a MIME type.
  *
  * A part must be non-empty and consist only of characters in
  * `validChars`. The empty string is rejected explicitly: `forall` is
  * true on an empty string, so inputs like "/pdf" or "application/"
  * would otherwise yield a MIME type with an empty part.
  */
private def partFromString(s: String): Either[String, String] =
  if (s.nonEmpty && s.forall(validChars.contains)) Right(s)
  else Left(s"Invalid identifier: $s. Allowed chars: ${validChars.mkString}")
val octetStream = application("octet-stream")
val pdf = application("pdf")
val png = image("png")
val jpeg = image("jpeg")
val tiff = image("tiff")
val html = text("html")
val plain = text("plain")
implicit val jsonEncoder: Encoder[MimeType] =
implicit val jsonDecoder: Decoder[MimeType] =
@ -0,0 +1,7 @@
package docspell.common
import java.time.LocalDate
case class NerDateLabel(date: LocalDate, label: NerLabel) {
package docspell.common
import io.circe.generic.semiauto._
import io.circe.{Decoder, Encoder}
case class NerLabel(label: String, tag: NerTag, startPosition: Int, endPosition: Int) {
object NerLabel {
implicit val jsonEncoder: Encoder[NerLabel] = deriveEncoder[NerLabel]
implicit val jsonDecoder: Decoder[NerLabel] = deriveDecoder[NerLabel]
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait NerTag { self: Product =>
final def name: String =
object NerTag {
case object Organization extends NerTag
case object Person extends NerTag
case object Location extends NerTag
case object Misc extends NerTag
case object Email extends NerTag
case object Website extends NerTag
case object Date extends NerTag
val all: List[NerTag] = List(Organization, Person, Location)
/** Parses a NER tag from its name, ignoring case.
  *
  * Lower-casing uses `Locale.ROOT` so that parsing does not depend on
  * the JVM default locale (a Turkish locale would turn "ORGANIZATION"
  * into "organızatıon", which is then rejected).
  *
  * @return the tag, or `Left` with an error message for unknown names
  */
def fromString(str: String): Either[String, NerTag] =
  str.toLowerCase(java.util.Locale.ROOT) match {
    case "organization" => Right(Organization)
    case "person"       => Right(Person)
    case "location"     => Right(Location)
    case "misc"         => Right(Misc)
    case "email"        => Right(Email)
    case "website"      => Right(Website)
    case "date"         => Right(Date)
    case _              => Left(s"Invalid ner tag: $str")
  }
/** Like `fromString`, but raises an error via `sys.error` for an
  * unknown tag name.
  */
def unsafe(str: String): NerTag =
  fromString(str) match {
    case Right(tag) => tag
    case Left(msg)  => sys.error(msg)
  }
implicit val jsonDecoder: Decoder[NerTag] =
implicit val jsonEncoder: Encoder[NerTag] =
package docspell.common
sealed trait NodeType { self: Product =>
final def name: String =
object NodeType {
case object Restserver extends NodeType
case object Joex extends NodeType
/** Parses a node type from its name, ignoring case.
  *
  * Lower-casing uses `Locale.ROOT`, so the comparison is independent
  * of the JVM default locale.
  *
  * @return the node type, or `Left` with an error message for unknown names
  */
def fromString(str: String): Either[String, NodeType] =
  str.toLowerCase(java.util.Locale.ROOT) match {
    case "restserver" => Right(Restserver)
    case "joex"       => Right(Joex)
    case _            => Left(s"Invalid node type: $str")
  }
/** Like `fromString`, but raises an error via `sys.error` for an
  * unknown node type name.
  */
def unsafe(str: String): NodeType =
  fromString(str) match {
    case Right(nodeType) => nodeType
    case Left(msg)       => sys.error(msg)
  }
package docspell.common
import io.circe.{Decoder, Encoder}
final class Password(val pass: String) extends AnyVal {
def isEmpty: Boolean= pass.isEmpty
override def toString: String =
if (pass.isEmpty) "<empty>" else "***"
object Password {
val empty = Password("")
def apply(pass: String): Password =
new Password(pass)
implicit val passwordEncoder: Encoder[Password] =
implicit val passwordDecoder: Decoder[Password] =
package docspell.common
import cats.implicits._
import cats.Order
import io.circe.{Decoder, Encoder}
sealed trait Priority { self: Product =>
final def name: String =
object Priority {
case object High extends Priority
case object Low extends Priority
/** Parses a priority from its name, ignoring case.
  *
  * Lower-casing uses `Locale.ROOT` so that parsing does not depend on
  * the JVM default locale (a Turkish locale would turn "HIGH" into
  * "hıgh", which is then rejected).
  *
  * @return the priority, or `Left` with an error message for unknown names
  */
def fromString(str: String): Either[String, Priority] =
  str.toLowerCase(java.util.Locale.ROOT) match {
    case "high" => Right(High)
    case "low"  => Right(Low)
    case _      => Left(s"Invalid priority: $str")
  }
/** Like `fromString`, but raises an error via `sys.error` for an
  * unknown priority name.
  */
def unsafe(str: String): Priority =
  fromString(str) match {
    case Right(prio) => prio
    case Left(msg)   => sys.error(msg)
  }
/** Maps a number to a priority: everything above the numeric value of
  * `Low` is `High`, everything else is `Low`.
  */
def fromInt(n: Int): Priority =
  if (n > toInt(Low)) High else Low
def toInt(p: Priority): Int =
p match {
case Low => 0
case High => 10
implicit val priorityOrder: Order[Priority] =
Order.by[Priority, Int](toInt)
implicit val jsonEncoder: Encoder[Priority] =
implicit val jsonDecoder: Decoder[Priority] =
@ -0,0 +1,47 @@
package docspell.common
import io.circe._, io.circe.generic.semiauto._
import docspell.common.syntax.all._
import ProcessItemArgs._
case class ProcessItemArgs(meta: ProcessMeta, files: List[File]) {
/** Builds a subject line from the names of the given files.
  *
  * One or two named files yield their names; no named file or more
  * than two yield a summary that mentions the source abbreviation.
  */
def makeSubject: String = {
  val names = files.flatMap(_.name)
  names match {
    case Nil                    => s"${meta.sourceAbbrev}: No files"
    case single :: Nil          => single
    case first :: second :: Nil => s"$first, $second"
    case _                      => s"${files.size} files from ${meta.sourceAbbrev}"
  }
}
object ProcessItemArgs {
val taskName = Ident.unsafe("process-item")
case class ProcessMeta( collective: Ident
, language: Language
, direction: Option[Direction]
, sourceAbbrev: String
, validFileTypes: Seq[MimeType])
object ProcessMeta {
implicit val jsonEncoder: Encoder[ProcessMeta] = deriveEncoder[ProcessMeta]
implicit val jsonDecoder: Decoder[ProcessMeta] = deriveDecoder[ProcessMeta]
case class File(name: Option[String], fileMetaId: Ident)
object File {
implicit val jsonEncoder: Encoder[File] = deriveEncoder[File]
implicit val jsonDecoder: Decoder[File] = deriveDecoder[File]
implicit val jsonEncoder: Encoder[ProcessItemArgs] = deriveEncoder[ProcessItemArgs]
implicit val jsonDecoder: Decoder[ProcessItemArgs] = deriveDecoder[ProcessItemArgs]
def parse(str: String): Either[Throwable, ProcessItemArgs] =
@ -0,0 +1,20 @@
package docspell.common
import java.util.concurrent.atomic.AtomicLong
import java.util.concurrent.{Executors, ThreadFactory}
object ThreadFactories {
def ofName(prefix: String): ThreadFactory =
new ThreadFactory {
val counter = new AtomicLong(0)
override def newThread(r: Runnable): Thread = {
val t = Executors.defaultThreadFactory().newThread(r)
@ -0,0 +1,41 @@
package docspell.common
import java.time.{Instant, LocalDate, ZoneId}
import cats.effect.Sync
import io.circe.{Decoder, Encoder}
case class Timestamp(value: Instant) {
def toMillis: Long = value.toEpochMilli
def toSeconds: Long = value.toEpochMilli / 1000L
def minus(d: Duration): Timestamp =
def minusHours(n: Long): Timestamp =
Timestamp(value.minusSeconds(n * 60 * 60))
def toDate: LocalDate =
def asString: String = value.toString
object Timestamp {
val Epoch = Timestamp(Instant.EPOCH)
def current[F[_]: Sync]: F[Timestamp] =
implicit val encodeTimestamp: Encoder[Timestamp] =
implicit val decodeTimestamp: Decoder[Timestamp] =
@ -0,0 +1,37 @@
package docspell.common
import io.circe.{Decoder, Encoder}
sealed trait UserState
object UserState {
val all = List(Active, Disabled)
/** An active or enabled user. */
case object Active extends UserState
/** The user is blocked by an admin. */
case object Disabled extends UserState
/** Parses a user state from its name, ignoring case.
  *
  * Lower-casing uses `Locale.ROOT` so that parsing does not depend on
  * the JVM default locale (a Turkish locale would turn "ACTIVE" into
  * "actıve", which is then rejected).
  *
  * @return the state, or `Left` with an error message for unknown names
  */
def fromString(s: String): Either[String, UserState] =
  s.toLowerCase(java.util.Locale.ROOT) match {
    case "active"   => Right(Active)
    case "disabled" => Right(Disabled)
    case _          => Left(s"Not a state value: $s")
  }
/** Like `fromString`, but raises an error via `sys.error` for an
  * unknown state name.
  */
def unsafe(str: String): UserState =
  fromString(str) match {
    case Right(state) => state
    case Left(msg)    => sys.error(msg)
  }
def asString(s: UserState): String = s match {
case Active => "active"
case Disabled => "disabled"
implicit val userStateEncoder: Encoder[UserState] =
implicit val userStateDecoder: Decoder[UserState] =
@ -0,0 +1,35 @@
package docspell.common.pureconfig
import docspell.common._
import _root_.pureconfig._
import _root_.pureconfig.error.{CannotConvert, FailureReason}
import scodec.bits.ByteVector
import scala.reflect.ClassTag
object Implicits {
implicit val lenientUriReader: ConfigReader[LenientUri] =
implicit val durationReader: ConfigReader[Duration] =
ConfigReader[scala.concurrent.duration.Duration].map(sd => Duration(sd))
implicit val passwordReader: ConfigReader[Password] =
implicit val mimeTypeReader: ConfigReader[MimeType] =
implicit val identReader: ConfigReader[Ident] =
implicit val byteVectorReader: ConfigReader[ByteVector] =
ConfigReader[String].emap(reason(str => {
if (str.startsWith("hex:")) ByteVector.fromHex(str.drop(4)).toRight("Invalid hex value.")
else if (str.startsWith("b64:")) ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.")
else ByteVector.fromHex(str).toRight("Invalid hex value.")
def reason[A: ClassTag](f: String => Either[String, A]): String => Either[FailureReason, A] =
in => f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str))
@ -0,0 +1,21 @@
package docspell.common.syntax
/** Extension methods for unwrapping an `Either` by throwing its left value. */
trait EitherSyntax {

  /** For `Either[String, A]`: the left string becomes the message of
    * the error raised by `sys.error`.
    */
  implicit final class LeftStringEitherOps[A](e: Either[String, A]) {
    def throwLeft: A =
      e.fold(msg => sys.error(msg), identity)
  }

  /** For `Either[Throwable, A]`: the left value is thrown as is. */
  implicit final class ThrowableLeftEitherOps[A](e: Either[Throwable, A]) {
    def throwLeft: A =
      e.fold(ex => throw ex, identity)
  }
}
object EitherSyntax extends EitherSyntax
@ -0,0 +1,35 @@
package docspell.common.syntax
import cats.effect.Sync
import fs2.Stream
import org.log4s.Logger
trait LoggerSyntax {
implicit final class LoggerOps(logger: Logger) {
def ftrace[F[_]: Sync](msg: => String): F[Unit] =
def fdebug[F[_]: Sync](msg: => String): F[Unit] =
def sdebug[F[_]: Sync](msg: => String): Stream[F, Nothing] =
def finfo[F[_]: Sync](msg: => String): F[Unit] =
def sinfo[F[_]: Sync](msg: => String): Stream[F, Nothing] =
def fwarn[F[_]: Sync](msg: => String): F[Unit] =
def ferror[F[_]: Sync](msg: => String): F[Unit] =
def ferror[F[_]: Sync](ex: Throwable)(msg: => String): F[Unit] =
@ -0,0 +1,24 @@
package docspell.common.syntax
import cats.effect.Sync
import fs2.Stream
import cats.implicits._
import io.circe._
import io.circe.parser._
trait StreamSyntax {
implicit class StringStreamOps[F[_]](s: Stream[F, String]) {
def parseJsonAs[A](implicit d: Decoder[A], F: Sync[F]): F[Either[Throwable, A]] =
s.fold("")(_ + _).
map(optStr => for {
str <- optStr.map(_.trim).toRight(new Exception("Empty string cannot be parsed into a value"))
json <- parse(str).leftMap(_.underlying)
value <- json.as[A]
} yield value)
@ -0,0 +1,21 @@
package docspell.common.syntax
import cats.implicits._
import io.circe.Decoder
import io.circe.parser._
trait StringSyntax {
implicit class EvenMoreStringOps(s: String) {
def asNonBlank: Option[String] =
/** Parses this string as JSON and decodes the result into an `A`.
  *
  * A parse failure is returned as its underlying exception, a decoding
  * failure as is.
  */
def parseJsonAs[A](implicit d: Decoder[A]): Either[Throwable, A] =
  parse(s).leftMap(_.underlying).flatMap(json => json.as[A])
@ -0,0 +1,10 @@
package docspell.common
package object syntax {
object all extends EitherSyntax
with StreamSyntax
with StringSyntax
with LoggerSyntax
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<pattern>[%thread] %highlight(%-5level) %cyan(%logger{15}) - %msg %n</pattern>
<logger name="docspell" level="debug" />
<root level="INFO">
<appender-ref ref="STDOUT" />
@ -1,3 +1,131 @@
docspell.joex {
docspell.joex {
# This is the id of this node. If you run more than one server, you
# have to make sure to provide unique ids per node.
app-id = "joex1"
# This is the base URL this application is deployed to. It is used
# to register this joex instance so that docspell rest servers can
# reach it.
base-url = "http://localhost:7878"
# Where the REST server binds to.
# JOEX provides a very simple REST interface to inspect its state.
bind {
address = "localhost"
port = 7878
# The database connection.
# By default a H2 file-based database is configured. You can provide
# a postgresql or mariadb connection here. When using H2 use the
# PostgreSQL compatibility mode and AUTO_SERVER feature.
# It must be the same connection as the rest server is using.
jdbc {
url = "jdbc:h2://"${java.io.tmpdir}"/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
user = "sa"
password = ""
# Configuration for the job scheduler.
scheduler {
# Each scheduler needs a unique name. This defaults to the node
# name, which must be unique, too.
name = ${docspell.joex.app-id}
# Number of jobs allowed to be processed in parallel.
pool-size = 2
# A counting scheme determines the ratio in which high- and low-prio
# jobs are run. For example: 4,1 means run 4 high prio jobs, then
# 1 low prio and then start over.
counting-scheme = "4,1"
# How often a failed job should be retried until it enters failed
# state. If a job fails, it becomes "stuck" and will be retried
# after a delay.
retries = 5
# The delay until the next try is performed for a failed job. This
# delay is increased exponentially with the number of retries.
retry-delay = "1 minute"
# The queue size of log statements from a job.
log-buffer-size = 500
# If no job is left in the queue, the scheduler will wait until a
# notify is requested (using the REST interface). To also retry
# stuck jobs, it will notify itself periodically.
wakeup-period = "30 minutes"
# Configuration of text extraction
# Extracting text currently only works for image and pdf files. It
# will first run ghostscript to create a gray image from a
# pdf. Then unpaper is run to optimize the image for the upcoming
# ocr, which will be done by tesseract. All these programs must be
# available in your PATH or the absolute path can be specified
# below.
extraction {
allowed-content-types = [ "application/pdf", "image/jpeg", "image/png" ]
# Defines what pages to process. If a PDF with 600 pages is
# submitted, it is probably not necessary to scan through all of
# them. This would take a long time and occupy resources for no
# value. The first few pages should suffice. The default is the first
# 10 pages.
# If you want all pages to be processed, set this number to -1.
# Note: if you change the ghostscript command below, be aware that
# this setting (if not -1) will add another parameter to the
# beginning of the command.
page-range {
begin = 10
# The ghostscript command.
ghostscript {
command {
program = "gs"
args = [ "-dNOPAUSE"
, "-dBATCH"
, "-dSAFER"
, "-sDEVICE=tiffscaled8"
, "-sOutputFile={{outfile}}"
, "{{infile}}"
timeout = "5 minutes"
working-dir = ${java.io.tmpdir}"/docspell-extraction"
# The unpaper command.
unpaper {
command {
program = "unpaper"
args = [ "{{infile}}", "{{outfile}}" ]
timeout = "5 minutes"
# The tesseract command.
tesseract {
command {
program = "tesseract"
args = ["{{file}}"
, "stdout"
, "-l"
, "{{lang}}"
timeout = "5 minutes"
@ -1,18 +1,21 @@
package docspell.joex
package docspell.joex
import docspell.common.{Ident, LenientUri}
import docspell.joex.scheduler.SchedulerConfig
import docspell.store.JdbcConfig
import docspell.store.JdbcConfig
import docspell.text.ocr.{Config => OcrConfig}
case class Config(id: String
case class Config(appId: Ident
, baseUrl: LenientUri
, bind: Config.Bind
, bind: Config.Bind
, jdbc: JdbcConfig
, jdbc: JdbcConfig
, scheduler: SchedulerConfig
, extraction: OcrConfig
object Config {
object Config {
val postgres = JdbcConfig(LenientUri.unsafe("jdbc:postgresql://localhost:5432/docspelldev"), "dev", "dev")
val h2 = JdbcConfig(LenientUri.unsafe("jdbc:h2:./target/docspelldev.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE"), "sa", "")
val default: Config =
Config("testid", Config.Bind("localhost", 7878), JdbcConfig("", "", ""))
case class Bind(address: String, port: Int)
case class Bind(address: String, port: Int)
package docspell.joex
import docspell.common.pureconfig.Implicits._
import _root_.pureconfig._
import _root_.pureconfig.generic.auto._
import docspell.joex.scheduler.CountingScheme
object ConfigFile {
import Implicits._
def loadConfig: Config =
object Implicits {
implicit val countingSchemeReader: ConfigReader[CountingScheme] =
@ -1,6 +1,23 @@
package docspell.joex
package docspell.joex
import docspell.common.Ident
import docspell.joex.scheduler.Scheduler
import docspell.store.records.RJobLog
trait JoexApp[F[_]] {
trait JoexApp[F[_]] {
def init: F[Unit]
def init: F[Unit]
def scheduler: Scheduler[F]
def findLogs(jobId: Ident): F[Vector[RJobLog]]
/** Shuts down the job executor.
* It will immediately stop taking new jobs, waiting for currently
* running jobs to complete normally (i.e. running jobs are not
* canceled). After this completed, the webserver stops and the
* main loop will exit.
def initShutdown: F[Unit]
@ -1,16 +1,56 @@
package docspell.joex
package docspell.joex
import cats.implicits._
import cats.effect._
import cats.effect._
import docspell.common.{Ident, NodeType, ProcessItemArgs}
import docspell.joex.process.ItemHandler
import docspell.joex.scheduler.{JobTask, Scheduler, SchedulerBuilder}
import docspell.store.Store
import docspell.store.ops.ONode
import docspell.store.records.RJobLog
import fs2.concurrent.SignallingRef
final class JoexAppImpl[F[_]: Sync](cfg: Config) extends JoexApp[F] {
import scala.concurrent.ExecutionContext
def init: F[Unit] =
final class JoexAppImpl[F[_]: ConcurrentEffect : ContextShift: Timer]( cfg: Config
, nodeOps: ONode[F]
, store: Store[F]
, termSignal: SignallingRef[F, Boolean]
, val scheduler: Scheduler[F]) extends JoexApp[F] {
def init: F[Unit] = {
val run = scheduler.start.compile.drain
for {
_ <- ConcurrentEffect[F].start(run)
_ <- scheduler.periodicAwake
_ <- nodeOps.register(cfg.appId, NodeType.Joex, cfg.baseUrl)
} yield ()
def findLogs(jobId: Ident): F[Vector[RJobLog]] =
def shutdown: F[Unit] =
def initShutdown: F[Unit] =
scheduler.shutdown(false) *> termSignal.set(true)
object JoexAppImpl {
object JoexAppImpl {
def create[F[_]: Sync](cfg: Config): Resource[F, JoexApp[F]] =
def create[F[_]: ConcurrentEffect : ContextShift: Timer](cfg: Config
Resource.liftF(Sync[F].pure(new JoexAppImpl(cfg)))
, termSignal: SignallingRef[F, Boolean]
, connectEC: ExecutionContext
, blocker: Blocker): Resource[F, JoexApp[F]] =
for {
store <- Store.create(cfg.jdbc, connectEC, blocker)
nodeOps <- ONode(store)
sch <- SchedulerBuilder(cfg.scheduler, blocker, store).
withTask(JobTask.json(ProcessItemArgs.taskName, ItemHandler[F](cfg.extraction), ItemHandler.onCancel[F])).
app = new JoexAppImpl(cfg, nodeOps, store, termSignal, sch)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR
@ -1,37 +1,48 @@
package docspell.joex
package docspell.joex
import cats.effect._
import cats.effect._
import cats.effect.concurrent.Ref
import docspell.joex.routes._
import org.http4s.server.blaze.BlazeServerBuilder
import org.http4s.server.blaze.BlazeServerBuilder
import org.http4s.implicits._
import org.http4s.implicits._
import fs2.Stream
import fs2.Stream
import fs2.concurrent.SignallingRef
import org.http4s.HttpApp
import org.http4s.server.middleware.Logger
import org.http4s.server.middleware.Logger
import org.http4s.server.Router
import org.http4s.server.Router
import scala.concurrent.ExecutionContext
object JoexServer {
object JoexServer {
def stream[F[_]: ConcurrentEffect](cfg: Config)
private case class App[F[_]](httpApp: HttpApp[F], termSig: SignallingRef[F, Boolean], exitRef: Ref[F, ExitCode])
def stream[F[_]: ConcurrentEffect : ContextShift](cfg: Config, connectEC: ExecutionContext, blocker: Blocker)
(implicit T: Timer[F]): Stream[F, Nothing] = {
(implicit T: Timer[F]): Stream[F, Nothing] = {
val app = for {
val app = for {
joexApp <- JoexAppImpl.create[F](cfg)
signal <- Resource.liftF(SignallingRef[F, Boolean](false))
_ <- Resource.liftF(joexApp.init)
exitCode <- Resource.liftF(Ref[F].of(ExitCode.Success))
joexApp <- JoexAppImpl.create[F](cfg, signal, connectEC, blocker)
httpApp = Router(
httpApp = Router(
"/api/info" -> InfoRoutes(cfg)
"/api/info" -> InfoRoutes(cfg),
"/api/v1" -> JoexRoutes(cfg, joexApp)
// With Middlewares in place
// With Middlewares in place
finalHttpApp = Logger.httpApp(false, false)(httpApp)
finalHttpApp = Logger.httpApp(false, false)(httpApp)
} yield finalHttpApp
} yield App(finalHttpApp, signal, exitCode)
Stream.resource(app).flatMap(httpApp =>
Stream.resource(app).flatMap(app =>
.bindHttp(cfg.bind.port, cfg.bind.address)
bindHttp(cfg.bind.port, cfg.bind.address).
serveWhile(app.termSig, app.exitRef)
@ -1,16 +1,23 @@
package docspell.joex
package docspell.joex
import cats.effect.{ExitCode, IO, IOApp}
import cats.effect.{Blocker, ExitCode, IO, IOApp}
import cats.implicits._
import cats.implicits._
import scala.concurrent.ExecutionContext
import scala.concurrent.ExecutionContext
import java.util.concurrent.Executors
import java.util.concurrent.Executors
import java.nio.file.{Files, Paths}
import java.nio.file.{Files, Paths}
import docspell.common.{Banner, ThreadFactories}
import org.log4s._
import org.log4s._
object Main extends IOApp {
object Main extends IOApp {
private[this] val logger = getLogger
private[this] val logger = getLogger
val blockingEc: ExecutionContext = ExecutionContext.fromExecutor(Executors.newCachedThreadPool)
val blockingEc: ExecutionContext = ExecutionContext.fromExecutor(
val blocker = Blocker.liftExecutionContext(blockingEc)
val connectEC: ExecutionContext = ExecutionContext.fromExecutorService(
Executors.newFixedThreadPool(5, ThreadFactories.ofName("docspell-joex-dbconnect")))
def run(args: List[String]) = {
def run(args: List[String]) = {
args match {
args match {
@ -32,7 +39,14 @@ object Main extends IOApp {
val cfg = Config.default
val cfg = ConfigFile.loadConfig
val banner = Banner("JOEX"
, BuildInfo.version
, BuildInfo.gitHeadCommit
, cfg.jdbc.url
, Option(System.getProperty("config.file"))
, cfg.appId, cfg.baseUrl)
JoexServer.stream[IO](cfg, connectEC, blocker).compile.drain.as(ExitCode.Success)
@ -0,0 +1,73 @@
package docspell.joex.process
import cats.implicits._
import cats.effect.Sync
import fs2.Stream
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.queries.QItem
import docspell.store.records.{RAttachment, RItem}
* Task that creates the item.
object CreateItem {
def apply[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] =
findExisting[F].flatMap {
case Some(ri) => Task.pure(ri)
case None => createNew[F]
def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
val validFiles = ctx.args.meta.validFileTypes.map(_.asString).toSet
def fileMetas(itemId: Ident, now: Timestamp) = Stream.emits(ctx.args.files).
flatMap(f => ctx.store.bitpeace.get(f.fileMetaId.id).map(fm => (f, fm))).
collect({ case (f, Some(fm)) if validFiles.contains(fm.mimetype.baseType) => (f, fm) }).
evalMap({ case ((f, fm), index) =>
Ident.randomId[F].map(id => RAttachment(id, itemId, f.fileMetaId, index.toInt, now, f.name))
val item = RItem.newItem[F](ctx.args.meta.collective
, ctx.args.makeSubject
, ctx.args.meta.sourceAbbrev
, ctx.args.meta.direction.getOrElse(Direction.Incoming)
, ItemState.Premature)
for {
_ <- ctx.logger.info(s"Creating new item with ${ctx.args.files.size} attachment(s)")
time <- Duration.stopTime[F]
it <- item
n <- ctx.store.transact(RItem.insert(it))
_ <- if (n != 1) storeItemError[F](ctx) else ().pure[F]
fm <- fileMetas(it.id, it.created)
k <- fm.traverse(a => ctx.store.transact(RAttachment.insert(a)))
_ <- logDifferences(ctx, fm, k.sum)
dur <- time
_ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}")
} yield ItemData(it, fm, Vector.empty, Vector.empty)
def findExisting[F[_]: Sync]: Task[F, ProcessItemArgs, Option[ItemData]] =
Task { ctx =>
for {
cand <- ctx.store.transact(QItem.findByFileIds(ctx.args.files.map(_.fileMetaId)))
_ <- if (cand.nonEmpty) ctx.logger.warn("Found existing item with these files.") else ().pure[F]
ht <- cand.drop(1).traverse(ri => QItem.delete(ctx.store)(ri.id, ri.cid))
_ <- if (ht.sum > 0) ctx.logger.warn(s"Removed ${ht.sum} items with same attachments") else ().pure[F]
rms <- cand.headOption.traverse(ri => ctx.store.transact(RAttachment.findByItemAndCollective(ri.id, ri.cid)))
} yield cand.headOption.map(ri => ItemData(ri, rms.getOrElse(Vector.empty), Vector.empty, Vector.empty))
/** Logs a warning when not every submitted file ended up as a stored
  * attachment.
  *
  * @param ctx       the task context; supplies the submitted files and the logger
  * @param saved     the attachment records that were created for the item
  * @param saveCount the sum of the update counts returned by the attachment inserts
  */
private def logDifferences[F[_]: Sync](ctx: Context[F, ProcessItemArgs], saved: Vector[RAttachment], saveCount: Int): F[Unit] = {
  val submitted = ctx.args.files.size
  // Nothing to report if every file produced an attachment and every insert counted.
  if (submitted != saved.size || saveCount != saved.size)
    ctx.logger.warn(
      s"Not all given files ($submitted) have been stored. Files retained: ${saved.size}; saveCount=$saveCount"
    )
  else ().pure[F]
}
private def storeItemError[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): F[Unit] = {
val msg = "Inserting item failed. DB returned 0 update count!"
ctx.logger.error(msg) *> Sync[F].raiseError(new Exception(msg))
@ -0,0 +1,181 @@
package docspell.joex.process
import java.time.ZoneId
import cats.{Applicative, FlatMap}
import cats.implicits._
import cats.effect.Sync
import docspell.common.MetaProposal.Candidate
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records.{RAttachmentMeta, REquipment, ROrganization, RPerson}
import docspell.text.contact.Domain
/** Super simple approach to find corresponding meta data to an item
* by looking up values from NER in the users address book.
object FindProposal {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
val rmas = data.metas.map(rm =>
rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
ctx.logger.info("Starting find-proposal") *>
rmas.traverse(rm => processAttachment(rm, data.findDates(rm), ctx).map(ml => rm.copy(proposals = ml))).
flatMap(rmv => rmv.traverse(rm =>
ctx.logger.debug(s"Storing attachment proposals: ${rm.proposals}") *>
ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))).
map(_ => data.copy(metas = rmv)))
def processAttachment[F[_]: Sync]( rm: RAttachmentMeta
, rd: Vector[NerDateLabel]
, ctx: Context[F, ProcessItemArgs]): F[MetaProposalList] = {
val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx))
List(finder.find(rm.nerlabels), makeDateProposal(rd)).
/** Turns the dates found in a document into date proposals.
  *
  * The dates are sorted latest first and split at the current date
  * (taken in the GMT zone): dates after today become `DueDate`
  * candidates, the remaining ones `DocDate` candidates.
  */
def makeDateProposal[F[_]: Sync](dates: Vector[NerDateLabel]): F[MetaProposalList] = {
  // A candidate uses the date's string form both as id and as name.
  def candidates(labels: Vector[NerDateLabel]): Vector[Candidate] =
    labels.map { ndl =>
      Candidate(IdRef(Ident.unsafe(ndl.date.toString), ndl.date.toString), Set(ndl.label))
    }

  Timestamp.current[F].map { now =>
    val today = now.value.atZone(ZoneId.of("GMT")).toLocalDate
    val (future, notFuture) =
      dates.sortWith(_.date isAfter _.date).span(_.date.isAfter(today))

    val due = MetaProposalList.fromSeq1(MetaProposalType.DueDate, candidates(future))
    val doc = MetaProposalList.fromSeq1(MetaProposalType.DocDate, candidates(notFuture))
    MetaProposalList.flatten(Seq(due, doc))
  }
}
def removeDuplicates(labels: List[NerLabel]): List[NerLabel] =
labels.foldLeft((Set.empty[String], List.empty[NerLabel])) { case ((seen, result), el) =>
if (seen.contains(el.tag.name+el.label.toLowerCase)) (seen, result)
else (seen + (el.tag.name + el.label.toLowerCase), el :: result)
/** A strategy that turns NER labels into a list of meta data proposals.
  *
  * `find` is the only abstract member; the other methods derive new finders
  * from this one.
  */
trait Finder[F[_]] { self =>
def find(labels: Seq[NerLabel]): F[MetaProposalList]
// applies `f` to the labels before handing them to this finder
def contraMap(f: Seq[NerLabel] => Seq[NerLabel]): Finder[F] =
labels => self.find(f(labels))
// NOTE(review): the body of filterLabels is not part of this excerpt
def filterLabels(f: NerLabel => Boolean): Finder[F] =
// runs this finder, then the finder selected by `f` from its result, on the same labels
def flatMap(f: MetaProposalList => Finder[F])(implicit F: FlatMap[F]): Finder[F] =
labels => self.find(labels).flatMap(ml => f(ml).find(labels))
// transforms the result of this finder
def map(f: MetaProposalList => MetaProposalList)(implicit F: Applicative[F]): Finder[F] =
labels => self.find(labels).map(f)
// keeps this finder's result if `hasResultsAll` holds; otherwise also runs `f`
// and merges its result in through `fillEmptyFrom`
def next(f: Finder[F])(implicit F: FlatMap[F], F3: Applicative[F]): Finder[F] =
flatMap({ ml0 =>
if (ml0.hasResultsAll) Finder.unit[F](ml0)
else f.map(ml1 => ml0.fillEmptyFrom(ml1))
// same as `next`, but the check is `hasResults` for the given proposal types only
def nextWhenEmpty(f: Finder[F], mt0: MetaProposalType, mts: MetaProposalType*)
(implicit F: FlatMap[F], F2: Applicative[F]): Finder[F] =
flatMap(res0 => {
if (res0.hasResults(mt0, mts: _*)) Finder.unit[F](res0)
else f.map(res1 => res0.fillEmptyFrom(res1))
object Finder {
/** A finder that ignores the labels and always yields the empty proposal list. */
def none[F[_]: Applicative]: Finder[F] =
  new Finder[F] {
    def find(labels: Seq[NerLabel]): F[MetaProposalList] =
      Applicative[F].pure(MetaProposalList.empty)
  }
/** A finder that ignores the labels and always yields `value`. */
def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] =
  new Finder[F] {
    def find(labels: Seq[NerLabel]): F[MetaProposalList] =
      Applicative[F].pure(value)
  }
/** A finder that runs `search` in exact mode for every label and merges the results. */
def searchExact[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] =
  labels =>
    labels.toList
      .traverse(label => search(label, exact = true, ctx))
      .map(found => MetaProposalList.flatten(found))
/** A finder that runs `search` in non-exact mode for every label and merges the results. */
def searchFuzzy[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] =
  labels =>
    labels.toList
      .traverse(label => search(label, exact = false, ctx))
      .map(found => MetaProposalList.flatten(found))
/** Looks up database records matching a single NER label.
  *
  * The label text is normalized first; in non-exact mode it is additionally
  * wrapped in `%`. Values shorter than the mode's minimum length are skipped
  * with a debug message. Which records are searched depends on the label's tag.
  */
private def search[F[_]: Sync](nt: NerLabel, exact: Boolean, ctx: Context[F, ProcessItemArgs]): F[MetaProposalList] = {
val value =
if (exact) normalizeSearchValue(nt.label)
else s"%${normalizeSearchValue(nt.label)}%"
// note: in non-exact mode `value` already contains the two surrounding `%` characters
val minLength =
if (exact) 2 else 5
if (value.length < minLength) {
ctx.logger.debug(s"Skipping too small value '$value' (original '${nt.label}').").map(_ => MetaProposalList.empty)
} else nt.tag match {
case NerTag.Organization =>
ctx.logger.debug(s"Looking for organizations: $value") *>
ctx.store.transact(ROrganization.findLike(ctx.args.meta.collective, value)).
map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
case NerTag.Person =>
// the same value is looked up twice: flag `true` feeds ConcPerson, flag `false` feeds CorrPerson
val s1 = ctx.store.transact(RPerson.findLike(ctx.args.meta.collective, value, true)).
map(MetaProposalList.from(MetaProposalType.ConcPerson, nt))
val s2 = ctx.store.transact(RPerson.findLike(ctx.args.meta.collective, value, false)).
map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
ctx.logger.debug(s"Looking for persons: $value") *> (for {
ml0 <- s1
ml1 <- s2
} yield ml0 |+| ml1)
case NerTag.Location =>
ctx.logger.debug(s"NerTag 'Location' is currently not used. Ignoring value '$value'.").
map(_ => MetaProposalList.empty)
case NerTag.Misc =>
ctx.logger.debug(s"Looking for equipments: $value") *>
ctx.store.transact(REquipment.findLike(ctx.args.meta.collective, value)).
map(MetaProposalList.from(MetaProposalType.ConcEquip, nt))
case NerTag.Email =>
searchContact(nt, ContactKind.Email, value, ctx)
case NerTag.Website =>
if (!exact) {
// non-exact mode searches by the domain extracted from the original label instead of `value`
// NOTE(review): the end of this expression (after the trailing `.`) is not part of this excerpt
val searchString = Domain.domainFromUri(nt.label.toLowerCase).
map(s => s"%$s%").
searchContact(nt, ContactKind.Website, searchString, ctx)
} else {
searchContact(nt, ContactKind.Website, value, ctx)
case NerTag.Date =>
// There is no database search required for this tag
/** Looks up organizations and persons by one of their contact values.
  *
  * Three queries are run for the collective of the current job:
  * organizations, persons with the flag `false`, and persons with the flag
  * `true`. Their results are tagged as `CorrOrg`, `CorrPerson` and
  * `ConcPerson` proposals respectively and merged into one list.
  *
  * @param nt    the label that triggered the search; attached to every proposal
  * @param kind  the kind of contact to compare against
  * @param value the (already normalized) search value
  * @param ctx   job context providing store, logger and arguments
  */
private def searchContact[F[_]: Sync]( nt: NerLabel
                                     , kind: ContactKind
                                     , value: String
                                     , ctx: Context[F, ProcessItemArgs]): F[MetaProposalList] = {
  val collective = ctx.args.meta.collective

  val orgs = ctx.store.transact(ROrganization.findLike(collective, kind, value)).
    map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
  val corrP = ctx.store.transact(RPerson.findLike(collective, kind, value, false)).
    map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
  // Fixed: this lookup uses the flag `true`, which `search` maps to ConcPerson.
  // It was tagged CorrPerson before, so contact matches never produced a
  // concerning-person proposal.
  val concP = ctx.store.transact(RPerson.findLike(collective, kind, value, true)).
    map(MetaProposalList.from(MetaProposalType.ConcPerson, nt))

  ctx.logger.debug(s"Looking with $kind: $value") *>
    List(orgs, corrP, concP).traverse(identity).map(MetaProposalList.flatten)
}
// The backslash *must* be stripped from search strings.
private [this] val invalidSearch =
/** Lower-cases `str` and removes every character contained in `invalidSearch`. */
private def normalizeSearchValue(str: String): String = {
  val lowered = str.toLowerCase
  lowered.filterNot(ch => invalidSearch.contains(ch))
}
@ -0,0 +1,27 @@
package docspell.joex.process
import docspell.common.{Ident, NerDateLabel, NerLabel}
import docspell.joex.process.ItemData.AttachmentDates
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
/** The data of an item as it is handed from one processing step to the next.
  *
  * @param item        the item record
  * @param attachments the attachments of the item
  * @param metas       meta data records of the attachments
  * @param dateLabels  dates found per attachment
  */
case class ItemData( item: RItem
                   , attachments: Vector[RAttachment]
                   , metas: Vector[RAttachmentMeta]
                   , dateLabels: Vector[AttachmentDates]) {

  /** The first meta record whose id equals `attachId`, if any. */
  def findMeta(attachId: Ident): Option[RAttachmentMeta] =
    metas.collectFirst { case meta if meta.id == attachId => meta }

  /** The dates recorded for the attachment meta `rm`; empty if nothing is recorded. */
  def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
    dateLabels
      .collectFirst { case entry if entry.rm.id == rm.id => entry.dates }
      .getOrElse(Vector.empty)
}
object ItemData {

  /** The dates found in the text of one attachment. */
  case class AttachmentDates(rm: RAttachmentMeta, dates: Vector[NerDateLabel]) {

    /** The labels of all dates, with the label text replaced by the date's string form. */
    def toNerLabel: Vector[NerLabel] =
      for (found <- dates) yield found.label.copy(label = found.date.toString)
  }
}
@ -0,0 +1,62 @@
package docspell.joex.process
import cats.implicits._
import cats.effect.{ContextShift, Sync}
import docspell.common.{ItemState, ProcessItemArgs}
import docspell.joex.scheduler.{Context, Task}
import docspell.store.queries.QItem
import docspell.store.records.{RItem, RJob}
import docspell.text.ocr.{Config => OcrConfig}
/** Tasks for processing an item and for cleaning up after a cancelled run.
  *
  * NOTE(review): several members below are truncated in this excerpt
  * (`apply`, `safeProcess`, `logWarn`); the comments only describe what is visible.
  */
object ItemHandler {
// cancel handler: logs a warning, then removes the items found via `deleteByFileIds`
def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
logWarn("Now cancelling. Deleting potentially created data.").
flatMap(_ => deleteByFileIds)
// NOTE(review): only the final `map` of the task chain is visible here
def apply[F[_]: Sync: ContextShift](cfg: OcrConfig): Task[F, ProcessItemArgs, Unit] =
map(_ => ())
// stores `state` for the item and passes `data` through unchanged
def itemStateTask[F[_]: Sync, A](state: ItemState)(data: ItemData): Task[F, A, ItemData] =
Task { ctx =>
ctx.store.transact(RItem.updateState(data.item.id, state)).map(_ => data)
// true when the retry count stored for this job (0 if absent) equals the configured `retries`
def isLastRetry[F[_]: Sync, A](ctx: Context[F, A]): F[Boolean] =
for {
current <- ctx.store.transact(RJob.getRetries(ctx.jobId))
last = ctx.config.retries == current.getOrElse(0)
} yield last
// on the last retry, a failure sets the item state to `Created` and then re-raises the error
// NOTE(review): the processing calls of both branches are not part of this excerpt
def safeProcess[F[_]: Sync: ContextShift](cfg: OcrConfig)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task(isLastRetry[F, ProcessItemArgs] _).flatMap {
case true =>
case Right(d) =>
case Left(ex) =>
logWarn[F]("Processing failed on last retry. Creating item but without proposals.").
flatMap(_ => itemStateTask(ItemState.Created)(data)).
andThen(_ => Sync[F].raiseError(ex))
case false =>
// finds the items referring to the file ids in the job arguments and deletes each of them
def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
Task { ctx =>
for {
items <- ctx.store.transact(QItem.findByFileIds(ctx.args.files.map(_.fileMetaId)))
_ <- ctx.logger.info(s"Deleting items ${items.map(_.id.id)}")
_ <- items.traverse(i => QItem.delete(ctx.store)(i.id, ctx.args.meta.collective))
} yield ()
// NOTE(review): body not part of this excerpt
private def logWarn[F[_]](msg: => String): Task[F, ProcessItemArgs, Unit] =
@ -0,0 +1,71 @@
package docspell.joex.process
import cats.implicits._
import cats.effect.Sync
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records.RItem
object LinkProposal {
/** Task that applies the proposals of all attachments to the item.
  *
  * The proposals of all attachment metas are merged into one list; the
  * resulting task yields `data` unchanged.
  */
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
val proposals = MetaProposalList.flatten(data.metas.map(_.proposals))
ctx.logger.info(s"Starting linking proposals") *>
// NOTE(review): the receiver of `traverse` is not part of this excerpt
traverse(applyValue(data, proposals, ctx)).
// NOTE(review): this `map` produces the logging effect as a value without sequencing it;
// whether the message is ever written should be confirmed
map(result => ctx.logger.info(s"Results from proposal processing: $result")).
map(_ => data)
/** Applies the proposal of type `mpt`, if present in `proposalList`, to the item.
  *
  * With exactly one candidate, or with several, the first candidate is set on
  * the item; the returned `Result` tells the two cases apart.
  */
def applyValue[F[_]: Sync](data: ItemData, proposalList: MetaProposalList, ctx: Context[F, ProcessItemArgs])(mpt: MetaProposalType): F[Result] = {
proposalList.find(mpt) match {
// NOTE(review): the body of the `None` case is not part of this excerpt
case None =>
case Some(a) if a.isSingleValue =>
ctx.logger.info(s"Found one candidate for ${a.proposalType}") *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).
map(_ => Result.single(mpt))
case Some(a) =>
// several candidates: the first one wins
ctx.logger.info(s"Found many (${a.size}, ${a.values.map(_.ref.id.id)}) candidates for ${a.proposalType}. Setting first.") *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).
map(_ => Result.multiple(mpt))
/** Writes a proposed value to the item, depending on the proposal type.
  *
  * Organization, person and equipment proposals update the corresponding
  * reference of the item within the collective of the current job. Date
  * proposals (`DocDate`, `DueDate`) are not linked; they are only logged.
  *
  * @param itemId the item to update
  * @param ctx    job context providing store, logger and arguments
  * @param mpt    the kind of proposal that `value` belongs to
  * @param value  the id of the proposed record
  * @return the result of the update statement, or 0 when nothing was linked
  */
def setItemMeta[F[_]: Sync](itemId: Ident, ctx: Context[F, ProcessItemArgs], mpt: MetaProposalType, value: Ident): F[Int] = {
  val collective = ctx.args.meta.collective
  // All messages log the plain id string (`value.id`) for consistency.
  mpt match {
    case MetaProposalType.CorrOrg =>
      ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
        ctx.store.transact(RItem.updateCorrOrg(itemId, collective, Some(value)))
    case MetaProposalType.ConcPerson =>
      ctx.logger.debug(s"Updating item concerning person with: ${value.id}") *>
        ctx.store.transact(RItem.updateConcPerson(itemId, collective, Some(value)))
    case MetaProposalType.CorrPerson =>
      ctx.logger.debug(s"Updating item correspondent person with: ${value.id}") *>
        ctx.store.transact(RItem.updateCorrPerson(itemId, collective, Some(value)))
    case MetaProposalType.ConcEquip =>
      ctx.logger.debug(s"Updating item concerning equipment with: ${value.id}") *>
        ctx.store.transact(RItem.updateConcEquip(itemId, collective, Some(value)))
    case MetaProposalType.DocDate =>
      ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
    case MetaProposalType.DueDate =>
      // Fixed: the message used to say "document date" for due dates as well.
      ctx.logger.debug(s"Not linking due date suggestion ${value.id}").map(_ => 0)
  }
}
/** Outcome of applying the proposals of one type to an item. */
sealed trait Result {
// the proposal type this outcome refers to
def proposalType: MetaProposalType
object Result {
// no proposal of this type was available
case class NoneFound(proposalType: MetaProposalType) extends Result
// exactly one candidate was available
case class SingleResult(proposalType: MetaProposalType) extends Result
// more than one candidate was available
case class MultipleResult(proposalType: MetaProposalType) extends Result
// constructors that return the widened `Result` type
def noneFound(proposalType: MetaProposalType): Result = NoneFound(proposalType)
def single(proposalType: MetaProposalType): Result = SingleResult(proposalType)
def multiple(proposalType: MetaProposalType): Result = MultipleResult(proposalType)
@ -0,0 +1,19 @@
package docspell.joex.process
import cats.effect.{ContextShift, Sync}
import docspell.common.ProcessItemArgs
import docspell.joex.scheduler.Task
import docspell.text.ocr.{Config => OcrConfig}
/** The processing pipeline for an item; it starts with text extraction. */
object ProcessItem {
def apply[F[_]: Sync: ContextShift](cfg: OcrConfig)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
// NOTE(review): the steps chained after the trailing `.` are not part of this excerpt
TextExtraction(cfg, item).
@ -0,0 +1,39 @@
package docspell.joex.process
import cats.implicits._
import cats.effect.Sync
import docspell.common.ProcessItemArgs
import docspell.common.syntax.all._
import docspell.joex.scheduler.Task
import org.log4s._
/** Simple tasks (succeeding, failing, long running) that only log and wait. */
object TestTasks {
private [this] val logger = getLogger
// logs the job arguments and finishes
def success[F[_]]: Task[F, ProcessItemArgs, Unit] =
Task { ctx =>
ctx.logger.info(s"Running task now: ${ctx.args}")
// logs a message, then fails: `sys.error` throws inside the `map`
def failing[F[_]: Sync]: Task[F, ProcessItemArgs, Unit] =
Task { ctx =>
ctx.logger.info(s"Failing the task run :(").map(_ =>
sys.error("Oh, cannot extract gold from this document")
// alternates log messages with three `sleep(2400)` calls
def longRunning[F[_]: Sync]: Task[F, ProcessItemArgs, Unit] =
Task { ctx =>
logger.fwarn(s"${Thread.currentThread()} From executing long running task") >>
ctx.logger.info(s"${Thread.currentThread()} Running task now: ${ctx.args}") >>
sleep(2400) >>
ctx.logger.debug("doing things") >>
sleep(2400) >>
ctx.logger.debug("doing more things") >>
sleep(2400) >>
ctx.logger.info("doing more things")
// NOTE(review): body not part of this excerpt; `ms` is presumably milliseconds — confirm
private def sleep[F[_]:Sync](ms: Long): F[Unit] =
@ -0,0 +1,49 @@
package docspell.joex.process
import cats.implicits._
import cats.effect.Sync
import docspell.common.{Duration, Language, NerLabel, ProcessItemArgs}
import docspell.joex.process.ItemData.AttachmentDates
import docspell.joex.scheduler.Task
import docspell.store.records.RAttachmentMeta
import docspell.text.contact.Contact
import docspell.text.date.DateFind
import docspell.text.nlp.StanfordNerClassifier
/** Processing step that annotates the attachment texts with NER labels and dates. */
object TextAnalysis {
// annotates every attachment meta, stores the labels and returns the item
// with updated `metas` and `dateLabels`; the elapsed time is logged
def apply[F[_]: Sync](item: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
for {
_ <- ctx.logger.info("Starting text analysis")
s <- Duration.stopTime[F]
t <- item.metas.toList.traverse(annotateAttachment[F](ctx.args.meta.language))
// the text content is blanked out for the log message only
_ <- ctx.logger.debug(s"Storing tags: ${t.map(_._1.copy(content = None))}")
_ <- t.traverse(m => ctx.store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels)))
e <- s
_ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}")
v = t.toVector
} yield item.copy(metas = v.map(_._1), dateLabels = v.map(_._2))
// combines the labels of the three recognizers; the dates are additionally returned on their own
def annotateAttachment[F[_]: Sync](lang: Language)(rm: RAttachmentMeta): F[(RAttachmentMeta, AttachmentDates)] =
for {
list0 <- stanfordNer[F](lang, rm)
list1 <- contactNer[F](rm)
dates <- dateNer[F](rm, lang)
} yield (rm.copy(nerlabels = (list0 ++ list1 ++ dates.toNerLabel).toList), dates)
// NOTE(review): the bodies of stanfordNer and contactNer are not part of this excerpt
def stanfordNer[F[_]: Sync](lang: Language, rm: RAttachmentMeta): F[Vector[NerLabel]] = Sync[F].delay {
def contactNer[F[_]: Sync](rm: RAttachmentMeta): F[Vector[NerLabel]] = Sync[F].delay {
// searches the text content for dates; no content yields an empty vector
def dateNer[F[_]: Sync](rm: RAttachmentMeta, lang: Language): F[AttachmentDates] = Sync[F].delay {
AttachmentDates(rm, rm.content.map(txt => DateFind.findDates(txt, lang).toVector).getOrElse(Vector.empty))
@ -0,0 +1,45 @@
package docspell.joex.process
import bitpeace.RangeDef
import cats.implicits._
import cats.effect.{Blocker, ContextShift, Sync}
import docspell.common.{Duration, Language, ProcessItemArgs}
import docspell.joex.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records.{RAttachment, RAttachmentMeta}
import docspell.text.ocr.{TextExtract, Config => OcrConfig}
/** Processing step that extracts the text of every attachment and stores it. */
object TextExtraction {
// extracts the text of all attachments, upserts the resulting meta records
// and returns the item with `metas` replaced; the elapsed time is logged
def apply[F[_]: Sync : ContextShift](cfg: OcrConfig, item: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
for {
_ <- ctx.logger.info("Starting text extraction")
start <- Duration.stopTime[F]
txt <- item.attachments.traverse(extractTextToMeta(ctx, cfg, ctx.args.meta.language))
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm)))
dur <- start
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
} yield item.copy(metas = txt)
// builds an otherwise empty meta record for `ra` that carries the extracted text
def extractTextToMeta[F[_]: Sync : ContextShift](ctx: Context[F, _], cfg: OcrConfig, lang: Language)(ra: RAttachment): F[RAttachmentMeta] =
for {
_ <- ctx.logger.debug(s"Extracting text for attachment ${ra.name}")
dst <- Duration.stopTime[F]
txt <- extractText(cfg, lang, ctx.store, ctx.blocker)(ra)
// the text is trimmed; an empty text is stored as "no content"
meta = RAttachmentMeta.empty(ra.id).copy(content = txt.map(_.trim).filter(_.nonEmpty))
est <- dst
_ <- ctx.logger.debug(s"Extracting text for attachment ${ra.name} finished in ${est.formatExact}")
} yield meta
// NOTE(review): both expressions below end on a trailing `.`; their continuations are not part of this excerpt
def extractText[F[_]: Sync : ContextShift](ocrConfig: OcrConfig, lang: Language, store: Store[F], blocker: Blocker)(ra: RAttachment): F[Option[String]] = {
val data = store.bitpeace.get(ra.fileId.id).
TextExtract.extract(data, blocker, lang.iso3, ocrConfig).
@ -1,14 +1,12 @@
package docspell.joex
package docspell.joex.routes
import cats.effect._
import cats.effect.Sync
import org.http4s._
import docspell.joex.{BuildInfo, Config}
import docspell.joexapi.model.VersionInfo
import org.http4s.HttpRoutes
import org.http4s.HttpRoutes
import org.http4s.dsl.Http4sDsl
import org.http4s.dsl.Http4sDsl
import org.http4s.circe.CirceEntityEncoder._
import org.http4s.circe.CirceEntityEncoder._
import docspell.joexapi.model._
import docspell.joex.BuildInfo
object InfoRoutes {
object InfoRoutes {
def apply[F[_]: Sync](cfg: Config): HttpRoutes[F] = {
def apply[F[_]: Sync](cfg: Config): HttpRoutes[F] = {
@ -0,0 +1,59 @@
package docspell.joex.routes
import cats.implicits._
import cats.effect._
import docspell.common.{Duration, Ident, Timestamp}
import docspell.joex.{Config, JoexApp}
import docspell.joexapi.model._
import docspell.store.records.{RJob, RJobLog}
import org.http4s.HttpRoutes
import org.http4s.dsl.Http4sDsl
import org.http4s.circe.CirceEntityEncoder._
object JoexRoutes {
/** HTTP routes to inspect and control the scheduler of a joex instance. */
def apply[F[_]: ConcurrentEffect: Timer](cfg: Config, app: JoexApp[F]): HttpRoutes[F] = {
val dsl = new Http4sDsl[F]{}
import dsl._
HttpRoutes.of[F] {
// POST /notify: tells the scheduler that something changed
case POST -> Root / "notify" =>
for {
_ <- app.scheduler.notifyChange
resp <- Ok(BasicResult(true, "Scheduler notified."))
} yield resp
// GET /running: lists the jobs the scheduler reports as running
case GET -> Root / "running" =>
for {
jobs <- app.scheduler.getRunning
jj = jobs.map(mkJob)
resp <- Ok(JobList(jj.toList))
} yield resp
// POST /shutdownAndExit: starts the shutdown in the background after a delay of
// one second and responds immediately
case POST -> Root / "shutdownAndExit" =>
for {
_ <- ConcurrentEffect[F].start(Timer[F].sleep(Duration.seconds(1).toScala) *> app.initShutdown)
resp <- Ok(BasicResult(true, "Shutdown initiated."))
} yield resp
// GET /job/{id}: a running job together with its log; 404 if the job is not running
case GET -> Root / "job" / Ident(id) =>
for {
optJob <- app.scheduler.getRunning.map(_.find(_.id == id))
optLog <- optJob.traverse(j => app.findLogs(j.id))
jAndL = for { job <- optJob; log <- optLog } yield mkJobLog(job, log)
resp <- jAndL.map(Ok(_)).getOrElse(NotFound(BasicResult(false, "Not found")))
} yield resp
// POST /job/{id}/cancel: requests cancellation; the result flag tells whether the job was found
case POST -> Root / "job" / Ident(id) / "cancel" =>
for {
flag <- app.scheduler.requestCancel(id)
resp <- Ok(BasicResult(flag, if (flag) "Cancel request submitted" else "Job not found"))
} yield resp
/** Converts a job record into its API model; a missing start time is reported as `Timestamp.Epoch`. */
def mkJob(j: RJob): Job = {
  val startedAt = j.started.getOrElse(Timestamp.Epoch)
  Job(j.id, j.subject, j.submitted, j.priority, j.retries, j.progress, startedAt)
}
/** Converts a job record and its log records into the API model. */
def mkJobLog(j: RJob, jl: Vector[RJobLog]): JobAndLog = {
  val events = jl.toList.map(entry => JobLogEvent(entry.created, entry.level, entry.message))
  JobAndLog(mkJob(j), events)
}
@ -0,0 +1,69 @@
package docspell.joex.scheduler
import cats.Functor
import cats.effect.{Blocker, Concurrent}
import cats.implicits._
import docspell.common.Ident
import docspell.store.Store
import docspell.store.records.RJob
import docspell.common.syntax.all._
import org.log4s.{Logger => _, _}
/** Everything a task gets access to while it runs: its arguments, a logger,
  * the store, a blocker and the scheduler configuration.
  */
trait Context[F[_], A] { self =>
// id of the job this context belongs to
def jobId: Ident
// the task arguments
def args: A
def config: SchedulerConfig
def logger: Logger[F]
// records the progress of the job; see the implementation for the accepted range
def setProgress(percent: Int): F[Unit]
def store: Store[F]
def blocker: Blocker
// a context with transformed arguments; all other members are passed on unchanged
def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] =
new Context.ContextImpl[F, C](f(args), logger, store, blocker, config, jobId)
object Context {
  private [this] val log = getLogger

  /** Creates a context from already existing parts; the job id is taken from `job`. */
  def create[F[_]: Functor, A]( job: RJob
                              , arg: A
                              , config: SchedulerConfig
                              , log: Logger[F]
                              , store: Store[F]
                              , blocker: Blocker): Context[F, A] =
    new ContextImpl[F, A](
      args = arg,
      logger = log,
      store = store,
      blocker = blocker,
      config = config,
      jobId = job.id
    )

  /** Creates a context with a new logger that writes to `logSink`. */
  def apply[F[_]: Concurrent, A]( job: RJob
                                , arg: A
                                , config: SchedulerConfig
                                , logSink: LogSink[F]
                                , blocker: Blocker
                                , store: Store[F]): F[Context[F, A]] =
    log.ftrace("Creating logger for task run").flatMap { _ =>
      Logger(job.id, job.info, config.logBufferSize, logSink).flatMap { taskLogger =>
        log.ftrace("Logger created, instantiating context").map { _ =>
          create[F, A](job, arg, config, taskLogger, store, blocker)
        }
      }
    }

  private final class ContextImpl[F[_]: Functor, A]( val args: A
                                                   , val logger: Logger[F]
                                                   , val store: Store[F]
                                                   , val blocker: Blocker
                                                   , val config: SchedulerConfig
                                                   , val jobId: Ident)
      extends Context[F,A] {

    /** Stores the progress of the job, clamped to the range 0 to 100. */
    def setProgress(percent: Int): F[Unit] = {
      val clamped = percent.max(0).min(100)
      store.transact(RJob.setProgress(jobId, clamped)).map(_ => ())
    }
  }
}
@ -0,0 +1,40 @@
package docspell.joex.scheduler
import cats.implicits._
import docspell.common.Priority
/** A counting scheme to indicate a ratio between scheduling high and
* low priority jobs.
* For example high=4, low=1 means: "schedule 4 high priority jobs
* and then 1 low priority job".
case class CountingScheme(high: Int, low: Int, counter: Int = 0) {

  /** The priority to use next, together with the scheme advanced by one step.
    *
    * A counter of zero or less always yields `High`; otherwise the position of
    * the counter within a cycle of `high + low` steps decides.
    */
  def nextPriority: (CountingScheme, Priority) = {
    val prio: Priority =
      if (counter <= 0 || counter % (high + low) < high) Priority.High
      else Priority.Low
    (increment, prio)
  }

  /** This scheme with the counter increased by one. */
  def increment: CountingScheme =
    copy(counter = counter + 1)
}
object CountingScheme {
// NOTE(review): body not part of this excerpt; presumably the inverse of `readString` — confirm
def writeString(cs: CountingScheme): String =
// parses two comma-separated integers (high first, then low); anything else yields a `Left`
def readString(str: String): Either[String, CountingScheme] =
str.split(',') match {
case Array(h, l) =>
// NOTE(review): the expression ends on a trailing `.`; the conversion of the error to String is not part of this excerpt
Either.catchNonFatal(CountingScheme(h.toInt, l.toInt)).
case _ =>
Left(s"Invalid counting scheme: $str")
@ -0,0 +1,33 @@
package docspell.joex.scheduler
import cats.implicits._
import cats.effect.Sync
import docspell.common.Ident
import docspell.common.syntax.all._
import io.circe.Decoder
* Binds a Task to a name. This is required to lookup the code based
* on the taskName in the RJob data and to execute it given the
* arguments that have to be read from a string.
* Since the scheduler only has a string for the task argument, this
* only works for Task impls that accept a string. There is a
* convenience constructor that uses circe to decode json into some
* type A.
case class JobTask[F[_]](name: Ident, task: Task[F, String, Unit], onCancel: Task[F, String, Unit])

object JobTask {

  /** Creates a `JobTask` from tasks that expect arguments of type `A`.
    *
    * The string argument is decoded from JSON before either task runs; a
    * decoding failure is raised as an error in `F`.
    */
  def json[F[_]: Sync, A](name: Ident, task: Task[F, A, Unit], onCancel: Task[F, A, Unit])
                         (implicit D: Decoder[A]): JobTask[F] = {
    val decodeArgs: String => F[A] =
      raw =>
        raw.parseJsonAs[A].fold(
          err => Sync[F].raiseError[A](new Exception(s"Cannot parse task arguments: $raw", err)),
          parsed => parsed.pure[F]
        )

    JobTask(name, task.contramap(decodeArgs), onCancel.contramap(decodeArgs))
  }
}
@ -0,0 +1,26 @@
package docspell.joex.scheduler
import docspell.common.Ident
* This is a mapping from some identifier to a task. This is used by
* the scheduler to lookup an implementation using the taskName field
* of the RJob database record.
// Immutable registry: `withTask` returns a new registry instead of modifying this one.
final class JobTaskRegistry[F[_]](tasks: Map[Ident, JobTask[F]]) {
// registers `task` under its own name, replacing an existing entry with that name
def withTask(task: JobTask[F]): JobTaskRegistry[F] =
JobTaskRegistry(tasks.updated(task.name, task))
// NOTE(review): body not part of this excerpt
def find(taskName: Ident): Option[JobTask[F]] =
object JobTaskRegistry {

  /** A registry containing exactly the tasks of `map`. */
  def apply[F[_]](map: Map[Ident, JobTask[F]]): JobTaskRegistry[F] =
    new JobTaskRegistry[F](map)

  /** A registry without any task. */
  def empty[F[_]]: JobTaskRegistry[F] =
    apply(Map.empty[Ident, JobTask[F]])
}
@ -0,0 +1,25 @@
package docspell.joex.scheduler
import cats.implicits._
import docspell.common._
import cats.effect.Sync
/** A single log message emitted by a running job.
  *
  * @param jobId   the job that emitted the message
  * @param jobInfo short description of the job, used in `logLine`
  * @param time    when the event was created
  * @param level   the log level
  * @param msg     the message text
  * @param ex      an optional error attached to the message
  */
case class LogEvent( jobId: Ident
                   , jobInfo: String
                   , time: Timestamp
                   , level: LogLevel
                   , msg: String
                   , ex: Option[Throwable] = None) {

  /** The event rendered as one line of text. */
  def logLine: String = {
    val stamp = time.asString
    s">>> $stamp $level $jobInfo: $msg"
  }
}

object LogEvent {

  /** Creates an event without an error, stamped with the current time. */
  def create[F[_]: Sync](jobId: Ident, jobInfo: String, level: LogLevel, msg: String): F[LogEvent] =
    for (now <- Timestamp.current[F])
      yield LogEvent(jobId, jobInfo, time = now, level = level, msg = msg)
}
@ -0,0 +1,59 @@
package docspell.joex.scheduler
import cats.implicits._
import cats.effect.{Concurrent, Sync}
import fs2.{Pipe, Stream}
import org.log4s.{LogLevel => _, _}
import docspell.common._
import docspell.common.syntax.all._
import docspell.store.Store
import docspell.store.records.RJobLog
/** Destination for the log events of running jobs. */
trait LogSink[F[_]] {
// the pipe that consumes the events
def receive: Pipe[F, LogEvent, Unit]
object LogSink {
private[this] val logger = getLogger
// wraps a pipe into a LogSink
def apply[F[_]](sink: Pipe[F, LogEvent, Unit]): LogSink[F] =
new LogSink[F] {
val receive = sink
// dispatches on the event's level
// NOTE(review): the bodies of all branches are not part of this excerpt
def logInternal[F[_]: Sync](e: LogEvent): F[Unit] =
e.level match {
case LogLevel.Info =>
case LogLevel.Debug =>
case LogLevel.Warn =>
case LogLevel.Error =>
e.ex match {
case Some(exc) =>
case None =>
// sink that passes every event to `logInternal` only
def printer[F[_]: Sync]: LogSink[F] =
LogSink(_.evalMap(e => logInternal(e)))
// sink that passes every event to `logInternal` and additionally inserts it as RJobLog record;
// the message of an attached error is appended to the stored text
// NOTE(review): `th.getMessage` may be null for some exceptions, which would store ": null" — confirm whether that matters
def db[F[_]: Sync](store: Store[F]): LogSink[F] =
LogSink(_.evalMap(ev => for {
id <- Ident.randomId[F]
joblog = RJobLog(id, ev.jobId, ev.level, ev.time, ev.msg + ev.ex.map(th => ": "+ th.getMessage).getOrElse(""))
_ <- logInternal(ev)
_ <- store.transact(RJobLog.insert(joblog))
} yield ()))
// NOTE(review): only the start of this method is part of this excerpt; it builds a stream of the two pipes above
def dbAndLog[F[_]: Concurrent](store: Store[F]): LogSink[F] = {
val s: Stream[F, Pipe[F, LogEvent, Unit]] =
Stream.emits(Seq(printer[F].receive, db[F](store).receive))
@ -0,0 +1,49 @@
package docspell.joex.scheduler
import cats.implicits._
import cats.effect.{Concurrent, Sync}
import docspell.common._
import fs2.concurrent.Queue
/** Logger handed to tasks; every method yields an effect in `F`.
  * Messages are passed by name.
  */
trait Logger[F[_]] {
def trace(msg: => String): F[Unit]
def debug(msg: => String): F[Unit]
def info(msg: => String): F[Unit]
def warn(msg: => String): F[Unit]
// logs at error level and attaches `ex` to the event
def error(ex: Throwable)(msg: => String): F[Unit]
def error(msg: => String): F[Unit]
object Logger {

  /** Creates a logger that turns every message into a `LogEvent` and enqueues it on `q`.
    *
    * `trace` messages are emitted with level `Debug`.
    */
  def create[F[_]: Sync](jobId: Ident, jobInfo: String, q: Queue[F, LogEvent]): Logger[F] =
    new Logger[F] {
      // builds the event for this job with the given level and text
      private def event(level: LogLevel, text: String): F[LogEvent] =
        LogEvent.create[F](jobId, jobInfo, level, text)

      def trace(msg: => String): F[Unit] =
        event(LogLevel.Debug, msg).flatMap(q.enqueue1)

      def debug(msg: => String): F[Unit] =
        event(LogLevel.Debug, msg).flatMap(q.enqueue1)

      def info(msg: => String): F[Unit] =
        event(LogLevel.Info, msg).flatMap(q.enqueue1)

      def warn(msg: => String): F[Unit] =
        event(LogLevel.Warn, msg).flatMap(q.enqueue1)

      def error(ex: Throwable)(msg: => String): F[Unit] =
        event(LogLevel.Error, msg).map(le => le.copy(ex = Some(ex))).flatMap(q.enqueue1)

      def error(msg: => String): F[Unit] =
        event(LogLevel.Error, msg).flatMap(q.enqueue1)
    }

  /** Creates a logger backed by a circular buffer of `bufferSize` events and
    * starts a fiber that drains the buffer into `sink`.
    */
  def apply[F[_]: Concurrent](jobId: Ident, jobInfo: String, bufferSize: Int, sink: LogSink[F]): F[Logger[F]] =
    Queue.circularBuffer[F, LogEvent](bufferSize).flatMap { events =>
      val taskLogger = create(jobId, jobInfo, events)
      Concurrent[F]
        .start(events.dequeue.through(sink.receive).compile.drain)
        .map(_ => taskLogger)
    }
}
@ -0,0 +1,33 @@
package docspell.joex.scheduler
import cats.effect.{Fiber, Timer}
import fs2.Stream
import docspell.common.Ident
import docspell.store.records.RJob
// Interface of the job scheduler: inspect running jobs, cancel them, wake the
// scheduler up, and start or shut down its main loop.
trait Scheduler[F[_]] {
def config: SchedulerConfig
// the records of the jobs currently running
def getRunning: F[Vector[RJob]]
// requests cancellation of a job; the flag tells whether the job was found
def requestCancel(jobId: Ident): F[Boolean]
// signals the scheduler that it should look for work
def notifyChange: F[Unit]
def start: Stream[F, Nothing]
/** Requests to shutdown the scheduler.
* The scheduler will not take any new jobs from the queue. If
* there are still running jobs, it waits for them to complete.
* when the cancelAll flag is set to true, it cancels all running
* jobs.
* The returned F[Unit] can be evaluated to wait for all that to
* complete.
def shutdown(cancelAll: Boolean): F[Unit]
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]]
@ -0,0 +1,66 @@
package docspell.joex.scheduler
import cats.implicits._
import cats.effect.concurrent.Semaphore
import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource}
import docspell.store.Store
import docspell.store.queue.JobQueue
import fs2.concurrent.SignallingRef
/** Builder for a `Scheduler`; every `with…` method returns a modified copy. */
case class SchedulerBuilder[F[_]: ConcurrentEffect : ContextShift](
config: SchedulerConfig
, tasks: JobTaskRegistry[F]
, store: Store[F]
, blocker: Blocker
, queue: Resource[F, JobQueue[F]]
, logSink: LogSink[F]) {
def withConfig(cfg: SchedulerConfig): SchedulerBuilder[F] =
copy(config = cfg)
def withTaskRegistry(reg: JobTaskRegistry[F]): SchedulerBuilder[F] =
copy(tasks = reg)
// NOTE(review): body not part of this excerpt
def withTask[A](task: JobTask[F]): SchedulerBuilder[F] =
// here `queue` refers to the parameter, so the new queue replaces the builder's one
def withQueue(queue: Resource[F, JobQueue[F]]): SchedulerBuilder[F] =
SchedulerBuilder[F](config, tasks, store, blocker, queue, logSink)
def withBlocker(blocker: Blocker): SchedulerBuilder[F] =
copy(blocker = blocker)
def withLogSink(sink: LogSink[F]): SchedulerBuilder[F] =
copy(logSink = sink)
// like `resource`, but additionally starts the scheduler's stream on a separate fiber
def serve: Resource[F, Scheduler[F]] =
resource.evalMap(sch => ConcurrentEffect[F].start(sch.start.compile.drain).map(_ => sch))
// assembles a SchedulerImpl with fresh signals and a semaphore holding `poolSize` permits
def resource: Resource[F, Scheduler[F]] = {
val scheduler = for {
jq <- queue
waiter <- Resource.liftF(SignallingRef(true))
state <- Resource.liftF(SignallingRef(SchedulerImpl.emptyState[F]))
perms <- Resource.liftF(Semaphore(config.poolSize.toLong))
} yield new SchedulerImpl[F](config, blocker, jq, tasks, store, logSink, state, waiter, perms)
// NOTE(review): the receiver of this `map` is not part of this excerpt
map(s => s: Scheduler[F])
object SchedulerBuilder {

  /** A builder with default parts: an empty task registry, a job queue based
    * on `store` and a log sink writing to the database.
    */
  def apply[F[_]: ConcurrentEffect : ContextShift]( config: SchedulerConfig
                                                  , blocker: Blocker
                                                  , store: Store[F]): SchedulerBuilder[F] =
    new SchedulerBuilder[F](
      config = config,
      tasks = JobTaskRegistry.empty[F],
      store = store,
      blocker = blocker,
      queue = JobQueue(store),
      logSink = LogSink.db[F](store)
    )
}
@ -0,0 +1,25 @@
package docspell.joex.scheduler
import docspell.common._
// Settings of a scheduler instance.
// name: identifies this scheduler (see SchedulerImpl.init / onStart)
case class SchedulerConfig( name: Ident
// size of the permit pool guarding job execution (see SchedulerBuilder.resource)
, poolSize: Int
// ratio between high and low priority jobs
, countingScheme: CountingScheme
// compared against a job's retry count (see ItemHandler.isLastRetry and QJob.exceedsRetries)
, retries: Int
, retryDelay: Duration
// capacity of the per-job log event buffer (see Logger.apply)
, logBufferSize: Int
, wakeupPeriod: Duration
object SchedulerConfig {
val default = SchedulerConfig(
name = Ident.unsafe("default-scheduler")
, poolSize = 2 // math.max(2, Runtime.getRuntime.availableProcessors / 2)
, countingScheme = CountingScheme(2, 1)
, retries = 5
, retryDelay = Duration.seconds(30)
, logBufferSize = 500
, wakeupPeriod = Duration.minutes(10)
@ -0,0 +1,227 @@
package docspell.joex.scheduler
import fs2.Stream
import cats.implicits._
import cats.effect.concurrent.Semaphore
import docspell.common._
import docspell.common.syntax.all._
import docspell.store.queue.JobQueue
import docspell.store.records.RJob
import fs2.concurrent.SignallingRef
import cats.effect._
import org.log4s._
import SchedulerImpl._
import docspell.store.Store
import docspell.store.queries.QJob
final class SchedulerImpl[F[_]: ConcurrentEffect : ContextShift](val config: SchedulerConfig
, blocker: Blocker
, queue: JobQueue[F]
, tasks: JobTaskRegistry[F]
, store: Store[F]
, logSink: LogSink[F]
, state: SignallingRef[F, State[F]]
, waiter: SignallingRef[F, Boolean]
, permits: Semaphore[F]) extends Scheduler[F] {
private [this] val logger = getLogger
* On startup, get all jobs in state running from this scheduler
* and put them into waiting state, so they get picked up again.
def init: F[Unit] =
QJob.runningToWaiting(config.name, store)
def periodicAwake(implicit T: Timer[F]): F[Fiber[F, Unit]] =
evalMap(_ => logger.fdebug("Periodic awake reached") *> notifyChange).compile.drain)
def getRunning: F[Vector[RJob]] =
state.get.flatMap(s => QJob.findAll(s.getRunning, store))
def requestCancel(jobId: Ident): F[Boolean] =
state.get.flatMap(_.cancelRequest(jobId) match {
case Some(ct) => ct.map(_ => true)
case None => logger.fwarn(s"Job ${jobId.id} not found, cannot cancel.").map(_ => false)
def notifyChange: F[Unit] =
waiter.update(b => !b)
def shutdown(cancelAll: Boolean): F[Unit] = {
val doCancel =
map(_ => ())
val runShutdown =
state.modify(_.requestShutdown) *> (if (cancelAll) doCancel else ().pure[F])
val wait = Stream.eval(runShutdown).
evalMap(_ => logger.finfo("Scheduler is shutting down now.")).
flatMap(_ => Stream.eval(state.get) ++ Stream.suspend(state.discrete.takeWhile(_.getRunning.nonEmpty))).
flatMap(state => {
if (state.getRunning.isEmpty) Stream.eval(logger.finfo("No jobs running."))
else Stream.eval(logger.finfo(s"Waiting for ${state.getRunning.size} jobs to finish.")) ++
(wait.drain ++ Stream.emit(())).compile.lastOrError
def start: Stream[F, Nothing] =
logger.sinfo("Starting scheduler") ++
def mainLoop: Stream[F, Nothing] = {
val body: F[Boolean] =
for {
_ <- permits.available.flatMap(a => logger.fdebug(s"Try to acquire permit ($a free)"))
_ <- permits.acquire
_ <- logger.fdebug("New permit acquired")
down <- state.get.map(_.shutdownRequest)
rjob <- if (down) logger.finfo("") *> permits.release *> (None: Option[RJob]).pure[F]
else queue.nextJob(group => state.modify(_.nextPrio(group, config.countingScheme)), config.name, config.retryDelay)
_ <- logger.fdebug(s"Next job found: ${rjob.map(_.info)}")
_ <- rjob.map(execute).getOrElse(permits.release)
} yield rjob.isDefined
evalTap(if (_) logger.finfo[F]("Stopping main loop due to shutdown request.") else ().pure[F]).
flatMap(if (_) Stream.empty else Stream.eval(body)).
case true =>
case false =>
logger.sdebug(s"Waiting for notify") ++
waiter.discrete.take(2).drain ++
logger.sdebug(s"Notify signal, going into main loop") ++
def execute(job: RJob): F[Unit] = {
val task = for {
jobtask <- tasks.find(job.task).toRight(s"This executor cannot run tasks with name: ${job.task}")
} yield jobtask
task match {
case Left(err) =>
logger.ferror(s"Unable to start a task for job ${job.info}: $err")
case Right(t) =>
for {
_ <- logger.fdebug(s"Creating context for job ${job.info} to run $t")
ctx <- Context[F, String](job, job.args, config, logSink, blocker, store)
jot = wrapTask(job, t.task, ctx)
tok <- forkRun(job, jot.run(ctx), t.onCancel.run(ctx), ctx)
_ <- state.modify(_.addRunning(job, tok))
} yield ()
def onFinish(job: RJob, finalState: JobState): F[Unit] =
for {
_ <- logger.fdebug(s"Job ${job.info} done $finalState. Releasing resources.")
_ <- permits.release *> permits.available.flatMap(a => logger.fdebug(s"Permit released ($a free)"))
_ <- state.modify(_.removeRunning(job))
_ <- QJob.setFinalState(job.id, finalState, store)
} yield ()
def onStart(job: RJob): F[Unit] =
QJob.setRunning(job.id, config.name, store) //also increments retries if current state=stuck
/** Wraps the task of a job with the scheduler's bookkeeping.
  *
  * Before the task effect runs, `onStart(job)` is executed and the effect is
  * run via `blocker.blockOn`. The outcome is then translated into a
  * `JobState` (Success, Cancelled, Failed or Stuck) and passed to `onFinish`.
  *
  * NOTE(review): parts of this definition are not present in this view
  * (the expression that feeds the first group of `case` branches and the
  * closing braces), so the comments below describe only the visible lines.
  */
def wrapTask(job: RJob, task: Task[F, String, Unit], ctx: Context[F, String]): Task[F, String, Unit] = {
task.mapF(fa => onStart(job) *> logger.fdebug("Starting task now") *> blocker.blockOn(fa)).
case Right(()) =>
logger.info(s"Job execution successful: ${job.info}")
ctx.logger.info("Job execution successful") *>
(JobState.Success: JobState).pure[F]
case Left(ex) =>
// A failure of a job that was marked as cancelled is reported as Cancelled.
state.get.map(_.wasCancelled(job)).flatMap {
case true =>
logger.error(ex)(s"Job ${job.info} execution failed (cancel = true)")
ctx.logger.error(ex)("Job execution failed (cancel = true)") *>
(JobState.Cancelled: JobState).pure[F]
case false =>
// Not cancelled: `QJob.exceedsRetries` (given `config.retries`) decides
// between Failed (true) and Stuck (false).
QJob.exceedsRetries(job.id, config.retries, store).flatMap {
case true =>
logger.error(ex)(s"Job ${job.info} execution failed. Retries exceeded.")
ctx.logger.error(ex)(s"Job ${job.info} execution failed. Retries exceeded.").
map(_ => JobState.Failed: JobState)
case false =>
logger.error(ex)(s"Job ${job.info} execution failed. Retrying later.")
ctx.logger.error(ex)(s"Job ${job.info} execution failed. Retrying later.").
map(_ => JobState.Stuck: JobState)
// Second stage: hand the computed state to `onFinish`; errors raised while
// computing that state are caught here via `attempt`.
mapF(_.attempt.flatMap {
case Right(jstate) =>
onFinish(job, jstate)
case Left(ex) =>
logger.error(ex)(s"Error happened during post-processing of ${job.info}!")
// we don't know the real outcome here…
// since tasks should be idempotent, set it to stuck. if above has failed, this might fail anyways
onFinish(job, JobState.Stuck)
/** Prepares `code` for execution and produces the action that cancels it.
  *
  * The outer effect yields an inner `F[Unit]`. When that inner effect is run
  * it logs the cancellation, cancels the fiber, logs a failure of the task's
  * cancelling code, marks the job as cancelled in the state, calls
  * `onFinish` with `JobState.Cancelled` and writes a warning to the job's
  * own logger.
  *
  * NOTE(review): the line that starts the fiber from `bfa` and the head of
  * the expression handling `onCancel` are not present in this view;
  * presumably `bfa` is started as a fiber and `onCancel` is attempted
  * after `fiber.cancel`. Confirm against the full file.
  */
def forkRun(job: RJob, code: F[Unit], onCancel: F[Unit], ctx: Context[F, String]): F[F[Unit]] = {
// The job's code, wrapped with `blocker.blockOn`.
val bfa = blocker.blockOn(code)
logger.fdebug(s"Forking job ${job.info}") *>
// Everything inside this `map` is the cancel action returned to the caller.
map(fiber =>
logger.fdebug(s"Cancelling job ${job.info}") *>
fiber.cancel *>
case Right(_) => ()
case Left(ex) =>
// A failing cancel hook is only logged; the remaining steps are still chained after it.
logger.error(ex)(s"Task's cancelling code failed. Job ${job.info}.")
}) *>
state.modify(_.markCancelled(job)) *>
onFinish(job, JobState.Cancelled) *>
ctx.logger.warn("Job has been cancelled.") *>
logger.fdebug(s"Job ${job.info} has been cancelled."))
/** Companion object holding the scheduler's in-memory bookkeeping state. */
object SchedulerImpl {
// Initial state: no counters, no cancelled jobs, no cancel tokens, no shutdown requested.
def emptyState[F[_]]: State[F] =
State(Map.empty, Set.empty, Map.empty, false)
/** Immutable snapshot of the scheduler's bookkeeping.
  *
  * The update methods return `(State[F], Unit)` (or `(State[F], Priority)`),
  * which is the shape passed to `state.modify(...)` by the scheduler code,
  * e.g. `state.modify(_.addRunning(job, tok))`.
  *
  * - counters: one `CountingScheme` per group
  * - cancelled: ids of jobs that have been marked as cancelled
  * - cancelTokens: the cancel token registered for each job id
  * - shutdownRequest: set to true by `requestShutdown`
  */
case class State[F[_]]( counters: Map[Ident, CountingScheme]
, cancelled: Set[Ident]
, cancelTokens: Map[Ident, CancelToken[F]]
, shutdownRequest: Boolean) {
// Takes the group's counter (or `initial` if the group has none yet), asks it
// for the next priority and stores the advanced counter back under the group.
def nextPrio(group: Ident, initial: CountingScheme): (State[F], Priority) = {
val (cs, prio) = counters.getOrElse(group, initial).nextPriority
(copy(counters = counters.updated(group, cs)), prio)
// Registers the cancel token under the job's id; all other fields are carried over.
def addRunning(job: RJob, token: CancelToken[F]): (State[F], Unit) =
(State(counters, cancelled, cancelTokens.updated(job.id, token), shutdownRequest), ())
// Removes the job's id from both the cancelled set and the cancel tokens.
def removeRunning(job: RJob): (State[F], Unit) =
(copy(cancelled = cancelled - job.id, cancelTokens = cancelTokens.removed(job.id)), ())
// Adds the job's id to the cancelled set.
def markCancelled(job: RJob): (State[F], Unit) =
(copy(cancelled = cancelled + job.id), ())
// NOTE(review): the bodies of the next three members are not present in this
// view; presumably they read `cancelled` and `cancelTokens` — confirm.
def wasCancelled(job: RJob): Boolean =
def cancelRequest(id: Ident): Option[F[Unit]] =
def getRunning: Seq[Ident] =
// Sets the shutdown flag.
def requestShutdown: (State[F], Unit) =
(copy(shutdownRequest = true), ())
@ -0,0 +1,55 @@
package docspell.joex.scheduler
import cats.implicits._
import cats.{Applicative, ApplicativeError, FlatMap, Functor}
import cats.data.Kleisli
import cats.effect.Sync
/** The code that is executed by the scheduler. */
trait Task[F[_], A, B] {
// The single abstract member: given a context carrying arguments of type A,
// produce a B inside the effect F. Everything else is a combinator.
def run(ctx: Context[F, A]): F[B]
// NOTE(review): the bodies of `map`, `andThen`, `mapF` and `attempt` are not
// present in this view; only their signatures can be documented here.
def map[C](f: B => C)(implicit F: Functor[F]): Task[F, A, C] =
// Sequences this task with the task produced by `f`, by going through
// the Kleisli representation of both.
def flatMap[C](f: B => Task[F, A, C])(implicit F: FlatMap[F]): Task[F, A, C] =
Task(Task.toKleisli(this).flatMap(a => Task.toKleisli(f(a))))
def andThen[C](f: B => F[C])(implicit F: FlatMap[F]): Task[F, A, C] =
def mapF[C](f: F[B] => F[C]): Task[F, A, C] =
def attempt(implicit F: ApplicativeError[F,Throwable]): Task[F, A, Either[Throwable, B]] =
// Adapts the argument type: `f` derives this task's arguments from the
// incoming context's arguments, then this task runs with a context whose
// arguments are replaced by that value.
def contramap[C](f: C => F[A])(implicit F: FlatMap[F]): Task[F, C, B] = {
ctxc: Context[F, C] => f(ctxc.args).flatMap(a => run(ctxc.map(_ => a)))
/** Constructors and helpers for [[Task]]. */
object Task {
// A task that ignores its context and yields `b` lifted into F.
def pure[F[_]: Applicative, A, B](b: B): Task[F, A, B] =
Task(_ => b.pure[F])
// A task that ignores its context and yields the given effect.
def of[F[_], A, B](b: F[B]): Task[F, A, B] =
Task(_ => b)
// Builds a task from a plain function on the context.
def apply[F[_], A, B](f: Context[F, A] => F[B]): Task[F, A, B] =
(ctx: Context[F, A]) => f(ctx)
// Builds a task from a Kleisli by running it with the context.
def apply[F[_], A, B](k: Kleisli[F, Context[F, A], B]): Task[F, A, B] =
c => k.run(c)
// NOTE(review): the body is not present in this view; presumably it wraps
// `t.run` in a Kleisli — confirm.
def toKleisli[F[_], A, B](t: Task[F, A, B]): Kleisli[F, Context[F, A], B] =
// Calls `setProgress(n)` on the context and then yields `data` unchanged.
def setProgress[F[_]: Sync, A, B](n: Int)(data: B): Task[F, A, B] =
Task(_.setProgress(n).map(_ => data))
@ -0,0 +1,15 @@
package docspell.joex.scheduler
import docspell.common.Priority
import minitest.SimpleTestSuite
/** Checks the sequence of priorities produced by a `CountingScheme`. */
object CountingSchemeSpec extends SimpleTestSuite {
test("counting") {
val cs = CountingScheme(2,1)
// Collect six priorities by repeatedly calling `nextPriority` on the scheme
// returned by the previous step.
val list = List.iterate(cs.nextPriority, 6)(_._1.nextPriority).map(_._2)
// For CountingScheme(2,1) the expected pattern is High, High, Low, occurring twice.
val expect = List(Priority.High, Priority.High, Priority.Low)
assertEquals(list, expect ++ expect)
@ -9,9 +9,162 @@ servers:
description: Current host
description: Current host
tags: [ Api Info ]
summary: Get basic information about this software.
description: |
Returns the version and project name and other properties of the build.
$ref: "#/components/schemas/VersionInfo"
tags: [ Job Executor ]
summary: Notify the job executor.
description: |
Notifies the job executor to wake up and look for jobs in the queue.
description: Ok
$ref: "#/components/schemas/BasicResult"
tags: [ Job Executor ]
summary: Get a list of currently executing jobs.
description: |
Returns all jobs this executor is currently executing.
description: Ok
$ref: "#/components/schemas/JobList"
tags: [ Job Executor ]
summary: Stops this component and exits.
description: |
Gracefully stops the scheduler and also stops the process.
description: Ok
$ref: "#/components/schemas/BasicResult"
tags: [ Current Jobs ]
summary: Get a job by its id.
description: |
Returns details about a job given the id.
description: Ok
$ref: "#/components/schemas/JobAndLogs"
tags: [ Current Jobs ]
summary: Request to cancel a running job.
description: |
Requests to cancel the running job. This will try to cancel
the execution but it is not guaranteed that it can immediately
abort. The job is then removed from the queue.
description: Ok
$ref: "#/components/schemas/BasicResult"
description: |
Some more details about the job.
- job
- logs
$ref: "#/components/schemas/Job"
type: array
$ref: "#/components/schemas/JobLogEvent"
description: |
A log output line.
- time
- level
- message
description: DateTime
type: integer
format: date-time
type: string
format: loglevel
type: string
description: |
A list of jobs.
- items
type: array
$ref: "#/components/schemas/Job"
description: |
Data about a running job.
- id
- name
- submitted
- priority
- retries
- progress
- started
type: string
format: ident
type: string
description: DateTime
type: integer
format: date-time
type: integer
format: priority
type: integer
format: int32
type: integer
format: int32
description: DateTime
type: integer
format: date-time
description: |
description: |
Information about the software.
Information about the software.
@ -33,3 +186,14 @@ components:
type: string
type: string
type: string
type: string
description: |
Some basic result of an operation.
- success
- message
type: boolean
type: string
@ -0,0 +1,18 @@
/* Microsite style overrides. */
/* Hero banner: a single, non-repeating background image stretched to the full width and 800px high. */
.jumbotron {
background: url(../img/back-master-small.jpg);
background-repeat: no-repeat;
background-size: 100% 800px;
/* Top-level headings get a thin rule underneath with some spacing above it. */
.content-wrapper h1, .h1 {
border-bottom: 1px solid #d8dfe5;
padding-bottom: 0.8rem;
/* Base font size for the page. */
body {
font-size: 1.75em;
/* Fourth-level headings are underlined. */
h4 {
text-decoration: underline;
- title: Home
url: index.html
- title: Getit
url: getit.html
- title: Documentation
url: doc.html
- title: Installation
url: doc/install.html
- title: Configuring
url: doc/configure.html
- title: Adding Meta Data
url: doc/metadata.html
- title: Uploads
url: doc/uploading.html
- title: Processing Queue
url: doc/processing.html
- title: Find and Review
url: doc/curate.html
- title: Joex
url: doc/joex.html
- title: Development
url: dev.html
- title: ADRs
url: dev/adr.html
- title: Api
url: api.html
- title: REST Api Doc
url: openapi/docspell-openapi.html
- title: REST OpenApi Spec
url: openapi/docspell-openapi.yml
