Experiment with addons
Addons allow executing external programs in some context inside Docspell. Currently it is possible to run them after processing files. Addons are provided as URLs to zip files.
@@ -780,4 +780,75 @@ Docspell Update Check
      index-all-chunk = 10
    }
  }

  addons {
    # A directory to extract addons when running them. Everything in
    # here will be cleared after each run.
    working-dir = ${java.io.tmpdir}"/docspell-addons"

    # A directory for addons to store data between runs. This is not
    # cleared by Docspell and can get large depending on the addons
    # executed.
    #
    # This directory is used as base. In it subdirectories are created
    # per run configuration id.
    cache-dir = ${java.io.tmpdir}"/docspell-addon-cache"

    executor-config {
      # Define a (comma or whitespace separated) list of runners that
      # are responsible for executing an addon. This setting is
      # compared to what is supported by addons. Possible values are:
      #
      # - nix-flake: use nix-flake runner if the addon supports it
      #   (this requires the nix package manager on the joex machine)
      # - docker: use docker
      # - trivial: use the trivial runner
      #
      # The first successful execution is used. This should list all
      # runners the computer supports.
      runner = "nix-flake, docker, trivial"

      # systemd-nspawn can be used to run the program in a container.
      # This is used by runners nix-flake and trivial.
      nspawn = {
        # If this is false, systemd-nspawn is not tried. When true, the
        # addon is executed inside a lightweight container via
        # systemd-nspawn.
        enabled = false

        # Path to sudo command. By default systemd-nspawn is executed
        # via sudo - the user running joex must be allowed to do so NON
        # INTERACTIVELY. If this is empty, then nspawn is tried to
        # execute without sudo.
        sudo-binary = "sudo"

        # Path to the systemd-nspawn command.
        nspawn-binary = "systemd-nspawn"

        # Workaround, if multiple same named containers are run too fast
        container-wait = "100 millis"
      }

      # The timeout for running an addon.
      run-timeout = "15 minutes"

      # Configure the nix flake runner.
      nix-runner {
        # Path to the nix command.
        nix-binary = "nix"

        # The timeout for building the package (running nix build).
        build-timeout = "15 minutes"
      }

      # Configure the docker runner
      docker-runner {
        # Path to the docker command.
        docker-binary = "docker"

        # The timeout for building the package (running docker build).
        build-timeout = "15 minutes"
      }
    }
  }
}
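The runner setting above is a priority list: the configured runners are compared with what the addon supports and the first successful execution wins. The following is a minimal sketch of that selection logic only; Runner, execFirstSuccessful and all names in it are made up for illustration and are not docspell's actual classes.

// Illustrative sketch, not docspell's runner code: filter the configured runners
// by what the addon supports and use the first one that executes successfully.
object RunnerSelectionSketch {
  final case class Runner(
      name: String,
      supportedByAddon: Boolean,
      run: () => Either[String, String]
  )

  def execFirstSuccessful(configured: List[Runner]): Either[String, String] =
    configured.filter(_.supportedByAddon) match {
      case Nil => Left("none of the configured runners is supported by the addon")
      case candidates =>
        // lazily run the candidates in configured order, stop at the first success
        candidates.iterator
          .map(_.run())
          .find(_.isRight)
          .getOrElse(Left("all supported runners failed"))
    }

  def main(args: Array[String]): Unit = {
    val configured = List(
      Runner("nix-flake", supportedByAddon = false, () => Left("addon has no flake")),
      Runner("docker", supportedByAddon = true, () => Right("ran via docker")),
      Runner("trivial", supportedByAddon = true, () => Right("ran via trivial"))
    )
    println(execFirstSuccessful(configured)) // Right(ran via docker)
  }
}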
@@ -12,6 +12,7 @@ import fs2.io.file.Path
import docspell.analysis.TextAnalysisConfig
import docspell.analysis.classifier.TextClassifierConfig
import docspell.backend.Config.Files
import docspell.backend.joex.AddonEnvConfig
import docspell.common._
import docspell.config.{FtsType, PgFtsConfig}
import docspell.convert.ConvertConfig
@@ -43,7 +44,8 @@ case class Config(
    files: Files,
    mailDebug: Boolean,
    fullTextSearch: Config.FullTextSearch,
    updateCheck: UpdateCheckConfig
    updateCheck: UpdateCheckConfig,
    addons: AddonEnvConfig
) {

  def pubSubConfig(headerValue: Ident): PubSubConfig =
@@ -145,6 +145,8 @@ object JoexAppImpl extends MailAddressCodec
        schedulerModule.scheduler,
        schedulerModule.periodicScheduler
      )
      nodes <- ONode(store)
      _ <- nodes.withRegistered(cfg.appId, NodeType.Joex, cfg.baseUrl, None)
      appR <- Resource.make(app.init.map(_ => app))(_.initShutdown)
    } yield appR
@@ -59,7 +59,7 @@ object JoexServer {
            Router("pubsub" -> pubSub.receiveRoute)
          },
        "/api/info" -> InfoRoutes(cfg),
        "/api/v1" -> JoexRoutes(joexApp)
        "/api/v1" -> JoexRoutes(cfg, joexApp)
      ).orNotFound

      // With Middlewares in place
@@ -9,7 +9,9 @@ package docspell.joex
import cats.effect.{Async, Resource}

import docspell.analysis.TextAnalyser
import docspell.backend.BackendCommands
import docspell.backend.fulltext.CreateIndex
import docspell.backend.joex.AddonOps
import docspell.backend.ops._
import docspell.backend.task.DownloadZipArgs
import docspell.common._
@@ -17,6 +19,7 @@ import docspell.config.FtsType
import docspell.ftsclient.FtsClient
import docspell.ftspsql.PsqlFtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.addon.{ItemAddonTask, ScheduledAddonTask}
import docspell.joex.analysis.RegexNerFile
import docspell.joex.download.DownloadZipTask
import docspell.joex.emptytrash.EmptyTrashTask
@@ -32,6 +35,7 @@ import docspell.joex.preview.{AllPreviewsTask, MakePreviewTask}
import docspell.joex.process.{ItemHandler, ReProcessItem}
import docspell.joex.scanmailbox.ScanMailboxTask
import docspell.joex.updatecheck.{ThisVersion, UpdateCheck, UpdateCheckTask}
import docspell.joexapi.client.JoexClient
import docspell.notification.api.NotificationModule
import docspell.pubsub.api.PubSubT
import docspell.scheduler.impl.JobStoreModuleBuilder
@@ -57,7 +61,8 @@ final class JoexTasks[F[_]: Async](
    createIndex: CreateIndex[F],
    joex: OJoex[F],
    jobs: OJob[F],
    itemSearch: OItemSearch[F]
    itemSearch: OItemSearch[F],
    addons: AddonOps[F]
) {
  val downloadAll: ODownloadAll[F] =
    ODownloadAll(store, jobs, jobStoreModule.jobs)
@@ -68,7 +73,8 @@ final class JoexTasks[F[_]: Async](
      .withTask(
        JobTask.json(
          ProcessItemArgs.taskName,
          ItemHandler.newItem[F](cfg, store, itemOps, fts, analyser, regexNer),
          ItemHandler
            .newItem[F](cfg, store, itemOps, fts, analyser, regexNer, addons),
          ItemHandler.onCancel[F](store)
        )
      )
@@ -82,7 +88,15 @@ final class JoexTasks[F[_]: Async](
      .withTask(
        JobTask.json(
          ReProcessItemArgs.taskName,
          ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer, store),
          ReProcessItem[F](
            cfg,
            fts,
            itemOps,
            analyser,
            regexNer,
            addons,
            store
          ),
          ReProcessItem.onCancel[F]
        )
      )
@@ -223,6 +237,20 @@ final class JoexTasks[F[_]: Async](
          DownloadZipTask.onCancel[F]
        )
      )
      .withTask(
        JobTask.json(
          ScheduledAddonTaskArgs.taskName,
          ScheduledAddonTask[F](addons),
          ScheduledAddonTask.onCancel[F]
        )
      )
      .withTask(
        JobTask.json(
          ItemAddonTaskArgs.taskName,
          ItemAddonTask[F](addons, store),
          ItemAddonTask.onCancel[F]
        )
      )
}

object JoexTasks {
@@ -237,8 +265,9 @@ object JoexTasks {
      emailService: Emil[F]
  ): Resource[F, JoexTasks[F]] =
    for {
      joex <- OJoex(pubSub)
      store = jobStoreModule.store
      store <- Resource.pure(jobStoreModule.store)
      node <- ONode(store)
      joex <- OJoex(pubSub, node, JoexClient(httpClient))
      upload <- OUpload(store, jobStoreModule.jobs)
      fts <- createFtsClient(cfg, pools, store, httpClient)
      createIndex <- CreateIndex.resource(fts, store)
@@ -250,6 +279,16 @@ object JoexTasks {
      notification <- ONotification(store, notificationModule)
      fileRepo <- OFileRepository(store, jobStoreModule.jobs)
      jobs <- OJob(store, joex, pubSub)
      fields <- OCustomFields(store)
      attachmentOps = OAttachment(store, fts, jobStoreModule.jobs)
      cmdRunner = BackendCommands(itemOps, attachmentOps, fields, notification, None)
      addons = AddonOps(
        cfg.addons,
        store,
        cmdRunner,
        attachmentOps,
        jobStoreModule.jobs
      )
    } yield new JoexTasks[F](
      cfg,
      store,
@@ -266,7 +305,8 @@ object JoexTasks {
      createIndex,
      joex,
      jobs,
      itemSearchOps
      itemSearchOps,
      addons
    )

  private def createFtsClient[F[_]: Async](
@@ -0,0 +1,30 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.joex.addon

import cats.MonadError

import docspell.addons.AddonExecutionResult
import docspell.scheduler.PermanentError

trait AddonTaskExtension {
  implicit final class AddonExecutionResultOps(self: AddonExecutionResult) {
    def raiseErrorIfNeeded[F[_]](implicit m: MonadError[F, Throwable]): F[Unit] =
      if (self.isFailure && self.pure) {
        m.raiseError(new Exception(s"Addon execution failed: $self"))
      } else if (self.isFailure) {
        m.raiseError(
          PermanentError(
            new Exception(
              "Addon execution failed. Do not retry, because some addons were impure."
            )
          )
        )
      } else m.pure(())
  }
}
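A hedged usage sketch of this extension, with made-up object and method names: a failure of only pure addons raises a plain exception (the job may be retried), while a failure involving impure addons raises PermanentError so the job is not retried. The concrete tasks below use it the same way.

// Usage sketch only; assumes a combined AddonExecutionResult is at hand,
// as in ItemAddonTask and ScheduledAddonTask below.
import cats.effect.IO

import docspell.addons.AddonExecutionResult
import docspell.joex.addon.AddonTaskExtension

object RaiseIfFailedSketch extends AddonTaskExtension {
  def failJobOnAddonError(combined: AddonExecutionResult): IO[Unit] =
    // pure-only failures -> plain exception (retriable),
    // impure failures -> PermanentError (not retried)
    combined.raiseErrorIfNeeded[IO]
}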
@@ -0,0 +1,130 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.joex.addon

import cats.data.{Kleisli, OptionT}
import cats.effect._
import cats.syntax.all._
import fs2.io.file.Files

import docspell.addons.{AddonTriggerType, InputEnv, Middleware}
import docspell.backend.joex.AddonOps.ExecResult
import docspell.backend.joex.{AddonOps, LoggerExtension}
import docspell.common._
import docspell.files.FileSupport
import docspell.joex.process.ItemData
import docspell.logging.Logger
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QAttachment

object GenericItemAddonTask extends LoggerExtension with FileSupport {

  private val itemSubdir = "item"
  private val itemDataJson = s"$itemSubdir/item-data.json"
  private val argsMetaJson = s"$itemSubdir/given-data.json"
  private val pdfDir = s"$itemSubdir/pdfs"
  private val originalDir = s"$itemSubdir/originals"
  private val originalMetaJson = s"$itemSubdir/source-files.json"
  private val pdfMetaJson = s"$itemSubdir/pdf-files.json"

  // This environment can be used by the addon to access data of the current task
  private val itemEnv = Map(
    "ITEM_DIR" -> itemSubdir,
    "ITEM_DATA_JSON" -> itemDataJson,
    "ITEM_ARGS_JSON" -> argsMetaJson,
    "ITEM_PDF_DIR" -> pdfDir,
    "ITEM_ORIGINAL_DIR" -> originalDir,
    "ITEM_ORIGINAL_JSON" -> originalMetaJson,
    "ITEM_PDF_JSON" -> pdfMetaJson
  )

  def apply[F[_]: Async](
      ops: AddonOps[F],
      store: Store[F],
      trigger: AddonTriggerType,
      addonTaskIds: Set[Ident]
  )(
      collective: Ident,
      data: ItemData,
      maybeMeta: Option[ProcessItemArgs.ProcessMeta]
  ): Task[F, Unit, ItemData] =
    addonResult(ops, store, trigger, addonTaskIds)(collective, data, maybeMeta).as(
      data
    )

  def addonResult[F[_]: Async](
      ops: AddonOps[F],
      store: Store[F],
      trigger: AddonTriggerType,
      addonTaskIds: Set[Ident]
  )(
      collective: Ident,
      data: ItemData,
      maybeMeta: Option[ProcessItemArgs.ProcessMeta]
  ): Task[F, Unit, ExecResult] =
    Task { ctx =>
      ops.execAll(collective, Set(trigger), addonTaskIds, ctx.logger.some)(
        Middleware.prepare(Kleisli(prepareItemData(ctx.logger, store, data, maybeMeta)))
      )
    }

  def prepareItemData[F[_]: Async](
      logger: Logger[F],
      store: Store[F],
      data: ItemData,
      maybeMeta: Option[ProcessItemArgs.ProcessMeta]
  )(
      input: InputEnv
  ): F[InputEnv] =
    for {
      _ <- logger.debug(s"Preparing item data '${data.item.name}' for addon")
      wd = input.baseDir
      itemMetaFile = wd / itemDataJson
      argsMetaFile = wd / argsMetaJson
      pdfs = wd / pdfDir
      originals = wd / originalDir
      srcJson = wd / originalMetaJson
      pdfJson = wd / pdfMetaJson

      _ <- List(wd / itemSubdir, pdfs, originals).traverse(Files[F].createDirectories)

      _ <- logger.debug("Writing collected item data…")
      _ <- itemMetaFile.writeJson(data)

      _ <- OptionT
        .fromOption[F](maybeMeta)
        .semiflatMap { meta =>
          logger.debug("Writing context meta data…") *>
            argsMetaFile.writeJson(meta)
        }
        .value

      _ <- logger.debug("Storing all attachments…")
      _ <- data.attachments
        .flatMap(a =>
          Vector(
            pdfs / a.id.id -> a.fileId,
            originals / a.id.id -> data.originFile(a.id)
          )
        )
        .traverse_ { case (out, key) =>
          logger.debug(s"Storing attachment $out") *>
            store.fileRepo
              .getBytes(key)
              .through(Files[F].writeAll(out))
              .compile
              .drain
        }

      _ <- logger.debug("Storing file metadata")
      srcMeta <- store.transact(QAttachment.attachmentSourceFile(data.item.id))
      pdfMeta <- store.transact(QAttachment.attachmentFile(data.item.id))
      _ <- srcJson.writeJson(srcMeta)
      _ <- pdfJson.writeJson(pdfMeta)
    } yield input.addEnv(itemEnv)
}
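The itemEnv variables above form the contract an addon sees at runtime. For illustration only, a hypothetical addon program (not part of this commit; an addon can be any executable) could read the prepared item data like this, using nothing but those environment variables:

// Hypothetical addon program. It relies only on the environment variables
// prepared in itemEnv above (here ITEM_DATA_JSON, a path inside the addon's
// working directory); the object name and output are illustrative.
import scala.io.Source
import scala.util.{Failure, Success, Using}

object InspectItemAddon {
  def main(args: Array[String]): Unit =
    sys.env.get("ITEM_DATA_JSON") match {
      case Some(path) =>
        Using(Source.fromFile(path))(_.mkString) match {
          case Success(json) => println(s"item-data.json (truncated): ${json.take(200)}")
          case Failure(err)  => println(s"could not read $path: ${err.getMessage}")
        }
      case None =>
        println("ITEM_DATA_JSON not set - not running as a docspell addon?")
    }
}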
@@ -0,0 +1,76 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.joex.addon

import cats.data.OptionT
import cats.effect._
import cats.syntax.all._

import docspell.addons.AddonTriggerType
import docspell.backend.joex.AddonOps
import docspell.common.{ItemAddonTaskArgs, MetaProposalList}
import docspell.joex.process.ItemData
import docspell.scheduler.{PermanentError, Task}
import docspell.store.Store
import docspell.store.queries.QAttachment
import docspell.store.records._

object ItemAddonTask extends AddonTaskExtension {
  type Args = ItemAddonTaskArgs
  val name = ItemAddonTaskArgs.taskName

  def onCancel[F[_]]: Task[F, Args, Unit] =
    Task.log(_.warn(s"Cancelling ${name.id} task"))

  def apply[F[_]: Async](ops: AddonOps[F], store: Store[F]): Task[F, Args, Result] =
    Task { ctx =>
      (for {
        item <- OptionT(
          store.transact(
            RItem.findByIdAndCollective(ctx.args.itemId, ctx.args.collective)
          )
        )
        data <- OptionT.liftF(makeItemData(store, item))
        inner = GenericItemAddonTask.addonResult(
          ops,
          store,
          AddonTriggerType.ExistingItem,
          ctx.args.addonRunConfigs
        )(ctx.args.collective, data, None)
        execResult <- OptionT.liftF(inner.run(ctx.unit))
        _ <- OptionT.liftF(execResult.combined.raiseErrorIfNeeded[F])
      } yield Result(
        execResult.combined.addonResult,
        execResult.runConfigs.flatMap(_.refs).map(_.archive.nameAndVersion).distinct
      )).getOrElseF(
        Async[F].raiseError(
          PermanentError(
            new NoSuchElementException(s"Item not found for id: ${ctx.args.itemId.id}!")
          )
        )
      )
    }

  def makeItemData[F[_]: Async](store: Store[F], item: RItem): F[ItemData] =
    for {
      attachs <- store.transact(RAttachment.findByItem(item.id))
      rmeta <- store.transact(QAttachment.getAttachmentMetaOfItem(item.id))
      rsource <- store.transact(RAttachmentSource.findByItem(item.id))
      proposals <- store.transact(QAttachment.getMetaProposals(item.id, item.cid))
      tags <- store.transact(RTag.findByItem(item.id))
    } yield ItemData(
      item = item,
      attachments = attachs,
      metas = rmeta,
      dateLabels = Vector.empty,
      originFile = rsource.map(r => (r.id, r.fileId)).toMap,
      givenMeta = proposals,
      tags = tags.map(_.name).toList,
      classifyProposals = MetaProposalList.empty,
      classifyTags = Nil
    )
}
modules/joex/src/main/scala/docspell/joex/addon/Result.scala
@@ -0,0 +1,40 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.joex.addon

import docspell.addons.AddonResult
import docspell.scheduler.JobTaskResultEncoder

import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder

case class Result(addonResult: AddonResult, addons: List[String])

object Result {
  val empty: Result =
    Result(AddonResult.empty, Nil)

  implicit val jsonEncoder: Encoder[Result] =
    deriveEncoder

  implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] =
    JobTaskResultEncoder.fromJson[Result].withMessage { result =>
      result.addonResult match {
        case AddonResult.Success(_) =>
          s"Executed ${result.addons.size} addon(s) successfully."

        case AddonResult.ExecutionError(rc) =>
          s"Addon execution finished with non-zero return code: $rc"

        case AddonResult.ExecutionFailed(ex) =>
          s"Addon execution failed: ${ex.getMessage}"

        case AddonResult.DecodingError(msg) =>
          s"Addon output failed to read: $msg"
      }
    }
}
@@ -0,0 +1,39 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.joex.addon

import cats.effect._
import cats.syntax.all._

import docspell.addons.Middleware
import docspell.backend.joex.{AddonOps, LoggerExtension}
import docspell.backend.ops.OAddons
import docspell.common.{Ident, ScheduledAddonTaskArgs}
import docspell.scheduler.Task

object ScheduledAddonTask extends AddonTaskExtension with LoggerExtension {
  type Args = ScheduledAddonTaskArgs

  val name: Ident = OAddons.scheduledAddonTaskName

  def apply[F[_]: Async](ops: AddonOps[F]): Task[F, Args, Result] =
    Task { ctx =>
      for {
        execRes <- ops.execById(ctx.args.collective, ctx.args.addonTaskId, ctx.logger)(
          Middleware.identity[F]
        )
        _ <- execRes.result.combineAll.raiseErrorIfNeeded[F]
      } yield Result(
        execRes.result.combineAll.addonResult,
        execRes.runConfigs.flatMap(_.refs.map(_.archive.nameAndVersion))
      )
    }

  def onCancel[F[_]]: Task[F, Args, Unit] =
    Task.log(_.warn(s"Cancelling ${name.id} task"))
}
@@ -12,6 +12,7 @@ import fs2.io.file.Path

import docspell.analysis.split.TextSplitter
import docspell.common._
import docspell.common.util.File
import docspell.store.queries.QCollective

import io.circe.generic.semiauto._
@@ -12,6 +12,7 @@ import cats.implicits._
import fs2.io.file.Path

import docspell.common._
import docspell.common.util.File
import docspell.store.Store
import docspell.store.queries.QCollective
import docspell.store.records.REquipment
@@ -14,6 +14,7 @@ import fs2.io.file.Path

import docspell.analysis.classifier.{ClassifierModel, TextClassifier}
import docspell.common._
import docspell.common.util.File
import docspell.logging.Logger
import docspell.store.Store
import docspell.store.records.RClassifierModel
@@ -109,7 +109,7 @@ object MultiUploadArchiveTask {
  )(file: ProcessItemArgs.File): Stream[F, ProcessItemArgs] =
    store.fileRepo
      .getBytes(file.fileMetaId)
      .through(Zip.unzipP[F](8192, args.meta.fileFilter.getOrElse(Glob.all)))
      .through(Zip.unzip[F](8192, args.meta.fileFilter.getOrElse(Glob.all)))
      .flatMap { entry =>
        val hint = MimeTypeHint(entry.name.some, entry.mime.asString.some)
        entry.data
@@ -69,7 +69,7 @@ object AttachmentPreview {
        }

      case mt =>
        ctx.logger.warn(s"Not a pdf file, but ${mt.asString}, cannot get page count.") *>
        ctx.logger.warn(s"Not a pdf file, but ${mt.asString}, cannot create preview.") *>
          (None: Option[RAttachmentPreview]).pure[F]
    }
@@ -146,7 +146,7 @@ object ExtractArchive {
    val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
    ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
      zipData
        .through(Zip.unzipP[F](8192, glob))
        .through(Zip.unzip[F](8192, glob))
        .zipWithIndex
        .flatMap(handleEntry(ctx, store, ra, pos, archive, None))
        .foldMonoid
@@ -26,7 +26,8 @@ import io.circe.{Encoder, Json}
  * @param dateLabels
  *   a separate list of found dates
  * @param originFile
  *   a mapping from an attachment id to a filemeta-id containng the source or origin file
  *   a mapping from an attachment id to a filemeta-id containing the source or origin
  *   file
  * @param givenMeta
  *   meta data to this item that was not "guessed" from an attachment but given and thus
  *   is always correct
@@ -49,7 +50,7 @@ case class ItemData(
) {

  /** sort by weight; order of equal weights is not important, just choose one others are
    * then suggestions doc-date is only set when given explicitely, not from "guessing"
    * then suggestions doc-date is only set when given explicitly, not from "guessing"
    */
  def finalProposals: MetaProposalList =
    MetaProposalList
@@ -98,7 +99,7 @@ object ItemData {
      dates.map(dl => dl.label.copy(label = dl.date.toString))
    }

  // Used to encode the result passed to the job-done event
  // Used to encode the result passed to the job-done event and to supply to addons
  implicit val jsonEncoder: Encoder[ItemData] =
    Encoder.instance { data =>
      val metaMap = data.metas.groupMap(_.id)(identity)
@@ -108,10 +109,12 @@ object ItemData {
        "collective" -> data.item.cid.asJson,
        "source" -> data.item.source.asJson,
        "attachments" -> data.attachments
          .sortBy(_.position)
          .map(a =>
            Json.obj(
              "id" -> a.id.asJson,
              "name" -> a.name.asJson,
              "position" -> a.position.asJson,
              "content" -> metaMap.get(a.id).flatMap(_.head.content).asJson,
              "language" -> metaMap.get(a.id).flatMap(_.head.language).asJson,
              "pages" -> metaMap.get(a.id).flatMap(_.head.pages).asJson
@@ -123,6 +126,18 @@ object ItemData {
        "assumedCorrOrg" -> data.finalProposals
          .find(MetaProposalType.CorrOrg)
          .map(_.values.head.ref)
          .asJson,
        "assumedCorrPerson" -> data.finalProposals
          .find(MetaProposalType.CorrPerson)
          .map(_.values.head.ref)
          .asJson,
        "assumedConcPerson" -> data.finalProposals
          .find(MetaProposalType.ConcPerson)
          .map(_.values.head.ref)
          .asJson,
        "assumedConcEquip" -> data.finalProposals
          .find(MetaProposalType.ConcEquip)
          .map(_.values.head.ref)
          .asJson
      )
    }
@@ -12,6 +12,7 @@ import cats.implicits._
import fs2.Stream

import docspell.analysis.TextAnalyser
import docspell.backend.joex.AddonOps
import docspell.backend.ops.OItem
import docspell.common.{ItemState, ProcessItemArgs}
import docspell.ftsclient.FtsClient
@@ -41,7 +42,8 @@ object ItemHandler {
      itemOps: OItem[F],
      fts: FtsClient[F],
      analyser: TextAnalyser[F],
      regexNer: RegexNerFile[F]
      regexNer: RegexNerFile[F],
      addons: AddonOps[F]
  ): Task[F, Args, Option[ItemData]] =
    logBeginning[F].flatMap(_ =>
      DuplicateCheck[F](store)
@@ -52,7 +54,17 @@ object ItemHandler {
            CreateItem[F](store).contramap(_ => args.pure[F])
          create
            .flatMap(itemStateTask(store, ItemState.Processing))
            .flatMap(safeProcess[F](cfg, store, itemOps, fts, analyser, regexNer))
            .flatMap(
              safeProcess[F](
                cfg,
                store,
                itemOps,
                fts,
                analyser,
                regexNer,
                addons
              )
            )
            .map(_.some)
        }
    )
@@ -76,11 +88,14 @@ object ItemHandler {
      itemOps: OItem[F],
      fts: FtsClient[F],
      analyser: TextAnalyser[F],
      regexNer: RegexNerFile[F]
      regexNer: RegexNerFile[F],
      addons: AddonOps[F]
  )(data: ItemData): Task[F, Args, ItemData] =
    isLastRetry[F].flatMap {
      case true =>
        ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data).attempt
        ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, addons, store)(
          data
        ).attempt
          .flatMap {
            case Right(d) =>
              Task.pure(d)
@@ -91,7 +106,9 @@ object ItemHandler {
              .andThen(_ => Sync[F].raiseError(ex))
          }
      case false =>
        ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data)
        ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, addons, store)(
          data
        )
          .flatMap(itemStateTask(store, ItemState.Created))
    }
@@ -9,7 +9,9 @@ package docspell.joex.process
import cats.effect._
import cats.implicits._

import docspell.addons.AddonTriggerType
import docspell.analysis.TextAnalyser
import docspell.backend.joex.AddonOps
import docspell.backend.ops.OItem
import docspell.common.ProcessItemArgs
import docspell.ftsclient.FtsClient
@@ -26,6 +28,7 @@ object ProcessItem {
      fts: FtsClient[F],
      analyser: TextAnalyser[F],
      regexNer: RegexNerFile[F],
      addonOps: AddonOps[F],
      store: Store[F]
  )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
    ExtractArchive(store)(item)
@@ -35,6 +38,7 @@ object ProcessItem {
      .flatMap(SetGivenData.onlyNew[F](itemOps))
      .flatMap(Task.setProgress(99))
      .flatMap(RemoveEmptyItem(itemOps))
      .flatMap(RunAddons(addonOps, store, AddonTriggerType.FinalProcessItem))

  def processAttachments[F[_]: Async](
      cfg: Config,
@@ -10,7 +10,9 @@ import cats.data.OptionT
import cats.effect._
import cats.implicits._

import docspell.addons.AddonTriggerType
import docspell.analysis.TextAnalyser
import docspell.backend.joex.AddonOps
import docspell.backend.ops.OItem
import docspell.common._
import docspell.ftsclient.FtsClient
@@ -34,13 +36,24 @@ object ReProcessItem {
      itemOps: OItem[F],
      analyser: TextAnalyser[F],
      regexNer: RegexNerFile[F],
      addonOps: AddonOps[F],
      store: Store[F]
  ): Task[F, Args, Unit] =
    Task
      .log[F, Args](_.info("===== Start reprocessing ======"))
      .flatMap(_ =>
        loadItem[F](store)
          .flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer, store))
          .flatMap(
            safeProcess[F](
              cfg,
              fts,
              itemOps,
              analyser,
              regexNer,
              addonOps,
              store
            )
          )
          .map(_ => ())
      )

@@ -99,6 +112,7 @@ object ReProcessItem {
      itemOps: OItem[F],
      analyser: TextAnalyser[F],
      regexNer: RegexNerFile[F],
      addonOps: AddonOps[F],
      store: Store[F],
      data: ItemData
  ): Task[F, Args, ItemData] = {
@@ -129,6 +143,7 @@ object ReProcessItem {
      .processAttachments[F](cfg, fts, analyser, regexNer, store)(data)
      .flatMap(LinkProposal[F](store))
      .flatMap(SetGivenData[F](itemOps))
      .flatMap(RunAddons[F](addonOps, store, AddonTriggerType.FinalReprocessItem))
      .contramap[Args](convertArgs(lang))
  }
}
@@ -153,11 +168,21 @@ object ReProcessItem {
      itemOps: OItem[F],
      analyser: TextAnalyser[F],
      regexNer: RegexNerFile[F],
      addonOps: AddonOps[F],
      store: Store[F]
  )(data: ItemData): Task[F, Args, ItemData] =
    isLastRetry[F].flatMap {
      case true =>
        processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data).attempt
        processFiles[F](
          cfg,
          fts,
          itemOps,
          analyser,
          regexNer,
          addonOps,
          store,
          data
        ).attempt
          .flatMap {
            case Right(d) =>
              Task.pure(d)
@@ -167,7 +192,16 @@ object ReProcessItem {
            ).andThen(_ => Sync[F].raiseError(ex))
          }
      case false =>
        processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data)
        processFiles[F](
          cfg,
          fts,
          itemOps,
          analyser,
          regexNer,
          addonOps,
          store,
          data
        )
    }

  private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
@@ -0,0 +1,43 @@
/*
 * Copyright 2020 Eike K. & Contributors
 *
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

package docspell.joex.process

import cats.effect._
import cats.syntax.all._

import docspell.addons.AddonTriggerType
import docspell.backend.joex.AddonOps
import docspell.common.ProcessItemArgs
import docspell.joex.addon.GenericItemAddonTask
import docspell.scheduler.Task
import docspell.store.Store

/** Run registered addons in the context of item processing. The addon has access to the
  * current item data and can apply custom processing logic.
  */
object RunAddons {
  type Args = ProcessItemArgs

  def apply[F[_]: Async](
      ops: AddonOps[F],
      store: Store[F],
      trigger: AddonTriggerType
  )(
      data: ItemData
  ): Task[F, Args, ItemData] =
    if (data.item.state.isInvalid && data.attachments.isEmpty) {
      Task.pure(data)
    } else
      Task { ctx =>
        val inner = GenericItemAddonTask(ops, store, trigger, Set.empty)(
          ctx.args.meta.collective,
          data,
          ctx.args.meta.some
        )
        inner.run(ctx.unit)
      }
}
@@ -44,7 +44,7 @@ object SetGivenData {
      for {
        _ <- ctx.logger.info("Starting setting given data")
        _ <- ctx.logger.debug(s"Set item folder: '${folderId.map(_.id)}'")
        e <- ops.setFolder(itemId, folderId, collective).attempt
        e <- ops.setFolder(itemId, folderId.map(_.id), collective).attempt
        _ <- e.fold(
          ex => ctx.logger.warn(s"Error setting folder: ${ex.getMessage}"),
          res =>
@@ -10,7 +10,7 @@ import cats.effect._
import cats.implicits._

import docspell.common.{Duration, Ident, Timestamp}
import docspell.joex.JoexApp
import docspell.joex.{Config, JoexApp}
import docspell.joexapi.model._
import docspell.store.records.RJobLog

@@ -20,7 +20,7 @@ import org.http4s.dsl.Http4sDsl

object JoexRoutes {

  def apply[F[_]: Async](app: JoexApp[F]): HttpRoutes[F] = {
  def apply[F[_]: Async](cfg: Config, app: JoexApp[F]): HttpRoutes[F] = {
    val dsl = new Http4sDsl[F] {}
    import dsl._
    HttpRoutes.of[F] {
@@ -64,6 +64,11 @@ object JoexRoutes {
            BasicResult(flag, if (flag) "Cancel request submitted" else "Job not found")
          )
        } yield resp

      case GET -> Root / "addon" / "config" =>
        val data =
          AddonSupport(cfg.appId, cfg.addons.executorConfig.runner)
        Ok(data)
    }
  }