5
.gitignore
vendored
@ -15,6 +15,11 @@ _site/
|
|||||||
/website/site/static/syntax-*.css
|
/website/site/static/syntax-*.css
|
||||||
/website/site/static/webfonts/
|
/website/site/static/webfonts/
|
||||||
/website/site/static/files/*.woff*
|
/website/site/static/files/*.woff*
|
||||||
|
/website/site/static/examples/
|
||||||
|
/website/site/templates/shortcodes/addon-output
|
||||||
|
/website/site/templates/shortcodes/item-data
|
||||||
|
/website/site/templates/shortcodes/item-args
|
||||||
|
/website/site/templates/shortcodes/file-meta
|
||||||
/website/site/templates/shortcodes/server.conf
|
/website/site/templates/shortcodes/server.conf
|
||||||
/website/site/templates/shortcodes/sample-exim.conf
|
/website/site/templates/shortcodes/sample-exim.conf
|
||||||
/website/site/templates/shortcodes/joex.conf
|
/website/site/templates/shortcodes/joex.conf
|
||||||
|
22
build.sbt
@ -966,8 +966,28 @@ val website = project
|
|||||||
)
|
)
|
||||||
IO.append(target, IO.readBytes(changelog))
|
IO.append(target, IO.readBytes(changelog))
|
||||||
Seq(target)
|
Seq(target)
|
||||||
}.taskValue
|
}.taskValue,
|
||||||
|
zolaPrepare := {
|
||||||
|
val log = streams.value.log
|
||||||
|
log.info("Generating examples…")
|
||||||
|
val templateOut = baseDirectory.value / "site" / "templates" / "shortcodes"
|
||||||
|
IO.createDirectory(templateOut)
|
||||||
|
|
||||||
|
// sbt crashes when interpolating values into the string in `toTask`
|
||||||
|
// this is the reason for the following construct…
|
||||||
|
(Compile / run).toTask(s" addon-output /tmp/addon-output.json").value
|
||||||
|
(Compile / run).toTask(s" item-data /tmp/item-data.json").value
|
||||||
|
(Compile / run).toTask(s" item-args /tmp/item-args.json").value
|
||||||
|
(Compile / run).toTask(s" file-meta /tmp/file-meta.json").value
|
||||||
|
|
||||||
|
val inputs = List("addon-output", "item-data", "item-args", "file-meta")
|
||||||
|
|
||||||
|
inputs.foreach { name =>
|
||||||
|
IO.move(file(s"/tmp/$name.json"), templateOut / name)
|
||||||
|
}
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
.dependsOn(addonlib, joex)
|
||||||
|
|
||||||
val root = project
|
val root = project
|
||||||
.in(file("."))
|
.in(file("."))
|
||||||
|
@ -33,6 +33,7 @@ services:
|
|||||||
- DOCSPELL_SERVER_INTEGRATION__ENDPOINT_ENABLED=true
|
- DOCSPELL_SERVER_INTEGRATION__ENDPOINT_ENABLED=true
|
||||||
- DOCSPELL_SERVER_INTEGRATION__ENDPOINT_HTTP__HEADER_ENABLED=true
|
- DOCSPELL_SERVER_INTEGRATION__ENDPOINT_HTTP__HEADER_ENABLED=true
|
||||||
- DOCSPELL_SERVER_INTEGRATION__ENDPOINT_HTTP__HEADER_HEADER__VALUE=integration-password123
|
- DOCSPELL_SERVER_INTEGRATION__ENDPOINT_HTTP__HEADER_HEADER__VALUE=integration-password123
|
||||||
|
- DOCSPELL_SERVER_BACKEND_ADDONS_ENABLED=false
|
||||||
depends_on:
|
depends_on:
|
||||||
- solr
|
- solr
|
||||||
|
|
||||||
@ -53,10 +54,15 @@ services:
|
|||||||
- DOCSPELL_JOEX_JDBC_PASSWORD=dbpass
|
- DOCSPELL_JOEX_JDBC_PASSWORD=dbpass
|
||||||
- DOCSPELL_JOEX_JDBC_URL=jdbc:postgresql://db:5432/dbname
|
- DOCSPELL_JOEX_JDBC_URL=jdbc:postgresql://db:5432/dbname
|
||||||
- DOCSPELL_JOEX_JDBC_USER=dbuser
|
- DOCSPELL_JOEX_JDBC_USER=dbuser
|
||||||
|
- DOCSPELL_JOEX_ADDONS_EXECUTOR__CONFIG_RUNNER=docker,trivial
|
||||||
ports:
|
ports:
|
||||||
- "7878:7878"
|
- "7878:7878"
|
||||||
depends_on:
|
depends_on:
|
||||||
- solr
|
- solr
|
||||||
|
## Uncomment when using the "docker" runner with addons
|
||||||
|
# volumes:
|
||||||
|
# - /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
# - /tmp:/tmp
|
||||||
|
|
||||||
# The consumedir container watches a directory for files to upload
|
# The consumedir container watches a directory for files to upload
|
||||||
# to docspell restserver. This uses the `dsc` tool. For information
|
# to docspell restserver. This uses the `dsc` tool. For information
|
||||||
|
@ -16,6 +16,8 @@ if [ -z "$2" ] || [ "$2" == "--push" ]; then
|
|||||||
if [ ! -z "$push" ]; then
|
if [ ! -z "$push" ]; then
|
||||||
echo "Running with $push !"
|
echo "Running with $push !"
|
||||||
fi
|
fi
|
||||||
|
elif [ "$2" == "--load" ]; then
|
||||||
|
push="$2"
|
||||||
else
|
else
|
||||||
echo "Don't understand second argument: $2"
|
echo "Don't understand second argument: $2"
|
||||||
exit 1
|
exit 1
|
||||||
@ -32,7 +34,7 @@ cd "$(dirname "$0")"
|
|||||||
|
|
||||||
trap "{ docker buildx rm docspell-builder; }" EXIT
|
trap "{ docker buildx rm docspell-builder; }" EXIT
|
||||||
|
|
||||||
platforms="linux/amd64,linux/arm64,linux/arm/v7"
|
platforms=${PLATFORMS:-"linux/amd64,linux/arm64,linux/arm/v7"}
|
||||||
docker buildx create --name docspell-builder --use
|
docker buildx create --name docspell-builder --use
|
||||||
|
|
||||||
case $version in
|
case $version in
|
||||||
|
@ -12,6 +12,7 @@ RUN JDKPKG="openjdk11-jre"; \
|
|||||||
tzdata \
|
tzdata \
|
||||||
bash \
|
bash \
|
||||||
curl \
|
curl \
|
||||||
|
docker \
|
||||||
ghostscript \
|
ghostscript \
|
||||||
tesseract-ocr \
|
tesseract-ocr \
|
||||||
tesseract-ocr-data-deu \
|
tesseract-ocr-data-deu \
|
||||||
@ -54,7 +55,7 @@ RUN JDKPKG="openjdk11-jre"; \
|
|||||||
&& pip3 install ocrmypdf \
|
&& pip3 install ocrmypdf \
|
||||||
&& curl -Ls $UNO_URL -o /usr/local/bin/unoconv \
|
&& curl -Ls $UNO_URL -o /usr/local/bin/unoconv \
|
||||||
&& chmod +x /usr/local/bin/unoconv \
|
&& chmod +x /usr/local/bin/unoconv \
|
||||||
&& apk del curl libxml2-dev libxslt-dev zlib-dev g++ python3-dev py3-pip libffi-dev qpdf-dev openssl-dev \
|
&& apk del libxml2-dev libxslt-dev zlib-dev g++ python3-dev py3-pip libffi-dev qpdf-dev openssl-dev \
|
||||||
&& ln -s /usr/bin/python3 /usr/bin/python
|
&& ln -s /usr/bin/python3 /usr/bin/python
|
||||||
|
|
||||||
WORKDIR /opt
|
WORKDIR /opt
|
||||||
|
@ -46,7 +46,12 @@ object AddonExecutor {
|
|||||||
in.cacheDir,
|
in.cacheDir,
|
||||||
in.addons
|
in.addons
|
||||||
)
|
)
|
||||||
rs <- ctx.traverse(c => runAddon(logger.withAddon(c), in.env)(c))
|
rs <-
|
||||||
|
if (cfg.failFast) ctx.foldLeftM(List.empty[AddonResult]) { (res, c) =>
|
||||||
|
if (res.headOption.exists(_.isFailure)) res.pure[F]
|
||||||
|
else runAddon(logger.withAddon(c), in.env)(c).map(r => r :: res)
|
||||||
|
}
|
||||||
|
else ctx.traverse(c => runAddon(logger.withAddon(c), in.env)(c))
|
||||||
pure = ctx.foldl(true)((b, c) => b && c.meta.isPure)
|
pure = ctx.foldl(true)((b, c) => b && c.meta.isPure)
|
||||||
} yield AddonExecutionResult(rs, pure)
|
} yield AddonExecutionResult(rs, pure)
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,8 @@ case class AddonExecutorConfig(
|
|||||||
runTimeout: Duration,
|
runTimeout: Duration,
|
||||||
nspawn: NSpawn,
|
nspawn: NSpawn,
|
||||||
nixRunner: NixConfig,
|
nixRunner: NixConfig,
|
||||||
dockerRunner: DockerConfig
|
dockerRunner: DockerConfig,
|
||||||
|
failFast: Boolean
|
||||||
)
|
)
|
||||||
|
|
||||||
object AddonExecutorConfig {
|
object AddonExecutorConfig {
|
||||||
|
@ -25,6 +25,7 @@ sealed trait AddonResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
object AddonResult {
|
object AddonResult {
|
||||||
|
/** A successful result that carries `AddonOutput.empty`. */
val emptySuccess: AddonResult = success(AddonOutput.empty)
|
||||||
|
|
||||||
/** The addon ran successfully, but decoding its stdout failed. */
|
/** The addon ran successfully, but decoding its stdout failed. */
|
||||||
case class DecodingError(message: String) extends AddonResult {
|
case class DecodingError(message: String) extends AddonResult {
|
||||||
|
@ -7,7 +7,11 @@
|
|||||||
package docspell.addons
|
package docspell.addons
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
|
import cats.syntax.all._
|
||||||
|
|
||||||
|
import docspell.addons.out.AddonOutput
|
||||||
|
import docspell.common.UrlReader
|
||||||
|
import docspell.common.bc.{BackendCommand, ItemAction}
|
||||||
import docspell.logging.{Level, TestLoggingConfig}
|
import docspell.logging.{Level, TestLoggingConfig}
|
||||||
|
|
||||||
import munit._
|
import munit._
|
||||||
@ -60,4 +64,76 @@ class AddonExecutorTest extends CatsEffectSuite with Fixtures with TestLoggingCo
|
|||||||
_ = assertEquals(r.runnerType, List(RunnerType.Docker, RunnerType.NixFlake))
|
_ = assertEquals(r.runnerType, List(RunnerType.Docker, RunnerType.NixFlake))
|
||||||
} yield ()
|
} yield ()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Two addons are given, the first one failing. With `failFast = true` the
// overall result is expected to be a failure that holds a single addon result.
tempDir.test("fail early if configured so") { dir =>
  val executor = AddonExecutor[IO](
    testExecutorConfig(RunnerType.Trivial).copy(failFast = true),
    UrlReader.defaultReader
  ).execute(logger)

  val tagCommand =
    BackendCommand.item(id("xyz-item"), List(ItemAction.AddTags(Set("tag1", "tag2"))))
  val secondOutput = AddonOutput(commands = List(tagCommand))

  val inputEnv = createInputEnv(
    dir,
    AddonGenerator.failingAddon("addon1"),
    AddonGenerator.successAddon("addon2", output = secondOutput.some)
  )

  for {
    outcome <- inputEnv.use(executor.run)
    _ = assert(outcome.isFailure)
    _ = assert(outcome.pure)
    _ = assertEquals(outcome.addonResult, AddonResult.executionError(1))
    _ = assertEquals(outcome.addonResults.size, 1)
  } yield ()
}
|
||||||
|
|
||||||
|
// Same setup as the fail-fast case, but with `failFast = false`: both addon
// results are expected, the failure first and the successful output second,
// while the combined result is still a failure.
tempDir.test("do not stop after failing addons") { dir =>
  val executor = AddonExecutor[IO](
    testExecutorConfig(RunnerType.Trivial).copy(failFast = false),
    UrlReader.defaultReader
  ).execute(logger)

  val tagCommand =
    BackendCommand.item(id("xyz-item"), List(ItemAction.AddTags(Set("tag1", "tag2"))))
  val secondOutput = AddonOutput(commands = List(tagCommand))

  val inputEnv = createInputEnv(
    dir,
    AddonGenerator.failingAddon("addon1"),
    AddonGenerator.successAddon("addon2", output = secondOutput.some)
  )

  for {
    outcome <- inputEnv.use(executor.run)
    _ = assert(outcome.isFailure)
    _ = assert(outcome.pure)
    _ = assertEquals(outcome.addonResult, AddonResult.executionError(1))
    _ = assertEquals(outcome.addonResults.size, 2)
    _ = assertEquals(outcome.addonResults.head, AddonResult.executionError(1))
    _ = assertEquals(outcome.addonResults(1), AddonResult.success(secondOutput))
  } yield ()
}
|
||||||
|
|
||||||
|
// Two successful addons, each emitting one command for the same item. The
// overall result is expected to equal the combination of both outputs, while
// the individual results stay available in execution order.
tempDir.test("combine outputs") { dir =>
  val executor = AddonExecutor[IO](
    testExecutorConfig(RunnerType.Trivial).copy(failFast = false),
    UrlReader.defaultReader
  ).execute(logger)

  val addTags =
    BackendCommand.item(id("xyz-item"), List(ItemAction.AddTags(Set("tag1", "tag2"))))
  val rename =
    BackendCommand.item(id("xyz-item"), List(ItemAction.SetName("new item name")))
  val firstOutput = AddonOutput(commands = List(addTags))
  val secondOutput = AddonOutput(commands = List(rename))

  val inputEnv = createInputEnv(
    dir,
    AddonGenerator.successAddon("addon1", output = firstOutput.some),
    AddonGenerator.successAddon("addon2", output = secondOutput.some)
  )

  for {
    outcome <- inputEnv.use(executor.run)
    _ = assert(outcome.isSuccess)
    _ = assert(outcome.pure)
    _ = assertEquals(
      outcome.addonResult,
      AddonResult.success(firstOutput.combine(secondOutput))
    )
    _ = assertEquals(outcome.addonResults.size, 2)
    _ = assertEquals(outcome.addonResults.head, AddonResult.success(firstOutput))
    _ = assertEquals(outcome.addonResults(1), AddonResult.success(secondOutput))
  } yield ()
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,115 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.addons
|
||||||
|
|
||||||
|
import cats.effect.{IO, Resource}
|
||||||
|
import cats.syntax.all._
|
||||||
|
import fs2.Stream
|
||||||
|
import fs2.io.file.{Files, Path, PosixPermissions}
|
||||||
|
|
||||||
|
import docspell.addons.out.AddonOutput
|
||||||
|
import docspell.common.LenientUri
|
||||||
|
import docspell.files.Zip
|
||||||
|
|
||||||
|
import io.circe.syntax._
|
||||||
|
|
||||||
|
/** Test helper that writes small addon archives into a temporary directory.
  *
  * Every archive is a zip file containing a JSON descriptor
  * (`docspell-addon.json`) and an executable shell script (`addon.sh`). The
  * temporary directory is managed by the returned `Resource`.
  */
object AddonGenerator {
  private[this] val logger = docspell.logging.getLogger[IO]

  private val descriptorName = "docspell-addon.json"
  private val scriptName = "addon.sh"
  private val archiveName = "addon.zip"

  /** Permissions applied to the generated script. The octal literal is constant;
    * a parse failure is raised in `IO` instead of calling `.get` on the `Option`.
    */
  private val scriptPermissions: IO[PosixPermissions] =
    IO.fromOption(PosixPermissions.fromOctal("777"))(
      new IllegalStateException("Invalid octal permissions: 777")
    )

  /** Creates an addon whose script terminates with exit code 0.
    *
    * @param name
    *   addon name, used in the descriptor and the returned archive
    * @param version
    *   addon version, used in the descriptor and the returned archive
    * @param output
    *   when defined, the script prints this value as compact JSON to stdout and
    *   the descriptor is created with `collectOutput = true`; otherwise the
    *   script is just `exit 0` and `collectOutput` is `false`
    */
  def successAddon(
      name: String,
      version: String = "1.0",
      output: Option[AddonOutput] = None
  ): Resource[IO, AddonArchive] =
    output match {
      case None =>
        generate(name, version, false)("exit 0")
      case Some(out) =>
        // The here-document delimiter is quoted so that the shell prints the
        // JSON verbatim, without expanding `$` or backticks contained in it.
        generate(name, version, true)(
          s"""
             |cat <<-'EOF'
             |${out.asJson.noSpaces}
             |EOF""".stripMargin
        )
    }

  /** Creates an addon whose script terminates with exit code 1.
    *
    * @param pure
    *   forwarded to [[generate]] as its `collectOutput` argument
    */
  def failingAddon(
      name: String,
      version: String = "1.0",
      pure: Boolean = true
  ): Resource[IO, AddonArchive] =
    generate(name, version, pure)("exit 1")

  /** Writes descriptor, script and zip file into a fresh temporary directory and
    * returns an `AddonArchive` pointing to the zip file.
    *
    * @param collectOutput
    *   written to the descriptor options as `collectOutput`; `networking` is set
    *   to its negation
    * @param script
    *   shell code placed verbatim after the shebang line
    */
  def generate(name: String, version: String, collectOutput: Boolean)(
      script: String
  ): Resource[IO, AddonArchive] =
    Files[IO].tempDirectory(None, s"addon-gen-$name-$version-", None).evalMap { dir =>
      for {
        descriptor <- createDescriptor(dir, name, version, collectOutput)
        bin <- createScript(dir, script)
        zip <- createZip(dir, List(descriptor, bin))
        url = LenientUri.fromJava(zip.toNioPath.toUri.toURL)
      } yield AddonArchive(url, name, version)
    }

  /** Zips the given files, each stored under its file name, into `dir/addon.zip`. */
  private def createZip(dir: Path, files: List[Path]): IO[Path] = {
    val target = dir / archiveName
    Stream
      .emits(files)
      .map(f => (f.fileName.toString, Files[IO].readAll(f)))
      .covary[IO]
      .through(Zip.zip[IO](logger, 8192))
      .through(Files[IO].writeAll(target))
      .compile
      .drain
      .as(target)
  }

  /** Writes the addon descriptor as compact JSON to `dir/docspell-addon.json`. */
  private def createDescriptor(
      dir: Path,
      name: String,
      version: String,
      collectOutput: Boolean
  ): IO[Path] = {
    val target = dir / descriptorName
    val meta = AddonMeta(
      meta = AddonMeta.Meta(name, version, None),
      triggers = Set(AddonTriggerType.ExistingItem: AddonTriggerType).some,
      args = None,
      runner =
        AddonMeta.Runner(None, None, AddonMeta.TrivialRunner(true, scriptName).some).some,
      options =
        AddonMeta.Options(networking = !collectOutput, collectOutput = collectOutput).some
    )

    Stream
      .emit(meta.asJson.noSpaces)
      .covary[IO]
      .through(fs2.text.utf8.encode)
      .through(Files[IO].writeAll(target))
      .compile
      .drain
      .as(target)
  }

  /** Writes `content` into `dir/addon.sh` and marks the file executable.
    *
    * The shebang must be the very first bytes of the file to take effect, so the
    * text is assembled by plain concatenation. This also keeps `content`
    * untouched: no margin stripping is applied to the caller's script.
    */
  private def createScript(dir: Path, content: String): IO[Path] = {
    val scriptFile = dir / scriptName
    val text = "#!/usr/bin/env bash\n\n" + content + "\n\n"
    Stream
      .emit(text)
      .covary[IO]
      .through(fs2.text.utf8.encode)
      .through(Files[IO].writeAll(scriptFile))
      .compile
      .drain
      .as(scriptFile)
      .flatTap(f => scriptPermissions.flatMap(Files[IO].setPosixPermissions(f, _)))
  }
}
|
@ -13,7 +13,8 @@ import fs2.io.file.{Files, Path, PosixPermissions}
|
|||||||
import docspell.addons.AddonExecutorConfig._
|
import docspell.addons.AddonExecutorConfig._
|
||||||
import docspell.addons.AddonMeta._
|
import docspell.addons.AddonMeta._
|
||||||
import docspell.addons.AddonTriggerType._
|
import docspell.addons.AddonTriggerType._
|
||||||
import docspell.common.{Duration, LenientUri}
|
import docspell.common.exec.Env
|
||||||
|
import docspell.common.{Duration, Ident, LenientUri}
|
||||||
import docspell.logging.TestLoggingConfig
|
import docspell.logging.TestLoggingConfig
|
||||||
|
|
||||||
import munit.CatsEffectSuite
|
import munit.CatsEffectSuite
|
||||||
@ -22,6 +23,8 @@ trait Fixtures extends TestLoggingConfig { self: CatsEffectSuite =>
|
|||||||
|
|
||||||
val files: Files[IO] = Files[IO]
|
val files: Files[IO] = Files[IO]
|
||||||
|
|
||||||
|
/** Creates an `Ident` from `str` using `Ident.unsafe`. */
def id(str: String): Ident = Ident.unsafe(str)
|
||||||
|
|
||||||
val dummyAddonUrl =
|
val dummyAddonUrl =
|
||||||
LenientUri.fromJava(getClass.getResource("/docspell-dummy-addon-master.zip"))
|
LenientUri.fromJava(getClass.getResource("/docspell-dummy-addon-master.zip"))
|
||||||
|
|
||||||
@ -59,13 +62,24 @@ trait Fixtures extends TestLoggingConfig { self: CatsEffectSuite =>
|
|||||||
runner: RunnerType,
|
runner: RunnerType,
|
||||||
runners: RunnerType*
|
runners: RunnerType*
|
||||||
): AddonExecutorConfig = {
|
): AddonExecutorConfig = {
|
||||||
val nspawn = NSpawn(true, "sudo", "systemd-nspawn", Duration.millis(100))
|
val nspawn = NSpawn(false, "sudo", "systemd-nspawn", Duration.millis(100))
|
||||||
AddonExecutorConfig(
|
AddonExecutorConfig(
|
||||||
runner :: runners.toList,
|
runner = runner :: runners.toList,
|
||||||
Duration.minutes(2),
|
runTimeout = Duration.minutes(2),
|
||||||
nspawn,
|
nspawn = nspawn,
|
||||||
NixConfig("nix", Duration.minutes(2)),
|
nixRunner = NixConfig("nix", Duration.minutes(2)),
|
||||||
DockerConfig("docker", Duration.minutes(2))
|
dockerRunner = DockerConfig("docker", Duration.minutes(2)),
|
||||||
|
failFast = true
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Builds an `InputEnv` from one or more generated addon archives.
  *
  * Each archive is wrapped in an `AddonRef` with an empty second argument. The
  * given `dir` is used for all three path arguments of `InputEnv`, and the
  * environment is `Env.empty`.
  */
def createInputEnv(
    dir: Path,
    addon: Resource[IO, AddonArchive],
    more: Resource[IO, AddonArchive]*
): Resource[IO, InputEnv] = {
  val archives = addon :: more.toList
  for {
    refs <- archives.traverse(archive => archive.map(AddonRef(_, "")))
  } yield InputEnv(refs, dir, dir, dir, Env.empty)
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -83,7 +83,7 @@ object OJob {
|
|||||||
else
|
else
|
||||||
pubsub.publish1IgnoreErrors(
|
pubsub.publish1IgnoreErrors(
|
||||||
JobDone.topic,
|
JobDone.topic,
|
||||||
JobDone(job.id, job.group, job.task, job.args, JobState.Cancelled)
|
JobDone(job.id, job.group, job.task, job.args, JobState.Cancelled, None)
|
||||||
)
|
)
|
||||||
} yield JobCancelResult.removed
|
} yield JobCancelResult.removed
|
||||||
|
|
||||||
|
@ -829,6 +829,11 @@ Docpell Update Check
|
|||||||
container-wait = "100 millis"
|
container-wait = "100 millis"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# When multiple addons are executed sequentially, stop after the
|
||||||
|
# first failing result. If this is false, then subsequent addons
|
||||||
|
# will be run for their side effects only.
|
||||||
|
fail-fast = true
|
||||||
|
|
||||||
# The timeout for running an addon.
|
# The timeout for running an addon.
|
||||||
run-timeout = "15 minutes"
|
run-timeout = "15 minutes"
|
||||||
|
|
||||||
|
@ -14,6 +14,8 @@ import docspell.pubsub.api.PubSubT
|
|||||||
import docspell.restserver.ws.OutputEvent
|
import docspell.restserver.ws.OutputEvent
|
||||||
import docspell.scheduler.msg.{JobDone, JobSubmitted}
|
import docspell.scheduler.msg.{JobDone, JobSubmitted}
|
||||||
|
|
||||||
|
import io.circe.parser
|
||||||
|
|
||||||
/** Subscribes to those events from docspell that are forwarded to the websocket endpoints
|
/** Subscribes to those events from docspell that are forwarded to the websocket endpoints
|
||||||
*/
|
*/
|
||||||
object Subscriptions {
|
object Subscriptions {
|
||||||
@ -27,7 +29,14 @@ object Subscriptions {
|
|||||||
/** Subscribes to `JobDone.topic` and converts every message into an
  * `OutputEvent.JobDone`.
  *
  * The job arguments arrive as a string; they are passed on only when they
  * parse as JSON, otherwise the event carries `None` for them.
  */
def jobDone[F[_]](pubSub: PubSubT[F]): Stream[F, OutputEvent] =
  pubSub.subscribe(JobDone.topic).map { msg =>
    val done = msg.body
    val argsJson = parser.parse(done.args).toOption
    OutputEvent.JobDone(done.group, done.task, argsJson, done.result)
  }
|
||||||
|
|
||||||
def jobSubmitted[F[_]](pubSub: PubSubT[F]): Stream[F, OutputEvent] =
|
def jobSubmitted[F[_]](pubSub: PubSubT[F]): Stream[F, OutputEvent] =
|
||||||
pubSub
|
pubSub
|
||||||
|
@ -40,12 +40,20 @@ object OutputEvent {
|
|||||||
Msg("job-submitted", task).asJson
|
Msg("job-submitted", task).asJson
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Output event for a finished job, rendered with the tag `job-done`.
  *
  * @param group
  *   compared against the collective of an auth token in `forCollective`
  * @param task
  *   the task identifier, sent as `task`
  * @param args
  *   optional job arguments as JSON, sent as `args`
  * @param result
  *   optional job result as JSON, sent as `result`
  */
final case class JobDone(
    group: Ident,
    task: Ident,
    args: Option[Json],
    result: Option[Json]
) extends OutputEvent {

  /** True when the token's account belongs to the collective in `group`. */
  def forCollective(token: AuthToken): Boolean = {
    val collective = token.account.collective
    collective == group
  }

  /** Renders the event as a `Msg` with tag `job-done` and a content object
    * holding the keys `task`, `args` and `result`.
    */
  def asJson: Json = {
    val content = Map(
      "task" -> task.asJson,
      "args" -> args.asJson,
      "result" -> result.asJson
    )
    Msg("job-done", content).asJson
  }
}
|
||||||
|
|
||||||
final case class JobsWaiting(collective: Ident, count: Int) extends OutputEvent {
|
final case class JobsWaiting(collective: Ident, count: Int) extends OutputEvent {
|
||||||
|
@ -10,7 +10,7 @@ import docspell.common._
|
|||||||
import docspell.pubsub.api.{Topic, TypedTopic}
|
import docspell.pubsub.api.{Topic, TypedTopic}
|
||||||
|
|
||||||
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
|
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
|
||||||
import io.circe.{Decoder, Encoder}
|
import io.circe.{Decoder, Encoder, Json}
|
||||||
|
|
||||||
/** Message to notify about finished jobs. They have a final state. */
|
/** Message to notify about finished jobs. They have a final state. */
|
||||||
final case class JobDone(
|
final case class JobDone(
|
||||||
@ -18,7 +18,8 @@ final case class JobDone(
|
|||||||
group: Ident,
|
group: Ident,
|
||||||
task: Ident,
|
task: Ident,
|
||||||
args: String,
|
args: String,
|
||||||
state: JobState
|
state: JobState,
|
||||||
|
result: Option[Json]
|
||||||
)
|
)
|
||||||
object JobDone {
|
object JobDone {
|
||||||
implicit val jsonDecoder: Decoder[JobDone] =
|
implicit val jsonDecoder: Decoder[JobDone] =
|
||||||
|
@ -231,7 +231,7 @@ final class SchedulerImpl[F[_]: Async](
|
|||||||
_ <- Sync[F].whenA(JobState.isDone(finishState))(
|
_ <- Sync[F].whenA(JobState.isDone(finishState))(
|
||||||
pubSub.publish1IgnoreErrors(
|
pubSub.publish1IgnoreErrors(
|
||||||
JobDone.topic,
|
JobDone.topic,
|
||||||
JobDone(job.id, job.group, job.task, job.args, finishState)
|
JobDone(job.id, job.group, job.task, job.args, finishState, result.json)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
_ <- Sync[F].whenA(JobState.isDone(finishState))(
|
_ <- Sync[F].whenA(JobState.isDone(finishState))(
|
||||||
|
@ -314,13 +314,16 @@ updateWithSub msg model =
|
|||||||
|
|
||||||
ReceiveWsMessage data ->
|
ReceiveWsMessage data ->
|
||||||
case data of
|
case data of
|
||||||
Ok (JobDone task) ->
|
Ok (JobDone details) ->
|
||||||
let
|
let
|
||||||
isProcessItem =
|
isProcessItem =
|
||||||
task == "process-item"
|
details.task == "process-item"
|
||||||
|
|
||||||
isDownloadZip =
|
isDownloadZip =
|
||||||
task == "download-query-zip"
|
details.task == "download-query-zip"
|
||||||
|
|
||||||
|
isAddonExistingItem =
|
||||||
|
Data.ServerEvent.isAddonExistingItem model.itemDetailModel.detail.item.id details
|
||||||
|
|
||||||
newModel =
|
newModel =
|
||||||
{ model
|
{ model
|
||||||
@ -337,6 +340,9 @@ updateWithSub msg model =
|
|||||||
else if Page.isDashboardPage model.page && isProcessItem then
|
else if Page.isDashboardPage model.page && isProcessItem then
|
||||||
updateDashboard texts Page.Dashboard.Data.reloadDashboardData newModel
|
updateDashboard texts Page.Dashboard.Data.reloadDashboardData newModel
|
||||||
|
|
||||||
|
else if Page.isDetailPage model.page && isAddonExistingItem then
|
||||||
|
updateItemDetail texts (Page.ItemDetail.Data.ReloadItem True) newModel
|
||||||
|
|
||||||
else
|
else
|
||||||
( newModel, Cmd.none, Sub.none )
|
( newModel, Cmd.none, Sub.none )
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ view texts model =
|
|||||||
, a
|
, a
|
||||||
[ class S.successMessageLink
|
[ class S.successMessageLink
|
||||||
, href "#"
|
, href "#"
|
||||||
, onClick ReloadItem
|
, onClick (ReloadItem False)
|
||||||
]
|
]
|
||||||
[ text texts.refreshNow
|
[ text texts.refreshNow
|
||||||
]
|
]
|
||||||
|
@ -281,7 +281,7 @@ initSelectViewModel =
|
|||||||
|
|
||||||
type Msg
|
type Msg
|
||||||
= ToggleMenu
|
= ToggleMenu
|
||||||
| ReloadItem
|
| ReloadItem Bool
|
||||||
| Init
|
| Init
|
||||||
| SetItem ItemDetail
|
| SetItem ItemDetail
|
||||||
| SetActiveAttachment Int
|
| SetActiveAttachment Int
|
||||||
|
@ -387,12 +387,22 @@ update inav env msg model =
|
|||||||
resultModel
|
resultModel
|
||||||
{ model | menuOpen = not model.menuOpen }
|
{ model | menuOpen = not model.menuOpen }
|
||||||
|
|
||||||
ReloadItem ->
|
ReloadItem withFile ->
|
||||||
if model.item.id == "" then
|
if model.item.id == "" then
|
||||||
resultModel model
|
resultModel model
|
||||||
|
|
||||||
else
|
else
|
||||||
resultModelCmd ( model, Api.itemDetail env.flags model.item.id GetItemResp )
|
resultModelCmd
|
||||||
|
( model
|
||||||
|
, Cmd.batch
|
||||||
|
[ Api.itemDetail env.flags model.item.id GetItemResp
|
||||||
|
, if withFile then
|
||||||
|
Ports.refreshFileView "ds-pdf-view-iframe"
|
||||||
|
|
||||||
|
else
|
||||||
|
Cmd.none
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
FolderDropdownMsg m ->
|
FolderDropdownMsg m ->
|
||||||
let
|
let
|
||||||
@ -1002,7 +1012,7 @@ update inav env msg model =
|
|||||||
|
|
||||||
DeleteAttachResp (Ok res) ->
|
DeleteAttachResp (Ok res) ->
|
||||||
if res.success then
|
if res.success then
|
||||||
update inav env ReloadItem model
|
update inav env (ReloadItem False) model
|
||||||
|
|
||||||
else
|
else
|
||||||
resultModel model
|
resultModel model
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Data.ServerEvent exposing (AddonInfo, ServerEvent(..), decode)
|
module Data.ServerEvent exposing (AddonInfo, JobDoneDetails, ServerEvent(..), decode, isAddonExistingItem)
|
||||||
|
|
||||||
import Json.Decode as D
|
import Json.Decode as D
|
||||||
import Json.Decode.Pipeline as P
|
import Json.Decode.Pipeline as P
|
||||||
@ -13,7 +13,7 @@ import Json.Decode.Pipeline as P
|
|||||||
|
|
||||||
type ServerEvent
|
type ServerEvent
|
||||||
= JobSubmitted String
|
= JobSubmitted String
|
||||||
| JobDone String
|
| JobDone JobDoneDetails
|
||||||
| JobsWaiting Int
|
| JobsWaiting Int
|
||||||
| AddonInstalled AddonInfo
|
| AddonInstalled AddonInfo
|
||||||
|
|
||||||
@ -26,6 +26,32 @@ type alias AddonInfo =
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
{-| Payload of a `JobDone` server event: the task name plus the job's
arguments and result as raw JSON values, each of which may be absent.
-}
type alias JobDoneDetails =
|
||||||
|
{ task : String
|
||||||
|
, args : Maybe D.Value
|
||||||
|
, result : Maybe D.Value
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
{-| Return whether the job-done details belong to an addon run for the item
with the given id.

This is `True` only if the task is `addon-existing-item`, the given id is not
empty, and the job arguments contain an `itemId` field equal to that id.

-}
isAddonExistingItem : String -> JobDoneDetails -> Bool
isAddonExistingItem itemId details =
    let
        -- Mirrors the structure of Scala's ItemAddonTaskArgs; only `itemId` is read.
        readItemId value =
            Result.toMaybe (D.decodeValue (D.field "itemId" D.string) value)

        argsItemId =
            Maybe.andThen readItemId details.args
    in
    (details.task == "addon-existing-item")
        && (itemId /= "")
        && (argsItemId == Just itemId)
|
||||||
|
|
||||||
|
|
||||||
addonInfoDecoder : D.Decoder AddonInfo
|
addonInfoDecoder : D.Decoder AddonInfo
|
||||||
addonInfoDecoder =
|
addonInfoDecoder =
|
||||||
D.succeed AddonInfo
|
D.succeed AddonInfo
|
||||||
@ -51,8 +77,7 @@ decodeTag : String -> D.Decoder ServerEvent
|
|||||||
decodeTag tag =
|
decodeTag tag =
|
||||||
case tag of
|
case tag of
|
||||||
"job-done" ->
|
"job-done" ->
|
||||||
D.field "content" D.string
|
D.field "content" (D.map JobDone decodeJobDoneDetails)
|
||||||
|> D.map JobDone
|
|
||||||
|
|
||||||
"job-submitted" ->
|
"job-submitted" ->
|
||||||
D.field "content" D.string
|
D.field "content" D.string
|
||||||
@ -68,3 +93,11 @@ decodeTag tag =
|
|||||||
|
|
||||||
_ ->
|
_ ->
|
||||||
D.fail ("Unknown tag: " ++ tag)
|
D.fail ("Unknown tag: " ++ tag)
|
||||||
|
|
||||||
|
|
||||||
|
{-| Decode the content object of a `job-done` event.

The fields `task`, `args` and `result` must all be present. `args` and
`result` may be JSON `null`, which is decoded to `Nothing`. `D.nullable` is
used rather than `D.maybe`, because `D.maybe D.value` never fails and would
turn `null` into `Just null`.

-}
decodeJobDoneDetails : D.Decoder JobDoneDetails
decodeJobDoneDetails =
    D.map3 JobDoneDetails
        (D.field "task" D.string)
        (D.field "args" (D.nullable D.value))
        (D.field "result" (D.nullable D.value))
|
||||||
|
@ -14,6 +14,7 @@ module Page exposing
|
|||||||
, hasSidebar
|
, hasSidebar
|
||||||
, href
|
, href
|
||||||
, isDashboardPage
|
, isDashboardPage
|
||||||
|
, isDetailPage
|
||||||
, isOpen
|
, isOpen
|
||||||
, isSearchPage
|
, isSearchPage
|
||||||
, isSecured
|
, isSecured
|
||||||
@ -175,6 +176,16 @@ isDashboardPage page =
|
|||||||
False
|
False
|
||||||
|
|
||||||
|
|
||||||
|
{-| Return `True` if the page is an `ItemDetailPage` and `False` for every
other page.
-}
isDetailPage : Page -> Bool
|
||||||
|
isDetailPage page =
|
||||||
|
case page of
|
||||||
|
ItemDetailPage _ ->
|
||||||
|
True
|
||||||
|
|
||||||
|
_ ->
|
||||||
|
False
|
||||||
|
|
||||||
|
|
||||||
pageName : Page -> String
|
pageName : Page -> String
|
||||||
pageName page =
|
pageName page =
|
||||||
case page of
|
case page of
|
||||||
|
@ -38,6 +38,7 @@ type Msg
|
|||||||
| ItemResp (Result Http.Error ItemDetail)
|
| ItemResp (Result Http.Error ItemDetail)
|
||||||
| ScrollResult (Result Dom.Error ())
|
| ScrollResult (Result Dom.Error ())
|
||||||
| UiSettingsUpdated
|
| UiSettingsUpdated
|
||||||
|
| ReloadItem Bool
|
||||||
|
|
||||||
|
|
||||||
type alias UpdateResult =
|
type alias UpdateResult =
|
||||||
|
@ -47,6 +47,13 @@ update inav env msg model =
|
|||||||
, selectedItems = env.selectedItems
|
, selectedItems = env.selectedItems
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ReloadItem withFiles ->
|
||||||
|
let
|
||||||
|
m =
|
||||||
|
ItemDetailMsg (Comp.ItemDetail.Model.ReloadItem withFiles)
|
||||||
|
in
|
||||||
|
update inav env m model
|
||||||
|
|
||||||
ItemDetailMsg lmsg ->
|
ItemDetailMsg lmsg ->
|
||||||
let
|
let
|
||||||
result =
|
result =
|
||||||
|
@ -11,6 +11,7 @@ port module Ports exposing
|
|||||||
, printElement
|
, printElement
|
||||||
, receiveCheckQueryResult
|
, receiveCheckQueryResult
|
||||||
, receiveServerEvent
|
, receiveServerEvent
|
||||||
|
, refreshFileView
|
||||||
, removeAccount
|
, removeAccount
|
||||||
, setAccount
|
, setAccount
|
||||||
, setUiTheme
|
, setUiTheme
|
||||||
@ -54,6 +55,11 @@ port printElement : String -> Cmd msg
|
|||||||
port receiveWsMessage : (D.Value -> msg) -> Sub msg
|
port receiveWsMessage : (D.Value -> msg) -> Sub msg
|
||||||
|
|
||||||
|
|
||||||
|
{-| Given an ID of an element that is either EMBED or IFRAME the js will reload its src
|
||||||
|
-}
|
||||||
|
port refreshFileView : String -> Cmd msg
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--- Higher level functions based on ports
|
--- Higher level functions based on ports
|
||||||
|
|
||||||
|
@ -121,6 +121,16 @@ elmApp.ports.printElement.subscribe(function(id) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Handler for the `refreshFileView` port. It receives an element id and, if
// that element exists and is an EMBED or IFRAME, assigns the element's current
// `src` to itself again. Presumably this makes the browser load the content
// anew. Any other element, or a missing one, is left untouched.
elmApp.ports.refreshFileView.subscribe(function(id) {
    var viewer = document.getElementById(id);
    if (!viewer) {
        return;
    }
    var kind = viewer.tagName;
    if (kind !== "EMBED" && kind !== "IFRAME") {
        return;
    }
    var current = viewer.src;
    viewer.src = current;
});
|
||||||
|
|
||||||
var dsWebSocket = null;
|
var dsWebSocket = null;
|
||||||
function closeWS() {
|
function closeWS() {
|
||||||
@ -146,6 +156,8 @@ function initWS() {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Websockets are not used yet for communicating to the server
|
||||||
// elmApp.ports.sendWsMessage.subscribe(function(msg) {
|
// elmApp.ports.sendWsMessage.subscribe(function(msg) {
|
||||||
// socket.send(msg);
|
// socket.send(msg);
|
||||||
// });
|
// });
|
||||||
|
@ -23,6 +23,7 @@ object ZolaPlugin extends AutoPlugin {
|
|||||||
"'python -m SimpleHTTPServer 1234' for example."
|
"'python -m SimpleHTTPServer 1234' for example."
|
||||||
)
|
)
|
||||||
val zolaCheck = taskKey[Unit]("Runs zola check to check links")
|
val zolaCheck = taskKey[Unit]("Runs zola check to check links")
|
||||||
|
val zolaPrepare = taskKey[Unit]("Some task to run before generating docs")
|
||||||
}
|
}
|
||||||
|
|
||||||
import autoImport._
|
import autoImport._
|
||||||
@ -33,10 +34,12 @@ object ZolaPlugin extends AutoPlugin {
|
|||||||
zolaOutputDir := target.value / "zola-site",
|
zolaOutputDir := target.value / "zola-site",
|
||||||
zolaCommand := "zola",
|
zolaCommand := "zola",
|
||||||
zolaTestBaseUrl := "http://localhost:1234",
|
zolaTestBaseUrl := "http://localhost:1234",
|
||||||
|
zolaPrepare := {},
|
||||||
zolaBuild := {
|
zolaBuild := {
|
||||||
val logger = streams.value.log
|
val logger = streams.value.log
|
||||||
logger.info("Building web site using zola ...")
|
logger.info("Building web site using zola ...")
|
||||||
(Compile / resources).value
|
(Compile / resources).value
|
||||||
|
zolaPrepare.value
|
||||||
buildSite(zolaCommand.value, zolaRootDir.value, zolaOutputDir.value, None, logger)
|
buildSite(zolaCommand.value, zolaRootDir.value, zolaOutputDir.value, None, logger)
|
||||||
logger.info("Website built")
|
logger.info("Website built")
|
||||||
},
|
},
|
||||||
@ -45,6 +48,7 @@ object ZolaPlugin extends AutoPlugin {
|
|||||||
val baseurl = zolaTestBaseUrl.value
|
val baseurl = zolaTestBaseUrl.value
|
||||||
logger.info("Building web site (test) using zola ...")
|
logger.info("Building web site (test) using zola ...")
|
||||||
(Compile / resources).value
|
(Compile / resources).value
|
||||||
|
zolaPrepare.value
|
||||||
buildSite(
|
buildSite(
|
||||||
zolaCommand.value,
|
zolaCommand.value,
|
||||||
zolaRootDir.value,
|
zolaRootDir.value,
|
||||||
|
@ -19,7 +19,7 @@ out_base="$1"
|
|||||||
work_dir=$(mktemp -dt screenshot2-script.XXXXXX)
|
work_dir=$(mktemp -dt screenshot2-script.XXXXXX)
|
||||||
export HOME=$work_dir
|
export HOME=$work_dir
|
||||||
export RATIO="16:9"
|
export RATIO="16:9"
|
||||||
export WAIT_SEC=4
|
export WAIT_SEC=${WAIT_SEC:-4}
|
||||||
#export TOP_CUT=400
|
#export TOP_CUT=400
|
||||||
|
|
||||||
dsc write-default-config
|
dsc write-default-config
|
||||||
|
581
website/site/content/blog/2022-05-16_audio_file_addon.md
Normal file
@ -0,0 +1,581 @@
|
|||||||
|
+++
|
||||||
|
title = "Addon for audio file support"
|
||||||
|
[extra]
|
||||||
|
author = "eikek"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# 1st Addon: Audio file support
|
||||||
|
|
||||||
|
Since version 0.36.0 Docspell can be extended by
|
||||||
|
[addons](@/docs/addons/basics.md) - external programs that are
|
||||||
|
executed at some defined point in Docspell. This is a walk through the
|
||||||
|
first addon that was created, mainly as an example: providing support
|
||||||
|
for audio files.
|
||||||
|
|
||||||
|
<!-- more -->
|
||||||
|
|
||||||
|
I think it is interesting to provide support for audio files for a
|
||||||
|
DMS, although admittedly I don't have much of a use :). But this is
|
||||||
|
the kind of use-case that addons are for.
|
||||||
|
|
||||||
|
# The idea
|
||||||
|
|
||||||
|
The idea is very simple: the real work is done by external programs,
|
||||||
|
most notably [coqui's stt](https://github.com/coqui-ai/STT) a deep
|
||||||
|
learning toolkit originally created at Mozilla. It provides a command
|
||||||
|
line tool that accepts a WAV file and spits out text. Perfect!
|
||||||
|
|
||||||
|
With this text, a PDF file can be created and a preview image which is
|
||||||
|
already enough for basic support. You can see the pdf in the web-ui
|
||||||
|
and search for the text via SOLR or PostgreSQL.
|
||||||
|
|
||||||
|
Because a WAV file is not the most popular format today, `ffmpeg` can
|
||||||
|
be used to transform any other audio to WAV.
|
||||||
|
|
||||||
|
The only thing now is to create a program that checks the uploaded
|
||||||
|
files, filters out all audio files and runs them through the mentioned
|
||||||
|
programs. So let's do this.
|
||||||
|
|
||||||
|
# Preparation
|
||||||
|
|
||||||
|
Addons are external programs and can be written in whatever language….
|
||||||
|
For me this is a good opportunity to refresh my rusty scheme know-how
|
||||||
|
a bit. So this addon is written in Scheme, in particular
|
||||||
|
[guile](https://www.gnu.org/software/guile/). Programming in scheme is
|
||||||
|
fun and guile provides good integration into the (posix) OS and also
|
||||||
|
has a nice JSON module. I had the [reference
|
||||||
|
docs](https://www.gnu.org/software/guile/docs/docs-2.2/guile-ref/index.html)
|
||||||
|
open all the time - look at them for further details on the used
|
||||||
|
functions.
|
||||||
|
|
||||||
|
It's usually good to play around with the tools at first. For stt, we
|
||||||
|
first need to download a *model*. This will be used to "detect" the
|
||||||
|
text in the audio data. They have a [page](https://coqui.ai/models)
|
||||||
|
where we can download model files for any supported language. For the
|
||||||
|
addon, we will implement English and German.
|
||||||
|
|
||||||
|
When creating a PDF with wkhtmltopdf, we prettify it a little by
|
||||||
|
embedding the plain text into some html template. This will also take
|
||||||
|
care to specify UTF-8 as default encoding directly in the HTML
|
||||||
|
template.
|
||||||
|
|
||||||
|
FFMpeg just works as usual. It figures out the input format
|
||||||
|
automatically and knows from the extension of the output file what to
|
||||||
|
do.
|
||||||
|
|
||||||
|
You can find the full code
|
||||||
|
[here](https://github.com/docspell/audio-files-addon/blob/master/src/addon.scm).
|
||||||
|
The following shows excerpts from it with some explanation.
|
||||||
|
|
||||||
|
# The script
|
||||||
|
|
||||||
|
## Helpers
|
||||||
|
|
||||||
|
After the preamble, there are two helper functions.
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define* (errln formatstr . args)
|
||||||
|
(apply format (current-error-port) formatstr args)
|
||||||
|
(newline))
|
||||||
|
|
||||||
|
;; Macro for executing system commands and making this program exit in
|
||||||
|
;; case of failure.
|
||||||
|
(define-syntax sysexec
|
||||||
|
(syntax-rules ()
|
||||||
|
((sysexec exp ...)
|
||||||
|
(let ((rc (apply system* (list exp ...))))
|
||||||
|
(unless (eqv? rc EXIT_SUCCESS)
|
||||||
|
(format (current-error-port) "> '~a …' failed with: ~#*~:*~d~%" exp ... rc)
|
||||||
|
(exit 1))
|
||||||
|
#t))))
|
||||||
|
```
|
||||||
|
|
||||||
|
As this addon wants to pass data back to Docspell via stdout, we use
|
||||||
|
the stderr for logging and printing general information. The function
|
||||||
|
`errln` (short for "error line" :)) makes it convenient to print to
|
||||||
|
stderr and the second wraps the `system*` procedure such that the
|
||||||
|
script fails whenever the external program fails. It is somewhat
|
||||||
|
similar to `set -e` in bash.
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
Next is the declaration of external dependencies. At first all
|
||||||
|
external programs are listed. This is important for later, when the
|
||||||
|
script is packaged via nix. Nix will substitute these commands with
|
||||||
|
absolute paths. Then it's good to not have them scattered around.
|
||||||
|
|
||||||
|
It also reads in the expected environment variables (only those we
|
||||||
|
need) that are provided by Docspell. Since this addon only makes sense
|
||||||
|
to work on an item, it quits early should some env vars be missing.
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define *curl* "curl")
|
||||||
|
(define *ffmpeg* "ffmpeg")
|
||||||
|
(define *stt* "stt")
|
||||||
|
(define *wkhtmltopdf* "wkhtmltopdf")
|
||||||
|
|
||||||
|
;; Getting some environment variables
|
||||||
|
(define *output-dir* (getenv "OUTPUT_DIR"))
|
||||||
|
(define *tmp-dir* (getenv "TMP_DIR"))
|
||||||
|
(define *cache-dir* (getenv "CACHE_DIR"))
|
||||||
|
|
||||||
|
(define *item-data-json* (getenv "ITEM_DATA_JSON"))
|
||||||
|
(define *original-files-json* (getenv "ITEM_ORIGINAL_JSON"))
|
||||||
|
(define *original-files-dir* (getenv "ITEM_ORIGINAL_DIR"))
|
||||||
|
|
||||||
|
;; fail early if not in the right context
|
||||||
|
(when (not *item-data-json*)
|
||||||
|
(errln "No item data json file found.")
|
||||||
|
(exit 1))
|
||||||
|
```
|
||||||
|
|
||||||
|
## Input/Output
|
||||||
|
|
||||||
|
The input and output schemas can be defined now. This uses the
|
||||||
|
[guile-json](https://github.com/aconchillo/guile-json) module. It
|
||||||
|
provides very convenient features for reading and writing json.
|
||||||
|
|
||||||
|
It is possible to define a record via `define-json-type` that
|
||||||
|
generates readers and writers to/from JSON. For example, the record
|
||||||
|
`<itemdata>` is defined to be an object with only one field `id`. The
|
||||||
|
function `json->scm` reads in json into scheme datastructures and then
|
||||||
|
the generated function `scm->itemdata` creates the record from it. For
|
||||||
|
every record, accessor functions exist. For example: `(itemdata-id
|
||||||
|
data)` would lookup the field `id` in the given itemdata record
|
||||||
|
`data`.
|
||||||
|
|
||||||
|
Here we need it to get the item-id and the list of file properties
|
||||||
|
belonging to the original uploaded files.
|
||||||
|
|
||||||
|
Another interesting definition is the `<output>` record. This captures
|
||||||
|
(a subset of) the schema of what Docspell receives from this addon as
|
||||||
|
a result. A full example of this data is
|
||||||
|
[here](@/docs/addons/writing.md#output). We don't need `commands` or
|
||||||
|
`newItems`, so this schema only cares about the `files` attribute.
|
||||||
|
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define-json-type <itemdata>
|
||||||
|
(id))
|
||||||
|
|
||||||
|
;; The array of original files
|
||||||
|
(define-json-type <original-file>
|
||||||
|
(id)
|
||||||
|
(name)
|
||||||
|
(position)
|
||||||
|
(language)
|
||||||
|
(mimetype)
|
||||||
|
(length)
|
||||||
|
(checksum))
|
||||||
|
|
||||||
|
;; The output record, what is returned to docspell
|
||||||
|
(define-json-type <itemfiles>
|
||||||
|
(itemId)
|
||||||
|
(textFiles)
|
||||||
|
(pdfFiles))
|
||||||
|
(define-json-type <output>
|
||||||
|
(files "files" #(<itemfiles>)))
|
||||||
|
|
||||||
|
;; Parses the JSON containing the item information
|
||||||
|
(define *itemdata-json*
|
||||||
|
(scm->itemdata (call-with-input-file *item-data-json* json->scm)))
|
||||||
|
|
||||||
|
;; The JSON file containing meta data for all source files as vector.
|
||||||
|
(define *original-meta-json*
|
||||||
|
(let ((props (vector->list (call-with-input-file *original-files-json* json->scm))))
|
||||||
|
(map scm->original-file props)))
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Finding the audio file
|
||||||
|
|
||||||
|
The previously parsed json array `*original-meta-json*` can now be
|
||||||
|
used to find any audio files within the original uploaded files, as
|
||||||
|
done in `find-audio-files`. It simply goes through the list and keeps
|
||||||
|
those files whose mimetype starts with `audio/`. The mimetype is
|
||||||
|
provided by Docspell in the file properties in `ITEM_ORIGINAL_JSON`.
|
||||||
|
|
||||||
|
Before converting to wav with ffmpeg, it is quickly checked if it's
|
||||||
|
not a wav already.
|
||||||
|
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define (is-wav? mime)
|
||||||
|
"Test whether the mimetype MIME is denoting a wav file."
|
||||||
|
(or (string-suffix? "/wav" mime)
|
||||||
|
(string-suffix? "/x-wav" mime)
|
||||||
|
(string-suffix? "/vnd.wav" mime)))
|
||||||
|
|
||||||
|
(define (find-audio-files)
|
||||||
|
"Find all source files that are audio files."
|
||||||
|
(filter! (lambda (el)
|
||||||
|
(string-prefix?
|
||||||
|
"audio/"
|
||||||
|
(original-file-mimetype el)))
|
||||||
|
*original-meta-json*))
|
||||||
|
|
||||||
|
(define (convert-wav id mime)
|
||||||
|
"Run ffmpeg to convert to wav."
|
||||||
|
(let ((src-file (format #f "~a/~a" *original-files-dir* id))
|
||||||
|
(out-file (format #f "~a/in.wav" *tmp-dir*)))
|
||||||
|
(if (is-wav? mime)
|
||||||
|
src-file
|
||||||
|
(begin
|
||||||
|
(errln "Running ffmpeg to convert wav file...")
|
||||||
|
(sysexec *ffmpeg* "-loglevel" "error" "-y" "-i" src-file out-file)
|
||||||
|
out-file))))
|
||||||
|
```
|
||||||
|
|
||||||
|
## Speech to text
|
||||||
|
|
||||||
|
Once we have a wav file, we can run speech-to-text recognition on it.
|
||||||
|
As said above, we need to download a model first, which is depending
|
||||||
|
on a language. Luckily, Docspell provides the language of the file.
|
||||||
|
This is the language either given directly by the user when uploading
|
||||||
|
or it's the collective's default language.
|
||||||
|
|
||||||
|
In the following snippet, we get the language as an argument. We will
|
||||||
|
get it later from the file properties.
|
||||||
|
|
||||||
|
As seen below, the model file is stored to the `CACHE_DIR`. This is
|
||||||
|
provided by Docspell and will survive the execution of this script.
|
||||||
|
All other directories involved will be deleted eventually. The
|
||||||
|
`CACHE_DIR` is the place to store intermediate results you don't want
|
||||||
|
to lose between addon runs. But as any cache, it may not exist the
|
||||||
|
next time the addon is run. Docspell doesn't clear it automatically,
|
||||||
|
though.
|
||||||
|
|
||||||
|
The last function simply executes the `stt` external command and puts
|
||||||
|
stdout into a file.
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define (get-model language)
|
||||||
|
(let* ((lang (or language "eng"))
|
||||||
|
(file (format #f "~a/model_~a.pbmm" *cache-dir* lang)))
|
||||||
|
(unless (file-exists? file)
|
||||||
|
(download-model lang file))
|
||||||
|
file))
|
||||||
|
|
||||||
|
(define (download-model lang file)
|
||||||
|
"Download model files per language. Nix has currently stt 0.9.3 packaged."
|
||||||
|
(let ((url (cond
|
||||||
|
((string= lang "eng") "https://coqui.gateway.scarf.sh/english/coqui/v0.9.3/model.pbmm")
|
||||||
|
((string= lang "deu") "https://coqui.gateway.scarf.sh/german/AASHISHAG/v0.9.0/model.pbmm")
|
||||||
|
(else (error "Unsupported language: " lang)))))
|
||||||
|
(errln "Downloading model file for language: ~a" lang)
|
||||||
|
(sysexec *curl* "-SsL" "-o" file url)
|
||||||
|
file))
|
||||||
|
|
||||||
|
(define (extract-text model input out)
|
||||||
|
"Runs stt for speech-to-text and writes the text into the file OUT."
|
||||||
|
(errln "Extracting text from audio…")
|
||||||
|
(with-output-to-file out
|
||||||
|
(lambda ()
|
||||||
|
(sysexec *stt* "--model" model "--audio" input))))
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Create PDF
|
||||||
|
|
||||||
|
Creating the PDF is straightforward. The extracted text is embedded
|
||||||
|
into an HTML file which is then passed to `wkhtmltopdf`. Since we don't
|
||||||
|
need this file for anything else, it is stored to the `TMP_DIR`.
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define (create-pdf txt-file out)
|
||||||
|
(define (line str)
|
||||||
|
(format #t "~a\n" str))
|
||||||
|
(errln "Creating pdf file…")
|
||||||
|
(let ((tmphtml (format #f "~a/text.html" *tmp-dir*)))
|
||||||
|
(with-output-to-file tmphtml
|
||||||
|
(lambda ()
|
||||||
|
(line "<!DOCTYPE html>")
|
||||||
|
(line "<html>")
|
||||||
|
(line " <head><meta charset=\"UTF-8\"></head>")
|
||||||
|
(line " <body style=\"padding: 2em; font-size: large;\">")
|
||||||
|
(line " <div style=\"padding: 0.5em; font-size:normal; font-weight: bold; border: 1px solid black;\">")
|
||||||
|
(line " Extracted from audio using stt on ")
|
||||||
|
(display (strftime "%c" (localtime (current-time))))
|
||||||
|
(line " </div>")
|
||||||
|
(line " <p>")
|
||||||
|
(display (call-with-input-file txt-file read-string))
|
||||||
|
(line " </p>")
|
||||||
|
(line "</body></html>")))
|
||||||
|
(sysexec *wkhtmltopdf* tmphtml out)))
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Putting it together
|
||||||
|
|
||||||
|
The main function now puts everything together. The `process-file`
|
||||||
|
function is called for every file that is returned from
|
||||||
|
`(find-audio-files)`. It will extract the necessary information (like
|
||||||
|
the language) from the json document via record accessors (e.g.
|
||||||
|
`(original-file-language file)`) and then calls the functions defined
|
||||||
|
above. At last it creates an `<itemfiles>` record with `make-itemfiles`.
|
||||||
|
|
||||||
|
An `<itemfiles>` record contains now the important information for
|
||||||
|
Docspell. It requires the item-id and a mapping from attachment-ids to
|
||||||
|
files in `OUTPUT_DIR`. For each attachment identified by its ID,
|
||||||
|
Docspell replaces the extracted text with the contents of the given
|
||||||
|
file and replaces the converted PDF file, respectively. In the code
|
||||||
|
below, two lists of such mappings are defined - the first for the text
|
||||||
|
files, the second for the converted pdf. The files must be specified
|
||||||
|
relative to `OUTPUT_DIR`.
|
||||||
|
|
||||||
|
That means `process-all` returns a list of `<itemfiles>` records which
|
||||||
|
is then used to create the `<output>` record. And finally, a
|
||||||
|
`output->json` function will turn the record into proper JSON which is
|
||||||
|
sent to stdout.
|
||||||
|
|
||||||
|
```lisp
|
||||||
|
(define (process-file itemid file)
|
||||||
|
"Processing a single audio file."
|
||||||
|
(let* ((id (original-file-id file))
|
||||||
|
(mime (original-file-mimetype file))
|
||||||
|
(lang (original-file-language file))
|
||||||
|
(txt-file (format #f "~a/~a.txt" *output-dir* id))
|
||||||
|
(pdf-file (format #f "~a/~a.pdf" *output-dir* id))
|
||||||
|
(wav (convert-wav id mime))
|
||||||
|
(model (get-model lang)))
|
||||||
|
(extract-text model wav txt-file)
|
||||||
|
(create-pdf txt-file pdf-file)
|
||||||
|
(make-itemfiles itemid
|
||||||
|
`((,id . ,(format #f "~a.txt" id)))
|
||||||
|
`((,id . ,(format #f "~a.pdf" id))))))
|
||||||
|
|
||||||
|
(define (process-all)
|
||||||
|
(let ((item-id (itemdata-id *itemdata-json*)))
|
||||||
|
(map (lambda (file)
|
||||||
|
(process-file item-id file))
|
||||||
|
(find-audio-files))))
|
||||||
|
|
||||||
|
(define (main args)
|
||||||
|
(let ((out (make-output (process-all))))
|
||||||
|
(format #t "~a" (output->json out))))
|
||||||
|
```
|
||||||
|
|
||||||
|
Example output:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"itemId":"qZDnyGIAJsXr",
|
||||||
|
"textFiles": { "HPFvIDib6eA": "HPFvIDib6eA.txt" },
|
||||||
|
"pdfFiles": { "HPFvIDib6eA": "HPFvIDib6eA.pdf"}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
# Packaging
|
||||||
|
|
||||||
|
Now with that script some additional plumbing is needed to make it an
|
||||||
|
"Addon" for Docspell.
|
||||||
|
|
||||||
|
The external tools - stt, ffmpeg, curl and wkhtmltopdf are required as
|
||||||
|
well as guile to compile and interpret the script. Also the guile-json
|
||||||
|
module must be installed.
|
||||||
|
|
||||||
|
This can turn into a quite tedious task. Luckily, there is
|
||||||
|
[nix](https://nixos.org) that has an answer to this. A user who wants
|
||||||
|
to use this script only needs to install nix. This package manager
|
||||||
|
then takes care of providing the exact dependencies we need (down to
|
||||||
|
the correct version and including guile as the language and runtime).
|
||||||
|
|
||||||
|
## A flake
|
||||||
|
|
||||||
|
Everything is defined in the `flake.nix` in the source root. It looks
|
||||||
|
like this:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{
|
||||||
|
description = "A docspell addon for basic audio file support";
|
||||||
|
|
||||||
|
inputs = {
|
||||||
|
utils.url = "github:numtide/flake-utils";
|
||||||
|
|
||||||
|
# Nixpkgs / NixOS version to use.
|
||||||
|
nixpkgs.url = "nixpkgs/nixos-21.11";
|
||||||
|
};
|
||||||
|
|
||||||
|
outputs = { self, nixpkgs, utils }:
|
||||||
|
utils.lib.eachSystem ["x86_64-linux"] (system:
|
||||||
|
let
|
||||||
|
pkgs = import nixpkgs {
|
||||||
|
inherit system;
|
||||||
|
overlays = [
|
||||||
|
|
||||||
|
];
|
||||||
|
};
|
||||||
|
name = "audio-files-addon";
|
||||||
|
in rec {
|
||||||
|
packages.${name} = pkgs.callPackage ./nix/addon.nix {
|
||||||
|
inherit name;
|
||||||
|
};
|
||||||
|
|
||||||
|
defaultPackage = packages.${name};
|
||||||
|
|
||||||
|
apps.${name} = utils.lib.mkApp {
|
||||||
|
inherit name;
|
||||||
|
drv = packages.${name};
|
||||||
|
};
|
||||||
|
defaultApp = apps.${name};
|
||||||
|
|
||||||
|
## … omitted for brevity
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
First sad thing is, that only `x86_64` systems are supported. This is
|
||||||
|
due to `stt` not being available on other platforms currently (as
|
||||||
|
provided by nixpkgs).
|
||||||
|
|
||||||
|
The rest is a bit magic: A package and "defaultPackage" is defined
|
||||||
|
with a reference to `nix/addon.nix`. The important part is the line
|
||||||
|
|
||||||
|
```nix
|
||||||
|
inputs = {
|
||||||
|
# Nixpkgs / NixOS version to use.
|
||||||
|
nixpkgs.url = "nixpkgs/nixos-21.11";
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
It says that as input for "building" the script, we take all of
|
||||||
|
[nixpkgs](https://github.com/NixOS/nixpkgs) which is a package
|
||||||
|
collection defined for (and in) nix - including thousands of software
|
||||||
|
packages. We can pick and choose from these. No surprise, all external
|
||||||
|
tools we need are included!
|
||||||
|
|
||||||
|
A flake defines the inputs and outputs of a package. With all of
|
||||||
|
nixpkgs as inputs, we can create a definition to elevate this script
|
||||||
|
into a *package*.
|
||||||
|
|
||||||
|
## Package definition
|
||||||
|
|
||||||
|
The definition for "building" the script is in `nix/addon.nix`:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{ stdenv, bash, cacert, curl, stt, wkhtmltopdf, ffmpeg, guile, guile-json, lib, name }:
|
||||||
|
|
||||||
|
stdenv.mkDerivation {
|
||||||
|
inherit name;
|
||||||
|
src = lib.sources.cleanSource ../.;
|
||||||
|
|
||||||
|
buildInputs = [ guile guile-json ];
|
||||||
|
|
||||||
|
patchPhase = ''
|
||||||
|
TARGET=src/addon.scm
|
||||||
|
sed -i 's,\*curl\* "curl",\*curl\* "${curl}/bin/curl",g' $TARGET
|
||||||
|
sed -i 's,\*ffmpeg\* "ffmpeg",\*ffmpeg\* "${ffmpeg}/bin/ffmpeg",g' $TARGET
|
||||||
|
sed -i 's,\*stt\* "stt",\*stt\* "${stt}/bin/stt",g' $TARGET
|
||||||
|
sed -i 's,\*wkhtmltopdf\* "wkhtmltopdf",\*wkhtmltopdf\* "${wkhtmltopdf}/bin/wkhtmltopdf",g' $TARGET
|
||||||
|
'';
|
||||||
|
|
||||||
|
buildPhase = ''
|
||||||
|
guild compile -o ${name}.go src/addon.scm
|
||||||
|
'';
|
||||||
|
|
||||||
|
# module name must be same as <filename>.go
|
||||||
|
installPhase = ''
|
||||||
|
mkdir -p $out/{bin,lib}
|
||||||
|
cp ${name}.go $out/lib/
|
||||||
|
|
||||||
|
cat > $out/bin/${name} <<-EOF
|
||||||
|
#!${bash}/bin/bash
|
||||||
|
export SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt"
|
||||||
|
exec -a "${name}" ${guile}/bin/guile -C ${guile-json}/share/guile/ccache -C $out/lib -e '(${name}) main' -c "" \$@
|
||||||
|
EOF
|
||||||
|
chmod +x $out/bin/${name}
|
||||||
|
'';
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
With a bit of handwaving - this is a bash script that modifies
|
||||||
|
slightly the scheme script and runs a compile on it. We simply declare
|
||||||
|
all packages we need in the first line of `{ … }` - these are
|
||||||
|
arguments that are automatically filled by nix by searching the
|
||||||
|
corresponding package in nixpkgs.
|
||||||
|
|
||||||
|
First the `patchPhase` is executed. It will replace the variables
|
||||||
|
containing the external tools with an absolute path to the version
|
||||||
|
that we currently get from nixpkgs. With this step nix takes care that
|
||||||
|
all these packages are available *at runtime* when executing the
|
||||||
|
script. All versions are finally fixed in `flake.lock` and can be
|
||||||
|
upgraded manually.
|
||||||
|
|
||||||
|
The `buildPhase` runs the guile compiler that produces some
|
||||||
|
intermediate code that will be loaded instead of compiling the script
|
||||||
|
on-the-fly.
|
||||||
|
|
||||||
|
At last, `installPhase` creates a wrapper script that runs guile with
|
||||||
|
the correct load-path pointing to `guile-json` and to our pre-compiled
|
||||||
|
script. Additionally, trusted root certificates are exported to make
|
||||||
|
the curl commands work. This script will be created in `$out`
|
||||||
|
directory that is provided by nix.
|
||||||
|
|
||||||
|
If you now run `nix build` in the source root, it will execute all
|
||||||
|
these phases and produce a symlink pointing to the result. You can
|
||||||
|
then `cat` the resulting file if you are curious.
|
||||||
|
|
||||||
|
This way the script is completely isolated from the system it runs
|
||||||
|
on - as long as the nix package manager is available. It includes all
|
||||||
|
the external tools, as well as the underlying runtime (guile)! The
|
||||||
|
result is a tiny wrapper bash script that can be run "everywhere"
|
||||||
|
(modulo all the restrictions, like non-x86_64 platforms, of course
|
||||||
|
:)).
|
||||||
|
|
||||||
|
|
||||||
|
## Addon Descriptor
|
||||||
|
|
||||||
|
At last, a small yaml file is needed to tell Docspell a little about
|
||||||
|
the addon.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
meta:
|
||||||
|
name: "audio-files-addon"
|
||||||
|
version: "0.1.0"
|
||||||
|
description: |
|
||||||
|
This addon adds support for audio files. Audio files are processed
|
||||||
|
by a speech-to-text engine and a pdf is generated.
|
||||||
|
|
||||||
|
It doesn't expect any user arguments at the moment. It requires
|
||||||
|
internet access to download model files.
|
||||||
|
|
||||||
|
triggers:
|
||||||
|
- final-process-item
|
||||||
|
- final-reprocess-item
|
||||||
|
- existing-item
|
||||||
|
|
||||||
|
runner:
|
||||||
|
nix:
|
||||||
|
enable: true
|
||||||
|
|
||||||
|
docker:
|
||||||
|
enable: false
|
||||||
|
|
||||||
|
trivial:
|
||||||
|
enable: true
|
||||||
|
exec: src/addon.scm
|
||||||
|
|
||||||
|
options:
|
||||||
|
networking: true
|
||||||
|
collectOutput: true
|
||||||
|
```
|
||||||
|
|
||||||
|
This tells Docspell via `triggers` when this addon may be run. This
|
||||||
|
one only makes sense for an item. Thus it can be hooked up to run with
|
||||||
|
every file-processing job or a user can manually trigger it on an
|
||||||
|
item.
|
||||||
|
|
||||||
|
It also tells via `runner:` that it can be built and run via nix, but
|
||||||
|
not via docker (I gave up after an hour to create a Dockerfile…). It
|
||||||
|
could also be run "as-is" but the user then needs to install all these
|
||||||
|
tools and guile manually.
|
||||||
|
|
||||||
|
# Done
|
||||||
|
|
||||||
|
That's it. You can install this addon in Docspell and create a run
|
||||||
|
configuration to let it execute when you want.
|
11
website/site/content/docs/addons/_index.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
+++
|
||||||
|
title = "Addons"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes how addons work."
|
||||||
|
weight = 55
|
||||||
|
template = "pages.html"
|
||||||
|
sort_by = "weight"
|
||||||
|
redirect_to = "docs/addons/basics"
|
||||||
|
+++
|
||||||
|
|
||||||
|
No content here.
|
BIN
website/site/content/docs/addons/addon-install-01.png
Normal file
After Width: | Height: | Size: 105 KiB |
BIN
website/site/content/docs/addons/addon-install-01_dark.png
Normal file
After Width: | Height: | Size: 109 KiB |
BIN
website/site/content/docs/addons/addon-install-02.png
Normal file
After Width: | Height: | Size: 155 KiB |
BIN
website/site/content/docs/addons/addon-install-02_dark.png
Normal file
After Width: | Height: | Size: 164 KiB |
BIN
website/site/content/docs/addons/addon-install-03.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
website/site/content/docs/addons/addon-install-03_dark.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
website/site/content/docs/addons/addon-install-04.png
Normal file
After Width: | Height: | Size: 238 KiB |
BIN
website/site/content/docs/addons/addon-install-04_dark.png
Normal file
After Width: | Height: | Size: 242 KiB |
149
website/site/content/docs/addons/basics.md
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
+++
|
||||||
|
title = "Basics"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Docspell Addons."
|
||||||
|
weight = 10
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Addons
|
||||||
|
|
||||||
|
Addons allow to execute custom software within a defined context in
|
||||||
|
Docspell. The idea is to be able to support new features and amend
|
||||||
|
existing ones.
|
||||||
|
|
||||||
|
{% warningbubble(title="Experimental") %} Addons are considered
|
||||||
|
experimental. The interaction between addons and Docspell is still
|
||||||
|
subject to change.
|
||||||
|
|
||||||
|
The intended audience for addons are developers (to create addons) and
|
||||||
|
technically inclined users to install, configure and use them.
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
Despite the warning above, addons are a nice way to amend your
|
||||||
|
docspell server with new things, you are encouraged to try it out and
|
||||||
|
give feedback ;-).
|
||||||
|
|
||||||
|
{% infobubble(title="Enable addons manually") %}
|
||||||
|
Addons are disabled by default. They must be enabled in the config
|
||||||
|
file of the restserver!
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
|
||||||
|
## What is an Addon?
|
||||||
|
|
||||||
|
An addon is a zip file that contains a `docspell-addon.yml` (or .yaml
|
||||||
|
or .json) file in its root. The `docspell-addon.yml` is the *addon
|
||||||
|
descriptor* telling how to run and optionally build the addon. In the
|
||||||
|
ZIP file, an addon provides a program that expects one argument which
|
||||||
|
is a file containing the user input for the addon. Addons can
|
||||||
|
communicate back to docspell via their stdout and/or via directly
|
||||||
|
calling the docspell server as part of their program.
|
||||||
|
|
||||||
|
|
||||||
|
## What can Addons do?
|
||||||
|
|
||||||
|
Addons can accept user input and are arbitrary external programs that
|
||||||
|
can do whatever they want. However, Docspell can embed running addons
|
||||||
|
in restricted environments, where they don't have network for example.
|
||||||
|
Addons can safely communicate to Docspell via their stdout output
|
||||||
|
returning instructions that Docspell will realise.
|
||||||
|
|
||||||
|
Running addons is managed by docspell. Currently they can be executed:
|
||||||
|
|
||||||
|
- as the final step when processing or re-processing an item. They then
|
||||||
|
have access to all the item data that has been collected during
|
||||||
|
processing (id, extracted text, converted pdfs, etc) and it can work
|
||||||
|
with that. It may, for example, set more tags or custom fields.
|
||||||
|
- trigger manually on some existing item
|
||||||
|
- periodically defined by a schedule. This executes the addons only
|
||||||
|
with the configured user input.
|
||||||
|
- … (maybe more to come)
|
||||||
|
|
||||||
|
Since an addon may not make sense to run on all these situations, it
|
||||||
|
must define a sensible subset via the `triggers` option in its
|
||||||
|
descriptor.
|
||||||
|
|
||||||
|
|
||||||
|
## How are they run
|
||||||
|
|
||||||
|
Addons are always executed by the joex component as an external
|
||||||
|
process, therefore they can be written in any programming or scripting
|
||||||
|
language.
|
||||||
|
|
||||||
|
That means the machine running joex possibly needs to match the
|
||||||
|
requirements of each addon. To ease this, addons can provide a [nix
|
||||||
|
description](https://nixos.wiki/wiki/Flakes) or a `Dockerfile`. Then
|
||||||
|
you need to prepare the machine only with two things (nix and docker)
|
||||||
|
to have the prerequisites for running many addons.
|
||||||
|
|
||||||
|
|
||||||
|
# More …
|
||||||
|
|
||||||
|
Addons are a flexible way to extend Docspell and require some
|
||||||
|
technical affinity. However, only "using" addons should not be that
|
||||||
|
hard, but it will always depend on the documentation of the addon and
|
||||||
|
its own complexity.
|
||||||
|
|
||||||
|
As the user, you may have different views: preparing the server to be
|
||||||
|
able to run addons, writing your own addons and finally using them.
|
||||||
|
|
||||||
|
The following sections are divided along these perspectives:
|
||||||
|
|
||||||
|
## Using Addons
|
||||||
|
|
||||||
|
Addons must be installed and then configured before they can
|
||||||
|
be used. [Using Addons](@/docs/addons/using.md) describes this
|
||||||
|
perspective.
|
||||||
|
|
||||||
|
{{ buttonright(href="/docs/addons/using", text="More…") }}
|
||||||
|
|
||||||
|
## Control how addons are run
|
||||||
|
|
||||||
|
As the owner of your server, you want to [control how addons are
|
||||||
|
run](@/docs/addons/control.md). Since addons are arbitrary programs,
|
||||||
|
potentially downloaded from the internet, they can be run in a
|
||||||
|
restricted environment.
|
||||||
|
|
||||||
|
{{ buttonright(href="/docs/addons/control", text="More…") }}
|
||||||
|
|
||||||
|
|
||||||
|
## Write custom addons
|
||||||
|
|
||||||
|
Finally, [writing addons](@/docs/addons/writing.md) requires (among
|
||||||
|
other things) to know how to interact with Docspell and what package
|
||||||
|
format is expected.
|
||||||
|
|
||||||
|
{{ buttonright(href="/docs/addons/writing", text="More…") }}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ## Goals -->
|
||||||
|
|
||||||
|
<!-- - Convenient for addon creators. Addons can be written in any -->
|
||||||
|
<!-- programming language and have a very light contract: they receive -->
|
||||||
|
<!-- one input argument and _may_ return structured data to instruct -->
|
||||||
|
<!-- docspell what to do. If not they can execute arbitrary code to call -->
|
||||||
|
<!-- the server directly. -->
|
||||||
|
<!-- - Server administrators control how they are executed. Since addons -->
|
||||||
|
<!-- may run anything, the execution should be able to be locked down when -->
|
||||||
|
<!-- wanted. -->
|
||||||
|
<!-- - Users can install and configure addons via the web interface easily. -->
|
||||||
|
<!-- It should be easy for addon creators to document how users can use -->
|
||||||
|
<!-- them. -->
|
||||||
|
|
||||||
|
|
||||||
|
<!-- # TODOs -->
|
||||||
|
|
||||||
|
<!-- - what if joex is running inside a container already? -->
|
||||||
|
<!-- - some use cases: -->
|
||||||
|
<!-- - I want an addon to do some stuff when processing files -->
|
||||||
|
<!-- - my files named "something_bla" are always this specific document -->
|
||||||
|
<!-- and so very specific processing would be great -->
|
||||||
|
<!-- - I want XYZ files to work (e.g. mp3?) -->
|
||||||
|
<!-- - I want to generate previews for video files -->
|
||||||
|
<!-- - Example Addons: -->
|
||||||
|
<!-- - swiss qr code detection on invoices -->
|
||||||
|
<!-- - tags via regexes -->
|
||||||
|
<!-- - text extraction from audio? -->
|
||||||
|
<!-- - preview generation for video? -->
|
238
website/site/content/docs/addons/control.md
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
+++
|
||||||
|
title = "Control Runtime"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Control how addons are run"
|
||||||
|
weight = 30
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Control runtime of addons
|
||||||
|
|
||||||
|
Addons are run by the joex component as background tasks in an
|
||||||
|
external process. Depending on the machine it is running on, the addon
|
||||||
|
can be run
|
||||||
|
|
||||||
|
- inside a docker container
|
||||||
|
- inside a systemd-nspawn container
|
||||||
|
- directly on the machine
|
||||||
|
|
||||||
|
Addons can be provided as source packages, where the final program may
|
||||||
|
need to be built. They also can depend on other software. In order to
|
||||||
|
not prepare for each addon, it is recommended to install
|
||||||
|
[nix](https://nixos.org) with [flakes](https://nixos.wiki/wiki/Flakes)
|
||||||
|
and docker on the machine running joex.
|
||||||
|
|
||||||
|
Please also look at the addon section in the [default
|
||||||
|
configuration](@/docs/configure/main.md#joex) for joex.
|
||||||
|
|
||||||
|
You need to explicitly enable addons in the restserver config file.
|
||||||
|
|
||||||
|
Docspell uses "runners" to execute an addon. This includes building it
|
||||||
|
if necessary. The following runners exist:
|
||||||
|
|
||||||
|
- `docker`: uses docker to build and run the addon
|
||||||
|
- `nix-flake`: builds via `nix build` and runs the executable in
|
||||||
|
`$out/bin`
|
||||||
|
- `trivial`: simply executes a file inside the addon (as specified in
|
||||||
|
the descriptor)
|
||||||
|
|
||||||
|
In the joex configuration you can specify which runners your system
|
||||||
|
supports.
|
||||||
|
|
||||||
|
## Prepare for *running* addons
|
||||||
|
|
||||||
|
Depending on how you want addons to be run, you need to install either
|
||||||
|
docker and/or systemd-nspawn on the machine running joex.
|
||||||
|
Additionally, the user running joex must be able to use these tools.
|
||||||
|
For docker it usually means to add the user to some group. For
|
||||||
|
systemd-nspawn you most likely want to configure `sudo` to run
|
||||||
|
passwordless the `systemd-nspawn` command.
|
||||||
|
|
||||||
|
Without this, an addon can only be run "directly" on the machine that
|
||||||
|
hosts joex (which might be perfectly fine). The addon then "sees" all
|
||||||
|
files on the machine and could potentially do harm.
|
||||||
|
|
||||||
|
It is recommended to install `nix` and `docker`, if possible. Addons
|
||||||
|
may only run with docker or only without, so supporting both leaves
|
||||||
|
more options.
|
||||||
|
|
||||||
|
|
||||||
|
## Prepare for *building* addons
|
||||||
|
|
||||||
|
Addons can be packaged as source or binary packages. For the former,
|
||||||
|
joex will build the addon first. There are two supported ways to do
|
||||||
|
so:
|
||||||
|
|
||||||
|
- via `docker build` when the addon provides a `Dockerfile` (use
|
||||||
|
runner `docker`)
|
||||||
|
- via `nix build` when the addon provides a `flake.nix` file (use
|
||||||
|
runner `nix-flake`)
|
||||||
|
|
||||||
|
Both build strategies will cache the resulting artifact, so subsequent
|
||||||
|
builds will be (almost) no-ops.
|
||||||
|
|
||||||
|
{% infobubble(title="Note") %}
|
||||||
|
*Building* addons requires to be connected to the internet! Running
|
||||||
|
them may not require a network connection.
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
If the addon is packaged as a binary, then usually the `trivial`
|
||||||
|
runner (possibly in combination with `systemd-nspawn`) can be used.
|
||||||
|
|
||||||
|
# Runtime
|
||||||
|
|
||||||
|
## Cache directory
|
||||||
|
|
||||||
|
Addons can use a "cache directory" to store data between runs. This
|
||||||
|
directory is not cleaned by docspell. If you have concerns about
|
||||||
|
space, use a cron job or systemd-timer to periodically clean this
|
||||||
|
directory.
|
||||||
|
|
||||||
|
## "Pure" vs "Impure"
|
||||||
|
|
||||||
|
Addons can talk back to Docspell in these ways: they can use the http
|
||||||
|
api, for example with [dsc](@/docs/tools/cli.md), or they can return
|
||||||
|
data to instruct Docspell to apply changes.
|
||||||
|
|
||||||
|
The former requires the addon to be connected to the network to reach
|
||||||
|
the Docspell *restserver*. This allows the addon to do arbitrary
|
||||||
|
changes at any time - this is the "impure" variant.
|
||||||
|
|
||||||
|
The second approach can be run without network connectivity. When
|
||||||
|
using docker or systemd-nspawn, Docspell will run these addons without
|
||||||
|
any network. Thus they can't do anything really, except return data
|
||||||
|
back to Docspell.
|
||||||
|
|
||||||
|
The pure way is much preferred! It allows for more consistent
|
||||||
|
behaviour, because Docspell is in charge for applying any changes.
|
||||||
|
Docspell can apply changes *only if* the addon returned successfully.
|
||||||
|
Addons can also be retried on error, because no changes happened yet.
|
||||||
|
|
||||||
|
It's the decision of the addon author, how the addon will work. It
|
||||||
|
should document whether it is pure or impure. You can also look into
|
||||||
|
the descriptor and check for a `networking: false` setting. As the
|
||||||
|
server administrator, you can configure Docspell to only accept pure
|
||||||
|
addons.
|
||||||
|
|
||||||
|
|
||||||
|
## Runners
|
||||||
|
|
||||||
|
### nix flake runner
|
||||||
|
|
||||||
|
For addons providing a `flake.nix` this runner can build it and find
|
||||||
|
the file to execute. With this `flake.nix` file addons can declare how
|
||||||
|
they should be built and what dependencies are required to run them.
|
||||||
|
|
||||||
|
The resulting executable can be executed via `systemd-nspawn` in a
|
||||||
|
restricted environment or directly on the machine.
|
||||||
|
|
||||||
|
{% infobubble(title="Requires") %}
|
||||||
|
You need to install [nix](https://nixos.org) and enable
|
||||||
|
[flakes](https://nixos.wiki/wiki/Flakes) to use this runner.
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
### docker
|
||||||
|
|
||||||
|
Addons can provide a Dockerfile or an image. If no image is given,
|
||||||
|
`docker build` will be run to build an image from the `Dockerfile`.
|
||||||
|
Then `docker run` is used to run the addon.
|
||||||
|
|
||||||
|
{% infobubble(title="Requires") %}
|
||||||
|
You need to install `docker` to use this runner.
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
### trivial
|
||||||
|
|
||||||
|
Addons can simply declare a file to execute. Docspell can use
|
||||||
|
`systemd-nspawn` to run it in a restricted environment, or it can be
|
||||||
|
run directly on the machine. This variant is only useful for very
|
||||||
|
simple addons, that don't require any special dependencies.
|
||||||
|
|
||||||
|
{% infobubble(title="Requires") %}
|
||||||
|
You need to check each addon for its requirements and prepare the
|
||||||
|
machine accordingly.
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
### Choosing runners
|
||||||
|
|
||||||
|
The config `addons.executor-config.runners` accepts a list of runners.
|
||||||
|
It specifies the preferred runner first. If an addon can be executed
|
||||||
|
via docker and nix, Docspell will choose the runner first in the list.
|
||||||
|
|
||||||
|
If you don't have nix installed, remove the `nix-flake` runner from
|
||||||
|
this list and same for docker, of course.
|
||||||
|
|
||||||
|
|
||||||
|
### systemd-nspawn
|
||||||
|
|
||||||
|
The `systemd-nspawn` can be used to run programs in a lightweight
|
||||||
|
ad-hoc container. It is available on most linux distributions (it is
|
||||||
|
part of systemd…). It doesn't require an image to exist first; this
|
||||||
|
makes it very convenient for running addons in a restricted
|
||||||
|
environment.
|
||||||
|
|
||||||
|
If you enable it in the config file, then all addons are either run
|
||||||
|
via `systemd-nspawn` or docker - and thus always in a restricted
|
||||||
|
environment, where they can only access their own files and the files
|
||||||
|
provided by Docspell.
|
||||||
|
|
||||||
|
The downside is that `systemd-nspawn` needs to be run as root (as far
|
||||||
|
as I know). Therefore, configure `sudo` to allow the user that is
|
||||||
|
running joex to execute `systemd-nspawn` non-interactively.
|
||||||
|
|
||||||
|
{% infobubble(title="Requires") %}
|
||||||
|
Install `systemd-nspawn` and enable the user running joex to use it
|
||||||
|
password-less via sudo.
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
# Within Docker
|
||||||
|
|
||||||
|
If joex itself is run as a docker container, things get a bit
|
||||||
|
complicated. The default image for joex does not contain `nix`, so the
|
||||||
|
`nix-flake` runner cannot be used out of the box.
|
||||||
|
|
||||||
|
In order to use the `docker` runner, the container must be configured
|
||||||
|
to access the host's docker daemon. On most systems this can be
|
||||||
|
achieved by bind-mounting the unix socket (usually at
|
||||||
|
`/var/run/docker.sock`) into the container. Here is a snippet from the
|
||||||
|
provided `docker-compose` file:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
joex:
|
||||||
|
image: docspell/joex:latest
|
||||||
|
# ... left out for brevity
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
- /tmp:/tmp
|
||||||
|
```
|
||||||
|
|
||||||
|
Additionally to `/var/run/docker.sock`, it also bind mounts the `/tmp`
|
||||||
|
directory. This is necessary, because docker will be invoked with bind
|
||||||
|
mounts from inside the container - but these must be available on the
|
||||||
|
host, because the docker client in the container actually runs the
|
||||||
|
command on the host.
|
||||||
|
|
||||||
|
The addon executor uses the system's temp-directory (which is usually
|
||||||
|
`/tmp`) as a base for creating a working and cache directory. Should
|
||||||
|
you change this in joex config file (or your system uses a different
|
||||||
|
default temp-dir), then the bind mount must be adapted as well.
|
||||||
|
|
||||||
|
Another variant is to extend the default joex image and add more
|
||||||
|
programs as needed by addons and then use the `trivial` runner.
|
||||||
|
|
||||||
|
# Summary / tl;dr
|
||||||
|
|
||||||
|
When joex is not inside a container:
|
||||||
|
|
||||||
|
- (optional) Install `systemd-nspawn` - it is provided on many
|
||||||
|
GNU/Linux distributions
|
||||||
|
- Configure `sudo` to allow the user running the joex component to
|
||||||
|
execute `systemd-nspawn` non-interactively (without requiring a
|
||||||
|
password)
|
||||||
|
- Install docker
|
||||||
|
- Install [nix](https://nixos.org) and enable
|
||||||
|
[flakes](https://nixos.wiki/wiki/Flakes)
|
||||||
|
- Allow the user who runs the joex component to use docker and nix. If
|
||||||
|
you install nix as multi-user, then this is already done.
|
||||||
|
- Check the section on addons in the [default
|
||||||
|
configuration](@/docs/configure/main.md#joex) for joex
|
103
website/site/content/docs/addons/using.md
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
+++
|
||||||
|
title = "Usage"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "How to use addons"
|
||||||
|
weight = 20
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Using Addons
|
||||||
|
|
||||||
|
This shows with an example, how to install and use an addon. If the ui
|
||||||
|
doesn't show these forms, addons are probably disabled. Addons need to
|
||||||
|
be enabled in the config file of the rest server.
|
||||||
|
|
||||||
|
## Discovering
|
||||||
|
|
||||||
|
Addons can be installed from any URL to a zip file. One way is to use
|
||||||
|
URLs generated by forges like github or gitlab. They provide zip files
|
||||||
|
containing the repository contents. Alternatively an addon may provide
|
||||||
|
specific files in their release section.
|
||||||
|
|
||||||
|
For example, this is the url to the first release of the rotate-pdf
|
||||||
|
addon:
|
||||||
|
|
||||||
|
- <https://github.com/docspell/rotate-pdf-addon/archive/refs/tags/v0.1.0.zip>
|
||||||
|
|
||||||
|
This url points to a fixed version. It is also possible to use urls
|
||||||
|
that are "moving targets":
|
||||||
|
|
||||||
|
- <https://github.com/docspell/rotate-pdf-addon/archive/refs/heads/master.zip>
|
||||||
|
|
||||||
|
The contents behind the above url will very likely change over time.
|
||||||
|
|
||||||
|
For better discoverability, repositories for addons on public forges
|
||||||
|
can be tagged with *docspell-addon*.
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
With a URL like above, you can go to *Manage Data -> Addons -> New*
|
||||||
|
and insert the url:
|
||||||
|
|
||||||
|
{{ figure2(light="addon-install-01.png", dark="addon-install-01_dark.png") }}
|
||||||
|
|
||||||
|
It might take a while for Docspell to download, extract and verify the
|
||||||
|
addon. The addon will be downloaded into the database. Once installed,
|
||||||
|
the given URL is not used anymore, unless a manual update is issued.
|
||||||
|
|
||||||
|
After this finishes, you cannot change the URL anymore:
|
||||||
|
|
||||||
|
{{ figure2(light="addon-install-02.png", dark="addon-install-02_dark.png") }}
|
||||||
|
|
||||||
|
When using URLs pointing to "moving targets", you could click the
|
||||||
|
*Update Addon* button to re-download the contents at the url. This
|
||||||
|
doesn't make much sense for URLs to fixed versions (in *theory* these
|
||||||
|
could change as well, of course) and it is not without risk. It can be
|
||||||
|
useful for own addons to have them quickly updated.
|
||||||
|
|
||||||
|
Now the addon is installed. It can now be used by creating a *run configuration*.
|
||||||
|
|
||||||
|
## Run Configuration
|
||||||
|
|
||||||
|
A run configuration is comprised of one or more addons, their inputs
|
||||||
|
and some settings regarding their runtime environment.
|
||||||
|
|
||||||
|
The name is used for displaying in the webapp. You can disable/enable
|
||||||
|
a run configuration.
|
||||||
|
|
||||||
|
It is possible that addons use [dsc](@/docs/tools/cli.md) or call the
|
||||||
|
rest-server otherwise. Usually a valid session is required (to set
|
||||||
|
tags or do searches). When selecting to run *on behalf of a user*, a
|
||||||
|
valid authenticator for that user is injected into the environment of
|
||||||
|
the addon run.
|
||||||
|
|
||||||
|
The *Trigger Run* setting specifies when this run configuration should
|
||||||
|
be executed. You can choose from options that all addons in the list
|
||||||
|
must support. In this example, only `existing-item` is used. This
|
||||||
|
means the run configuration can be selected to run on any item.
|
||||||
|
|
||||||
|
Other options include:
|
||||||
|
- `final-process-item`: executes automatically as the last step when
|
||||||
|
processing uploaded files
|
||||||
|
- `final-reprocess-item`: like `final-process-item` but applies when
|
||||||
|
an existing item is reprocessed.
|
||||||
|
- `scheduled`: runs periodically based on a schedule (and independent
|
||||||
|
from any item)
|
||||||
|
|
||||||
|
Each addon may require arguments. Click on *Configure* to enable the
|
||||||
|
*Arguments* section and add arguments for the corresponding addon.
|
||||||
|
What to insert here is completely specific to the addon. In this case,
|
||||||
|
it expects a JSON object with only one field `"degree"` that indicates
|
||||||
|
how to rotate. In this example, it should be rotated by 90°
|
||||||
|
counter-clockwise. You need to click *Update* to set it into the addon
|
||||||
|
and then *Submit* to save everything.
|
||||||
|
|
||||||
|
{{ figure2(light="addon-install-03.png", dark="addon-install-03_dark.png") }}
|
||||||
|
|
||||||
|
|
||||||
|
With this run configuration in place, you can try it out on some item:
|
||||||
|
|
||||||
|
{{ figure2(light="addon-install-04.png", dark="addon-install-04_dark.png") }}
|
||||||
|
|
||||||
|
This example configured the *rotate-pdf-addon* to rotate left by 90°.
|
||||||
|
Create a similar run configuration to rotate to the right.
|
376
website/site/content/docs/addons/writing.md
Normal file
@ -0,0 +1,376 @@
|
|||||||
|
+++
|
||||||
|
title = "Writing"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "How to write addons"
|
||||||
|
weight = 20
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Writing Addons
|
||||||
|
|
||||||
|
Writing an addon can be divided into two things:
|
||||||
|
|
||||||
|
- create the program
|
||||||
|
- define how to package and run it
|
||||||
|
|
||||||
|
The next sections describe both parts. For a quick start, check out
|
||||||
|
the example addons.
|
||||||
|
|
||||||
|
As previously written, you can choose a language. The interaction with
|
||||||
|
docspell happens by exchanging JSON data. So, whatever you choose, it
|
||||||
|
should be possible to read and produce JSON with some convenience.
|
||||||
|
|
||||||
|
|
||||||
|
# Writing the program
|
||||||
|
|
||||||
|
## Interface to Docspell
|
||||||
|
|
||||||
|
The interface to Docspell is JSON data. The addon receives all inputs
|
||||||
|
as JSON and may return a JSON object as output (via stdout).
|
||||||
|
|
||||||
|
An addon can be executed in different contexts. Depending on this, the
|
||||||
|
available inputs differ. The addon always receives one argument, which
|
||||||
|
is a file containing the user supplied data (it may be empty). A user
|
||||||
|
is able to provide data to every addon from the web-ui.
|
||||||
|
|
||||||
|
All other things are provided as environment variables. There are
|
||||||
|
environment variables that are always provided and some are only
|
||||||
|
available for specific contexts.
|
||||||
|
|
||||||
|
For example, an addon that is executed in the context of an item
|
||||||
|
(maybe after processing or when a user selects an addon to run "on an
|
||||||
|
item"), Docspell prepares all data for the corresponding item and
|
||||||
|
makes it available to the addon. In contrast, an addon executed
|
||||||
|
periodically by a schedule, won't have this data available.
|
||||||
|
|
||||||
|
|
||||||
|
## Basic Environment
|
||||||
|
|
||||||
|
The following environment variables are always provided by Docspell:
|
||||||
|
|
||||||
|
- `ADDON_DIR` points to the directory containing the extracted addon
|
||||||
|
zip file
|
||||||
|
- `TMPDIR` / `TMP_DIR` a directory for storing temporary data
|
||||||
|
- `OUTPUT_DIR` a directory for storing files that should be processed
|
||||||
|
by docspell
|
||||||
|
- `CACHE_DIR` a directory for storing data that should stay between
|
||||||
|
addon runs
|
||||||
|
|
||||||
|
It is very much recommended to always use these environment variables
|
||||||
|
when reading and writing data. This keeps Docspell in control about
|
||||||
|
the exact location.
|
||||||
|
|
||||||
|
The working directory will be set to a directory that is also
|
||||||
|
temporary, but please don't rely on that. Use the environment
|
||||||
|
variables.
|
||||||
|
|
||||||
|
## Item data
|
||||||
|
|
||||||
|
When executed in the context of an item. Meaning for triggers:
|
||||||
|
`final-process-item`, `final-reprocess-item`, `existing-item`.
|
||||||
|
|
||||||
|
### `ITEM_DATA_JSON`
|
||||||
|
|
||||||
|
This environment variable points to a JSON file containing information
|
||||||
|
about the current item. If it is run at processing time, it includes
|
||||||
|
all information gathered so far by Docspell.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
{{ incl_json(path="templates/shortcodes/item-data") }}
|
||||||
|
|
||||||
|
|
||||||
|
### `ITEM_ARGS_JSON`
|
||||||
|
|
||||||
|
This environment variable points to a JSON file that contains the user
|
||||||
|
supplied information with an upload request. That is, a user may
|
||||||
|
specify tags or a language when uploading files. This would be in this
|
||||||
|
file.
|
||||||
|
|
||||||
|
*This is only available for uploads. Trigger `final-process-item`.*
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
{{ incl_json(path="templates/shortcodes/item-args") }}
|
||||||
|
|
||||||
|
|
||||||
|
### `ITEM_ORIGINAL_JSON` and `ITEM_PDF_JSON`
|
||||||
|
|
||||||
|
These JSON files contain a list of objects. Each object provides
|
||||||
|
properties about a file - either the original file or the converted
|
||||||
|
pdf. The structure is the same.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
{{ incl_json(path="templates/shortcodes/file-meta") }}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Directories
|
||||||
|
|
||||||
|
These environment variables point to directories that contain the attachment files.
|
||||||
|
|
||||||
|
- `ITEM_PDF_DIR` contains all converted pdf files, the attachment id is the filename
|
||||||
|
- `ITEM_ORIGINAL_DIR` contains all original files, the attachment id is the filename
|
||||||
|
|
||||||
|
For example, to obtain a converted pdf file, lookup the id in
|
||||||
|
`ITEM_PDF_JSON` and then construct the file name via
|
||||||
|
`ITEM_PDF_DIR/{id}`.
|
||||||
|
|
||||||
|
|
||||||
|
## Session for dsc
|
||||||
|
|
||||||
|
An addon may use [dsc](@/docs/tools/cli.md) which requires for many
|
||||||
|
commands a valid session identifier. Usually this is obtained by
|
||||||
|
logging in (i.e. using `dsc login`). This is not really feasible from
|
||||||
|
inside an addon, of course. Therefore you can configure an addon to
|
||||||
|
run on behalf of some user when creating the run configuration.
|
||||||
|
Docspell then generates a valid session identifier and puts it into
|
||||||
|
the environment. The [dsc](@/docs/tools/cli.md) tool will pick them up
|
||||||
|
automatically.
|
||||||
|
|
||||||
|
It will also setup the URL to connect to some restserver. (If you have
|
||||||
|
multiple rest-servers running, it will pick one randomly).
|
||||||
|
|
||||||
|
- `DSC_SESSION` env variable containing a session identifier. Its
|
||||||
|
validity is coupled to the configured timeout.
|
||||||
|
- `DSC_DOCSPELL_URL` the base url to some rest server
|
||||||
|
|
||||||
|
That means when using an addon in this way, you can simply use `dsc`
|
||||||
|
without worrying about authentication or the correct URL to connect
|
||||||
|
to.
|
||||||
|
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
Docspell doesn't interpret the returncode of an addon, except checking
|
||||||
|
for being equal to `0` which indicates a successful run.
|
||||||
|
|
||||||
|
In order to change data in Docspell, the addon program can run
|
||||||
|
`dsc` (for example) to change some state - like setting tags etc. But
|
||||||
|
the preferred approach would be to return instructions for Docspell.
|
||||||
|
Docspell will execute the instructions when the addon terminates
|
||||||
|
successfully - that is with return code `0`.
|
||||||
|
|
||||||
|
These instructions are in a JSON object which needs to go to stdout.
|
||||||
|
You can use stderr in an addon for logging/debugging purposes. But if
|
||||||
|
you specify `collectOutput: true` in the descriptor, then stdout must
|
||||||
|
only return this specific JSON (or nothing, empty output is ignored).
|
||||||
|
|
||||||
|
You find the complete structure below. It consists of these parts:
|
||||||
|
|
||||||
|
- `commands`: lets you declare actions to do for an item or attachment
|
||||||
|
- `files`: defines files relative to `OUTPUT_DIR` that should be
|
||||||
|
processed
|
||||||
|
- `newItems`: declares files relative to `OUTPUT_DIR` that should be
|
||||||
|
processed as new uploads
|
||||||
|
|
||||||
|
The `commands` allows to set tags, fields and other things. All parts
|
||||||
|
are optional, you don't need to return the complete structure. Just
|
||||||
|
returning `commands` or only `files` is ok.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
{{ incl_json(path="templates/shortcodes/addon-output") }}
|
||||||
|
|
||||||
|
|
||||||
|
# Descriptor
|
||||||
|
|
||||||
|
An addon must provide an *addon descriptor*, which is a yaml or json
|
||||||
|
file looking like this:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# The meta section is required. Name and version must not contain
|
||||||
|
# whitespace
|
||||||
|
meta:
|
||||||
|
name: "name-of-addon"
|
||||||
|
version: "2.21"
|
||||||
|
description: |
|
||||||
|
Describe the purpose and how it must be used here
|
||||||
|
|
||||||
|
# Defining when this addon is run. This is used to guide the user
|
||||||
|
# interface in selecting an addon. At least one is required to specify.
|
||||||
|
#
|
||||||
|
# Possible values:
|
||||||
|
# - scheduled: requires to enter a timer to run this addon periodically
|
||||||
|
# - final-process-item: the final step when processing an item
|
||||||
|
# - final-reprocess-item: the final step when reprocessing an item
|
||||||
|
# - existing-item: A user selects the addon to run on an item
|
||||||
|
triggers:
|
||||||
|
- final-process-item
|
||||||
|
- final-reprocess-item
|
||||||
|
- existing-item
|
||||||
|
|
||||||
|
# How to build and run this addon (optional). If missing, auto
|
||||||
|
# detection will enable a nix runner if a `flake.nix` is found in the
|
||||||
|
# source root and docker if a `Dockerfile` is found.
|
||||||
|
#
|
||||||
|
# Both runners are compared to what is enabled at the server.
|
||||||
|
runner:
|
||||||
|
# Building the program using nix flakes. This requires a flake.nix
|
||||||
|
# file in the source root with a default package and a flake-enabled
|
||||||
|
# nix on the joex machine.
|
||||||
|
#
|
||||||
|
# The program is built via `nix build`. If the joex machine has
|
||||||
|
# systemd-nspawn installed, it is used to run the addon inside a
|
||||||
|
# container. Otherwise the addon is run directly on the machine.
|
||||||
|
nix:
|
||||||
|
enable: true
|
||||||
|
|
||||||
|
# Docker based runner can define a custom image to use. If a `build`
|
||||||
|
# key exists pointing to a Dockerfile, the image is built before. If
|
||||||
|
# the docker image is complex, you can build it independently and
|
||||||
|
# provide the pre-built image.
|
||||||
|
#
|
||||||
|
# The program is run via `docker run` passing the arguments to the
|
||||||
|
# addon. Thus it expects the entrypoint to be correctly configured
|
||||||
|
# to the executable. You may use `args` in order to prepend
|
||||||
|
# additional arguments, like the path to an executable if the image
|
||||||
|
# requires that. The joex machine must have docker installed and the
|
||||||
|
# user running joex must be allowed to use docker. You must either
|
||||||
|
# define an image with an appropriate entry point or a dockerfile.
|
||||||
|
docker:
|
||||||
|
enable: false
|
||||||
|
#image: myorg/myimage:latest
|
||||||
|
build: Dockerfile
|
||||||
|
|
||||||
|
# Trivial runner that simply executes the file specified with
|
||||||
|
# `exec`. Nothing is built beforehand. This runner usually requires that
|
||||||
|
# the joex machine contains all dependencies needed to run the
|
||||||
|
# addon. You may need to install additional software on the machine
|
||||||
|
# running joex.
|
||||||
|
trivial:
|
||||||
|
enable: false
|
||||||
|
exec: src/addon.sh
|
||||||
|
|
||||||
|
# Optional arguments/options given to the program. The program
|
||||||
|
# receives at least one argument, which is a file to the user input as
|
||||||
|
# supplied in the application. The arguments here are prepended.
|
||||||
|
args:
|
||||||
|
|
||||||
|
|
||||||
|
options:
|
||||||
|
# If false, the program is run inside a private network, blocking
|
||||||
|
# traffic to the host and networks reachable from there. This only
|
||||||
|
# applies if the addon can be run inside a container.
|
||||||
|
#
|
||||||
|
# If the addon runs side effects (such as using dsc to set tags),
|
||||||
|
# this must be set to `true`.
|
||||||
|
#
|
||||||
|
# Default is false.
|
||||||
|
networking: true
|
||||||
|
|
||||||
|
# If true, the stdout of the program is parsed into a JSON structure
|
||||||
|
# that is interpreted as actions executed by the task that runs the
|
||||||
|
# addon. If the addon runs side effects only, set this to `false`
|
||||||
|
# and the output is ignored.
|
||||||
|
#
|
||||||
|
# It is recommended to use this approach, if possible. It allows
|
||||||
|
# docspell itself to apply any changes and the addon can run
|
||||||
|
# completely isolated.
|
||||||
|
#
|
||||||
|
# Default is false.
|
||||||
|
collectOutput: true
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Packaging
|
||||||
|
|
||||||
|
Docspell can use different ways to build and run the addon:
|
||||||
|
`nix-flake`, `docker` and `trivial`. The first two allow packaging
|
||||||
|
the addon in a defined way (with a single dependency, either nix or
|
||||||
|
docker) and then execute it independently from the underlying system.
|
||||||
|
This makes it possible to execute the addon on a variety of systems.
|
||||||
|
This is especially useful for addons that are meant to be public and
|
||||||
|
reusable by different people.
|
||||||
|
|
||||||
|
The "trivial" runner is only executing some program specified in
|
||||||
|
`docspell-addon.yaml`, directly on the joex machine (or via
|
||||||
|
`systemd-nspawn`). The machine running joex must then provide all
|
||||||
|
necessary dependencies and it must be compatible to run the addon. It
|
||||||
|
may be useful especially for personal addons.
|
||||||
|
|
||||||
|
|
||||||
|
## nix flake
|
||||||
|
|
||||||
|
Using [nix](https://nixos.org) with
|
||||||
|
[flakes](https://nixos.wiki/wiki/Flakes) enabled, is the recommended
|
||||||
|
approach. It is very flexible and reproducible while sharing most
|
||||||
|
dependencies (in contrast to docker where each image contains the same
|
||||||
|
packages again and again).
|
||||||
|
|
||||||
|
Docspell runs `nix build` to build the addon and then executes the
|
||||||
|
file produced to `$out/bin`.
|
||||||
|
|
||||||
|
|
||||||
|
## docker
|
||||||
|
|
||||||
|
For docker it is recommended to provide pre-built images. Docspell can
|
||||||
|
build images from provided `Dockerfile`, but for larger images it
|
||||||
|
might be better to do this beforehand.
|
||||||
|
|
||||||
|
Docspell will run the addon using `docker run …` passing it only the
|
||||||
|
user-input file as argument. Thus the image must define an appropriate
|
||||||
|
`ENTRYPOINT`.
|
||||||
|
|
||||||
|
# Examples
|
||||||
|
## Minimal Addon
|
||||||
|
|
||||||
|
The steps below create a minimal addon:
|
||||||
|
|
||||||
|
1. Create a bash script `addon.sh` with this content:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
echo "Hello world!"
|
||||||
|
```
|
||||||
|
2. Make it executable:
|
||||||
|
```bash
|
||||||
|
chmod +x addon.sh
|
||||||
|
```
|
||||||
|
3. Create a yaml file `docspell-addon.yaml` with this content:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
meta:
|
||||||
|
name: "minimal-addon"
|
||||||
|
version: "0.1.0"
|
||||||
|
triggers:
|
||||||
|
- existing-item
|
||||||
|
- scheduled
|
||||||
|
runner:
|
||||||
|
trivial:
|
||||||
|
enable: true
|
||||||
|
exec: addon.sh
|
||||||
|
```
|
||||||
|
4. Create a zip file containing these two files:
|
||||||
|
```bash
|
||||||
|
zip addon.zip docspell-addon.yaml addon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
The addon is now ready. Make it available via an url (use some file
|
||||||
|
sharing tool, upload it somewhere etc) and then it can be installed
|
||||||
|
and run.
|
||||||
|
|
||||||
|
## Non-Minimal Addon
|
||||||
|
|
||||||
|
The minimal example above is good to see what is required, but it is
|
||||||
|
not very useful… Please see this post about the [audio file
|
||||||
|
addon](@/blog/2022-05-16_audio_file_addon.md) that walks through a
|
||||||
|
more useful addon.
|
||||||
|
|
||||||
|
# Misc
|
||||||
|
|
||||||
|
## Advantages of "pure" addons
|
||||||
|
|
||||||
|
Although the output structure is not set in stone, it is recommended
|
||||||
|
to use this in contrast to directly changing state via `dsc`.
|
||||||
|
|
||||||
|
- outputs of all addons are collected and only applied if all were
|
||||||
|
successful; in contrast side effects are always applied even if the
|
||||||
|
addon fails shortly after
|
||||||
|
- since addons are executed as joex tasks, their result can be sent as
|
||||||
|
events to another http server for further processing.
|
||||||
|
- addons can run in an isolated environment without network (no data
|
||||||
|
can go out)
|
||||||
|
|
||||||
|
## Use addons in other addons?
|
||||||
|
|
||||||
|
This can be achieved very conveniently by using `nix`. If addons are
|
||||||
|
defined as a nix flake, they can be easily consumed by each other.
|
@ -82,6 +82,7 @@ template = "docs.html"
|
|||||||
- zip
|
- zip
|
||||||
- [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions)
|
- [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions)
|
||||||
(e-mail files in plain text MIME)
|
(e-mail files in plain text MIME)
|
||||||
|
- Extend Docspell via [addons](@/docs/addons/basics.md)
|
||||||
- Tooling:
|
- Tooling:
|
||||||
- [Command Line Interface](@/docs/tools/cli.md) allowing to upload
|
- [Command Line Interface](@/docs/tools/cli.md) allowing to upload
|
||||||
files, watch folders and many more!
|
files, watch folders and many more!
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
``` bash
|
|
||||||
{% set data = load_data(path=path) %}
|
{% set data = load_data(path=path) %}
|
||||||
|
``` bash
|
||||||
{{ data | safe }}
|
{{ data | safe }}
|
||||||
```
|
```
|
||||||
|
4
website/site/templates/shortcodes/incl_json.md
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
{% set data = load_data(path=path) %}
|
||||||
|
``` json
|
||||||
|
{{ data | safe }}
|
||||||
|
```
|
@ -0,0 +1,86 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import cats.syntax.all._
|
||||||
|
import docspell.addons.out.{AddonOutput, ItemFile, NewFile, NewItem}
|
||||||
|
import docspell.addons.out.NewFile.{Meta => FileMeta}
|
||||||
|
import docspell.addons.out.NewItem.{Meta => ItemMeta}
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction}
|
||||||
|
import io.circe.syntax._
|
||||||
|
|
||||||
|
object AddonOutputExample extends Helper {
|
||||||
|
|
||||||
|
val example = AddonOutput(
|
||||||
|
commands = List(
|
||||||
|
BackendCommand.ItemUpdate(
|
||||||
|
itemId = id("XabZ-item-id"),
|
||||||
|
actions = List(
|
||||||
|
ItemAction.AddTags(Set("tag1", "tag2")),
|
||||||
|
ItemAction.ReplaceTags(Set("tagX", "tagY")),
|
||||||
|
ItemAction.RemoveTags(Set("tag0", "tag9")),
|
||||||
|
ItemAction.RemoveTagsCategory(Set("doc-type")),
|
||||||
|
ItemAction.SetFolder("folder-name".some),
|
||||||
|
ItemAction.SetCorrOrg(id("OaIy-org-ID").some),
|
||||||
|
ItemAction.SetCorrPerson(id("OaIy-person-ID").some),
|
||||||
|
ItemAction.SetConcPerson(id("AEiae-person-ID").some),
|
||||||
|
ItemAction.SetConcEquipment(id("AEiae-equipment-ID").some),
|
||||||
|
ItemAction.SetField(id("eur"), "12.99"),
|
||||||
|
ItemAction.SetName("new item name"),
|
||||||
|
ItemAction.SetNotes("replace notes with this".some),
|
||||||
|
ItemAction.AddNotes("More notes appended", Some("-----"))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
BackendCommand
|
||||||
|
.AttachmentUpdate(
|
||||||
|
itemId = id("XabZ-item-id"),
|
||||||
|
attachId = id("Atca-attach-id"),
|
||||||
|
actions = List(
|
||||||
|
AttachmentAction.SetExtractedText("replace extracted text with this".some)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
files = List(
|
||||||
|
ItemFile(
|
||||||
|
id("iZtb-item-id"),
|
||||||
|
textFiles = Map("attach-id" -> "newtext.txt"),
|
||||||
|
pdfFiles = Map("attach-id" -> "better.pdf"),
|
||||||
|
previewImages = Map("attach-id" -> "better-preview.png"),
|
||||||
|
newFiles = List(
|
||||||
|
NewFile(
|
||||||
|
metadata = FileMeta(
|
||||||
|
language = Some(Language.English),
|
||||||
|
skipDuplicate = Some(true),
|
||||||
|
attachmentsOnly = Some(false)
|
||||||
|
),
|
||||||
|
file = "new-file1.docx"
|
||||||
|
),
|
||||||
|
NewFile(
|
||||||
|
metadata = FileMeta(
|
||||||
|
language = Some(Language.German),
|
||||||
|
skipDuplicate = Some(true),
|
||||||
|
attachmentsOnly = Some(false)
|
||||||
|
),
|
||||||
|
file = "new-file2.pdf"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
newItems = List(
|
||||||
|
NewItem(
|
||||||
|
metadata = ItemMeta(
|
||||||
|
language = Some(Language.English),
|
||||||
|
direction = Direction.Incoming.some,
|
||||||
|
folderId = id("my-folder").some,
|
||||||
|
source = "the-addon-x".some,
|
||||||
|
skipDuplicate = true.some,
|
||||||
|
tags = List("tag1", "tag2").some,
|
||||||
|
attachmentsOnly = None
|
||||||
|
).some,
|
||||||
|
files = List("a-file.pdf", "another.jpg")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def exampleJson =
|
||||||
|
example.asJson.spaces2
|
||||||
|
}
|
@ -0,0 +1,23 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import docspell.addons.out._
|
||||||
|
import docspell.common.bc._
|
||||||
|
import io.circe.syntax._
|
||||||
|
|
||||||
|
object AddonOutputMiniExample extends Helper {
|
||||||
|
|
||||||
|
val example = AddonOutput(
|
||||||
|
commands = List(
|
||||||
|
BackendCommand.ItemUpdate(
|
||||||
|
itemId = id("XabZ-item-id"),
|
||||||
|
actions = List(
|
||||||
|
ItemAction.AddTags(Set("tag1", "tag2"))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def exampleJson =
|
||||||
|
example.asJson.spaces2
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,37 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import cats.syntax.option._
|
||||||
|
import docspell.common.{ByteSize, Language, MimeType}
|
||||||
|
import docspell.store.queries.AttachedFile
|
||||||
|
import io.circe.syntax._
|
||||||
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
|
object FileMetaExample extends Helper {
|
||||||
|
|
||||||
|
val example1 = AttachedFile(
|
||||||
|
id = randomId,
|
||||||
|
name = "the filename.png".some,
|
||||||
|
position = 0,
|
||||||
|
language = Language.English.some,
|
||||||
|
mimetype = MimeType.png,
|
||||||
|
length = ByteSize(454654L),
|
||||||
|
checksum = ByteVector.fromValidHex("caffe0caffe").digest("sha256")
|
||||||
|
)
|
||||||
|
|
||||||
|
val example2 = AttachedFile(
|
||||||
|
id = randomId,
|
||||||
|
name = "other filename.png".some,
|
||||||
|
position = 1,
|
||||||
|
language = Language.English.some,
|
||||||
|
mimetype = MimeType.pdf,
|
||||||
|
length = ByteSize(1232214L),
|
||||||
|
checksum = ByteVector.fromValidHex("eff0eff0eff").digest("sha256")
|
||||||
|
)
|
||||||
|
|
||||||
|
val example = List(
|
||||||
|
example1,
|
||||||
|
example2
|
||||||
|
)
|
||||||
|
|
||||||
|
val exampleJson = example.asJson.spaces2
|
||||||
|
}
|
29
website/src/main/scala/docspell/website/Helper.scala
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import docspell.common.{IdRef, Ident, Timestamp}
|
||||||
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
|
import java.time.LocalDate
|
||||||
|
import scala.util.Random
|
||||||
|
|
||||||
|
trait Helper {
|
||||||
|
|
||||||
|
def id(str: String): Ident = Ident.unsafe(str)
|
||||||
|
|
||||||
|
val date20220514 = Timestamp.atUtc(LocalDate.of(2022, 5, 14).atTime(11, 22, 12))
|
||||||
|
|
||||||
|
val cid = id("collective")
|
||||||
|
|
||||||
|
implicit final class StringExt(self: String) {
|
||||||
|
def id: Ident = Ident.unsafe(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
def idRef(name: String): IdRef = IdRef(randomId, name)
|
||||||
|
|
||||||
|
def randomId = {
|
||||||
|
val buffer = Array.ofDim[Byte](6)
|
||||||
|
new Random().nextBytes(buffer)
|
||||||
|
id(ByteVector.view(buffer).toBase58)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,25 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import cats.syntax.option._
|
||||||
|
import docspell.common.{Language, ProcessItemArgs}
|
||||||
|
import io.circe.syntax._
|
||||||
|
|
||||||
|
object ItemArgsExample extends Helper {
|
||||||
|
|
||||||
|
val example = ProcessItemArgs.ProcessMeta(
|
||||||
|
collective = cid,
|
||||||
|
itemId = None,
|
||||||
|
language = Language.English,
|
||||||
|
direction = None,
|
||||||
|
sourceAbbrev = "scanner",
|
||||||
|
folderId = None,
|
||||||
|
validFileTypes = Seq.empty,
|
||||||
|
skipDuplicate = true,
|
||||||
|
fileFilter = None,
|
||||||
|
tags = List("given-tag-1").some,
|
||||||
|
reprocess = false,
|
||||||
|
attachmentsOnly = None
|
||||||
|
)
|
||||||
|
|
||||||
|
val exampleJson = example.asJson.spaces2
|
||||||
|
}
|
@ -0,0 +1,75 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import cats.syntax.option._
|
||||||
|
import docspell.common.MetaProposal.Candidate
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.joex.process.ItemData
|
||||||
|
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
|
||||||
|
import io.circe.syntax._
|
||||||
|
|
||||||
|
object ItemDataExample extends Helper {
|
||||||
|
|
||||||
|
private val proposals: MetaProposalList = MetaProposalList(
|
||||||
|
List(
|
||||||
|
MetaProposal(MetaProposalType.CorrOrg, Candidate(idRef("Acme AG"), Set.empty)),
|
||||||
|
MetaProposal(
|
||||||
|
MetaProposalType.ConcPerson,
|
||||||
|
Candidate(idRef("Derek Jeter"), Set.empty)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
private val givenProposals: MetaProposalList = MetaProposalList.empty
|
||||||
|
|
||||||
|
val example = ItemData(
|
||||||
|
item = RItem(
|
||||||
|
id = id("UyZ-item-id"),
|
||||||
|
cid = cid,
|
||||||
|
name = "yearly report 2021",
|
||||||
|
itemDate = date20220514.some,
|
||||||
|
source = "webapp",
|
||||||
|
direction = Direction.Incoming,
|
||||||
|
state = ItemState.Processing,
|
||||||
|
corrOrg = None,
|
||||||
|
corrPerson = None,
|
||||||
|
concPerson = None,
|
||||||
|
concEquipment = None,
|
||||||
|
inReplyTo = None,
|
||||||
|
dueDate = None,
|
||||||
|
created = date20220514,
|
||||||
|
updated = date20220514,
|
||||||
|
notes = None,
|
||||||
|
folderId = None
|
||||||
|
),
|
||||||
|
attachments = Vector(
|
||||||
|
RAttachment(
|
||||||
|
id = id("Apa-attach-id"),
|
||||||
|
itemId = id("UyZ-item-id"),
|
||||||
|
fileId = FileKey(cid, FileCategory.AttachmentConvert, id("abcxyz")),
|
||||||
|
position = 0,
|
||||||
|
created = date20220514,
|
||||||
|
name = "report_year_2021.pdf".some
|
||||||
|
)
|
||||||
|
),
|
||||||
|
metas = Vector(
|
||||||
|
RAttachmentMeta(
|
||||||
|
id = id("Apa-attach-id"),
|
||||||
|
content = "this is the extracted text …".some,
|
||||||
|
nerlabels = Nil,
|
||||||
|
proposals = proposals,
|
||||||
|
pages = 2.some,
|
||||||
|
language = Language.English.some
|
||||||
|
)
|
||||||
|
),
|
||||||
|
dateLabels = Vector.empty,
|
||||||
|
originFile = Map(
|
||||||
|
id("Apa-attach-id") -> FileKey(cid, FileCategory.AttachmentSource, "yanetar".id)
|
||||||
|
),
|
||||||
|
givenMeta = givenProposals,
|
||||||
|
tags = List("tag-1"),
|
||||||
|
classifyProposals = MetaProposalList.empty,
|
||||||
|
classifyTags = List("invoice")
|
||||||
|
)
|
||||||
|
|
||||||
|
val exampleJson = example.asJson.spaces2
|
||||||
|
}
|
64
website/src/main/scala/docspell/website/Main.scala
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
package docspell.website
|
||||||
|
|
||||||
|
import cats.effect.{ExitCode, IO, IOApp}
|
||||||
|
import fs2.io.file.{Files, Path}
|
||||||
|
import fs2.Stream
|
||||||
|
import io.circe.Encoder
|
||||||
|
import io.circe.syntax._
|
||||||
|
|
||||||
|
object Main extends IOApp {
|
||||||
|
override def run(args: List[String]) =
|
||||||
|
args match {
|
||||||
|
case "addon-output" :: file :: Nil =>
|
||||||
|
if (file.isEmpty) ok(stdout(AddonOutputExample.exampleJson))
|
||||||
|
else ok(AddonOutputExample.example.writeFile(file))
|
||||||
|
|
||||||
|
case "addon-output-tags" :: file :: Nil =>
|
||||||
|
if (file.isEmpty) ok(stdout(AddonOutputMiniExample.exampleJson))
|
||||||
|
else ok(AddonOutputMiniExample.example.writeFile(file))
|
||||||
|
|
||||||
|
case "item-data" :: file :: Nil =>
|
||||||
|
if (file.isEmpty) ok(stdout(ItemDataExample.exampleJson))
|
||||||
|
else ok(ItemDataExample.example.writeFile(file))
|
||||||
|
|
||||||
|
case "item-args" :: file :: Nil =>
|
||||||
|
if (file.isEmpty) ok(stdout(ItemArgsExample.exampleJson))
|
||||||
|
else ok(ItemArgsExample.example.writeFile(file))
|
||||||
|
|
||||||
|
case "file-meta" :: file :: Nil =>
|
||||||
|
if (file.isEmpty) ok(stdout(FileMetaExample.exampleJson))
|
||||||
|
else ok(FileMetaExample.example.writeFile(file))
|
||||||
|
|
||||||
|
case v :: Nil =>
|
||||||
|
err(stderr(s"Unknown example: $v"))
|
||||||
|
|
||||||
|
case _ =>
|
||||||
|
err(stderr("Specify what example to print"))
|
||||||
|
}
|
||||||
|
|
||||||
|
def stdout(str: String, args: Any*): Unit =
|
||||||
|
Console.out.println(str.format(args: _*))
|
||||||
|
|
||||||
|
def stderr(str: String, args: Any*): Unit =
|
||||||
|
Console.err.println(str.format(args: _*))
|
||||||
|
|
||||||
|
def ok(f: IO[Unit]): IO[ExitCode] =
|
||||||
|
f.as(ExitCode.Success)
|
||||||
|
|
||||||
|
def ok(p: => Unit): IO[ExitCode] =
|
||||||
|
ok(IO(p))
|
||||||
|
|
||||||
|
def err(p: => Unit): IO[ExitCode] =
|
||||||
|
IO(p).as(ExitCode.Error)
|
||||||
|
|
||||||
|
implicit class WriteOps[A: Encoder](self: A) {
|
||||||
|
def writeFile(file: String): IO[Unit] =
|
||||||
|
Stream
|
||||||
|
.emit(self.asJson.spaces2)
|
||||||
|
.covary[IO]
|
||||||
|
.through(fs2.text.utf8.encode)
|
||||||
|
.through(Files[IO].writeAll(Path(file)))
|
||||||
|
.compile
|
||||||
|
.drain
|
||||||
|
}
|
||||||
|
}
|