Addon docs

This commit is contained in:
eikek 2022-05-16 15:01:28 +02:00
parent d077cc30cd
commit 5abbe92f2b
28 changed files with 1834 additions and 3 deletions

5
.gitignore vendored
View File

@ -15,6 +15,11 @@ _site/
/website/site/static/syntax-*.css
/website/site/static/webfonts/
/website/site/static/files/*.woff*
/website/site/static/examples/
/website/site/templates/shortcodes/addon-output
/website/site/templates/shortcodes/item-data
/website/site/templates/shortcodes/item-args
/website/site/templates/shortcodes/file-meta
/website/site/templates/shortcodes/server.conf
/website/site/templates/shortcodes/sample-exim.conf
/website/site/templates/shortcodes/joex.conf

View File

@ -966,8 +966,28 @@ val website = project
)
IO.append(target, IO.readBytes(changelog))
Seq(target)
}.taskValue
}.taskValue,
// Prepares example files for the website: runs this project's main four
// times (once per example name) and moves the resulting files into the
// zola shortcodes template directory.
zolaPrepare := {
val log = streams.value.log
log.info("Generating examples…")
// Target directory for the generated files; created if it does not exist.
val templateOut = baseDirectory.value / "site" / "templates" / "shortcodes"
IO.createDirectory(templateOut)
// sbt crashes when interpolating values into the string in `toTask`;
// this is the reason for the following construct: one literal
// invocation per example instead of a loop over the names.
// NOTE(review): presumably the main class writes the named example to the
// given /tmp path - confirm against the project's main.
(Compile / run).toTask(s" addon-output /tmp/addon-output.json").value
(Compile / run).toTask(s" item-data /tmp/item-data.json").value
(Compile / run).toTask(s" item-args /tmp/item-args.json").value
(Compile / run).toTask(s" file-meta /tmp/file-meta.json").value
// Must list the same names as the `run` invocations above.
val inputs = List("addon-output", "item-data", "item-args", "file-meta")
// Move each /tmp/<name>.json to <templateOut>/<name> (without the extension).
inputs.foreach { name =>
IO.move(file(s"/tmp/$name.json"), templateOut / name)
}
}
)
.dependsOn(addonlib, joex)
val root = project
.in(file("."))

View File

@ -23,6 +23,7 @@ object ZolaPlugin extends AutoPlugin {
"'python -m SimpleHTTPServer 1234' for example."
)
val zolaCheck = taskKey[Unit]("Runs zola check to check links")
val zolaPrepare = taskKey[Unit]("Some task to run before generating docs")
}
import autoImport._
@ -33,10 +34,12 @@ object ZolaPlugin extends AutoPlugin {
zolaOutputDir := target.value / "zola-site",
zolaCommand := "zola",
zolaTestBaseUrl := "http://localhost:1234",
zolaPrepare := {},
zolaBuild := {
val logger = streams.value.log
logger.info("Building web site using zola ...")
(Compile / resources).value
zolaPrepare.value
buildSite(zolaCommand.value, zolaRootDir.value, zolaOutputDir.value, None, logger)
logger.info("Website built")
},
@ -45,6 +48,7 @@ object ZolaPlugin extends AutoPlugin {
val baseurl = zolaTestBaseUrl.value
logger.info("Building web site (test) using zola ...")
(Compile / resources).value
zolaPrepare.value
buildSite(
zolaCommand.value,
zolaRootDir.value,

View File

@ -19,7 +19,7 @@ out_base="$1"
work_dir=$(mktemp -dt screenshot2-script.XXXXXX)
export HOME=$work_dir
export RATIO="16:9"
export WAIT_SEC=4
export WAIT_SEC=${WAIT_SEC:-4}
#export TOP_CUT=400
dsc write-default-config

View File

@ -0,0 +1,581 @@
+++
title = "Addon for audio file support"
[extra]
author = "eikek"
+++
# 1st Addon: Audio file support
Since version 0.36.0 Docspell can be extended by
[addons](@/docs/addons/basics.md) - external programs that are
executed at some defined point in Docspell. This is a walk through the
first addon that was created, mainly as an example: providing support
for audio files.
<!-- more -->
I think it is interesting to provide support for audio files for a
DMS, although admittedly I don't have much of a use :). But this is
the kind of use-case that addons are for.
# The idea
The idea is very simple: the real work is done by external programs,
most notably [coqui's stt](https://github.com/coqui-ai/STT) a deep
learning toolkit originally created at Mozilla. It provides a command
line tool that accepts a WAV file and spits out text. Perfect!
With this text, a PDF file can be created and a preview image which is
already enough for basic support. You can see the pdf in the web-ui
and search for the text via SOLR or PostgreSQL.
Because a WAV file is not the most popular format today, `ffmpeg` can
be used to transform any other audio to WAV.
The only thing now is to create a program that checks the uploaded
files, filters out all audio files and runs them through the mentioned
programs. So let's do this.
# Preparation
Addons are external programs and can be written in whatever language….
For me this is a good opportunity to refresh my rusty scheme know-how
a bit. So this addon is written in Scheme, in particular
[guile](https://www.gnu.org/software/guile/). Programming in scheme is
fun and guile provides good integration into the (posix) OS and also
has a nice JSON module. I had the [reference
docs](https://www.gnu.org/software/guile/docs/docs-2.2/guile-ref/index.html)
open all the time - look at them for further details on the used
functions.
It's usually good to play around with the tools at first. For stt, we
first need to download a *model*. This will be used to "detect" the
text in the audio data. They have a [page](https://coqui.ai/models)
where we can download model files for any supported language. For the
addon, we will implement English and German.
When creating a PDF with wkhtmltopdf, we prettify it a little by
embedding the plain text into some html template. This will also take
care to specify UTF-8 as default encoding directly in the HTML
template.
FFMpeg just works as usual. It figures out the input format
automatically and knows from the extension of the output file what to
do.
You can find the full code
[here](https://github.com/docspell/audio-files-addon/blob/master/src/addon.scm).
The following shows excerpts from it with some explanation.
# The script
## Helpers
After the preamble, there are two helper functions.
```lisp
(define* (errln formatstr . args)
(apply format (current-error-port) formatstr args)
(newline))
;; Macro for executing system commands and making this program exit in
;; case of failure.
(define-syntax sysexec
(syntax-rules ()
((sysexec exp ...)
(let ((rc (apply system* (list exp ...))))
(unless (eqv? rc EXIT_SUCCESS)
(format (current-error-port) "> '~a …' failed with: ~#*~:*~d~%" exp ... rc)
(exit 1))
#t))))
```
As this addon wants to pass data back to Docspell via stdout, we use
the stderr for logging and printing general information. The function
`errln` (short for "error line" :)) allows to conveniently print to
stderr and the second wraps the `system*` procedure such that the
script fails whenever the external program fails. It is somewhat
similar to `set -e` in bash.
## Dependencies
Next is the declaration of external dependencies. At first all
external programs are listed. This is important for later, when the
script is packaged via nix. Nix will substitute these commands with
absolute paths. Then it's good to not have them scattered around.
It also reads in the expected environment variables (only those we
need) that are provided by Docspell. Since this addon only makes sense
to work on an item, it quits early should some env vars be missing.
```lisp
(define *curl* "curl")
(define *ffmpeg* "ffmpeg")
(define *stt* "stt")
(define *wkhtmltopdf* "wkhtmltopdf")
;; Getting some environment variables
(define *output-dir* (getenv "OUTPUT_DIR"))
(define *tmp-dir* (getenv "TMP_DIR"))
(define *cache-dir* (getenv "CACHE_DIR"))
(define *item-data-json* (getenv "ITEM_DATA_JSON"))
(define *original-files-json* (getenv "ITEM_ORIGINAL_JSON"))
(define *original-files-dir* (getenv "ITEM_ORIGINAL_DIR"))
;; fail early if not in the right context
(when (not *item-data-json*)
(errln "No item data json file found.")
(exit 1))
```
## Input/Output
The input and output schemas can be defined now. This uses the
[guile-json](https://github.com/aconchillo/guile-json) module. It
provides very convenient features for reading and writing json.
It is possible to define a record via `define-json-type` that
generates readers and writers to/from JSON. For example, the record
`<itemdata>` is defined to be an object with only one field `id`. The
function `json->scm` reads in json into scheme datastructures and then
the generated function `scm->itemdata` creates the record from it. For
every record, accessor functions exists. For example: `(itemdata-id
data)` would lookup the field `id` in the given itemdata record
`data`.
Here we need it to get the item-id and the list of file properties
belonging to the original uploaded files.
Another interesting definition is the `<output>` record. This captures
(a subset of) the schema of what Docspell receives from this addon as
a result. A full example of this data is
[here](@/docs/addons/writing.md#output). We don't need `commands` or
`newItems`, so this schema only cares about the `files` attribute.
```lisp
(define-json-type <itemdata>
(id))
;; The array of original files
(define-json-type <original-file>
(id)
(name)
(position)
(language)
(mimetype)
(length)
(checksum))
;; The output record, what is returned to docspell
(define-json-type <itemfiles>
(itemId)
(textFiles)
(pdfFiles))
(define-json-type <output>
(files "files" #(<itemfiles>)))
;; Parses the JSON containing the item information
(define *itemdata-json*
(scm->itemdata (call-with-input-file *item-data-json* json->scm)))
;; The JSON file containing meta data for all source files as vector.
(define *original-meta-json*
(let ((props (vector->list (call-with-input-file *original-files-json* json->scm))))
(map scm->original-file props)))
```
## Finding the audio file
The previously parsed json array `*original-meta-json*` can now be
used to find any audio files within the original uploaded files, as
done in `find-audio-files`. It simply goes through the list and keeps
those files whose mimetype starts with `audio/`. The mimetype is
provided by Docspell in the file properties in `ITEM_ORIGINAL_JSON`.
Before converting to wav with ffmpeg, it is quickly checked if it's
not a wav already.
```lisp
(define (is-wav? mime)
"Test whether the mimetype MIME is denoting a wav file."
(or (string-suffix? "/wav" mime)
(string-suffix? "/x-wav" mime)
(string-suffix? "/vnd.wav" mime)))
(define (find-audio-files)
"Find all source files that are audio files."
(filter! (lambda (el)
(string-prefix?
"audio/"
(original-file-mimetype el)))
*original-meta-json*))
(define (convert-wav id mime)
"Run ffmpeg to convert to wav."
(let ((src-file (format #f "~a/~a" *original-files-dir* id))
(out-file (format #f "~a/in.wav" *tmp-dir*)))
(if (is-wav? mime)
src-file
(begin
(errln "Running ffmpeg to convert wav file...")
(sysexec *ffmpeg* "-loglevel" "error" "-y" "-i" src-file out-file)
out-file))))
```
## Speech to text
Once we have a wav file, we can run speech-to-text recognition on it.
As said above, we need to download a model first, which is depending
on a language. Luckily, Docspell provides the language of the file.
This is the language either given directly by the user when uploading
or it's the collective's default language.
In the following snippet, we get the language as arguments. We will
get it later from the file properties.
As seen below, the model file is stored to the `CACHE_DIR`. This is
provided by Docspell and will survive the execution of this script.
All other directories involved will be deleted eventually. The
`CACHE_DIR` is the place to store intermediate results you don't want
to lose between addon runs. But as with any cache, it may not exist the
next time the addon is run. Docspell doesn't clear it automatically,
though.
The last function simply executes the `stt` external command and puts
stdout into a file.
```lisp
(define (get-model language)
(let* ((lang (or language "eng"))
(file (format #f "~a/model_~a.pbmm" *cache-dir* lang)))
(unless (file-exists? file)
(download-model lang file))
file))
(define (download-model lang file)
"Download model files per language. Nix has currently stt 0.9.3 packaged."
(let ((url (cond
((string= lang "eng") "https://coqui.gateway.scarf.sh/english/coqui/v0.9.3/model.pbmm")
((string= lang "deu") "https://coqui.gateway.scarf.sh/german/AASHISHAG/v0.9.0/model.pbmm")
(else (error "Unsupported language: " lang)))))
(errln "Downloading model file for language: ~a" lang)
(sysexec *curl* "-SsL" "-o" file url)
file))
(define (extract-text model input out)
"Runs stt for speech-to-text and writes the text into the file OUT."
(errln "Extracting text from audio…")
(with-output-to-file out
(lambda ()
(sysexec *stt* "--model" model "--audio" input))))
```
## Create PDF
Creating the PDF is straight forward. The extracted text is embedded
into a HTML file which is then passed to `wkhtmltopdf`. Since we don't
need this file for anything else, it is stored to the `TMP_DIR`.
```lisp
(define (create-pdf txt-file out)
(define (line str)
(format #t "~a\n" str))
(errln "Creating pdf file…")
(let ((tmphtml (format #f "~a/text.html" *tmp-dir*)))
(with-output-to-file tmphtml
(lambda ()
(line "<!DOCTYPE html>")
(line "<html>")
(line " <head><meta charset=\"UTF-8\"></head>")
(line " <body style=\"padding: 2em; font-size: large;\">")
(line " <div style=\"padding: 0.5em; font-size:normal; font-weight: bold; border: 1px solid black;\">")
(line " Extracted from audio using stt on ")
(display (strftime "%c" (localtime (current-time))))
(line " </div>")
(line " <p>")
(display (call-with-input-file txt-file read-string))
(line " </p>")
(line "</body></html>")))
(sysexec *wkhtmltopdf* tmphtml out)))
```
## Putting it together
The main function now puts everything together. The `process-file`
function is called for every file that is returned from
`(find-audio-files)`. It will extract the necessary information (like
the language) from the json document via record accessors (e.g.
`(original-file-language file)`) and then calls the functions defined
above. At last it creates an `<itemfiles>` record with `make-itemfiles`.
An `<itemfiles>` record now contains the important information for
Docspell. It requires the item-id and a mapping from attachment-ids to
files in `OUTPUT_DIR`. For each attachment identified by its ID,
Docspell replaces the extracted text with the contents of the given
file and replaces the converted PDF file, respectively. In the code
below, two lists of such mappings are defined - the first for the text
files, the second for the converted pdf. The files must be specified
relative to `OUTPUT_DIR`.
That means `process-all` returns a list of `<itemfiles>` records which
is then used to create the `<output>` record. And finally, an
`output->json` function will turn the record into proper JSON which is
sent to stdout.
```lisp
(define (process-file itemid file)
"Processing a single audio file."
(let* ((id (original-file-id file))
(mime (original-file-mimetype file))
(lang (original-file-language file))
(txt-file (format #f "~a/~a.txt" *output-dir* id))
(pdf-file (format #f "~a/~a.pdf" *output-dir* id))
(wav (convert-wav id mime))
(model (get-model lang)))
(extract-text model wav txt-file)
(create-pdf txt-file pdf-file)
(make-itemfiles itemid
`((,id . ,(format #f "~a.txt" id)))
`((,id . ,(format #f "~a.pdf" id))))))
(define (process-all)
(let ((item-id (itemdata-id *itemdata-json*)))
(map (lambda (file)
(process-file item-id file))
(find-audio-files))))
(define (main args)
(let ((out (make-output (process-all))))
(format #t "~a" (output->json out))))
```
Example output:
```json
{
"files": [
{
"itemId":"qZDnyGIAJsXr",
"textFiles": { "HPFvIDib6eA": "HPFvIDib6eA.txt" },
"pdfFiles": { "HPFvIDib6eA": "HPFvIDib6eA.pdf"}
}
]
}
```
# Packaging
Now with that script some additional plumbing is needed to make it an
"Addon" for Docspell.
The external tools - stt, ffmpeg, curl and wkhtmltopdf are required as
well as guile to compile and interpret the script. Also the guile-json
module must be installed.
This can turn into a quite tedious task. Luckily, there is
[nix](https://nixos.org) that has an answer to this. A user who wants
to use this script only needs to install nix. This package manager
then takes care of providing the exact dependencies we need (down to
the correct version and including guile as the language and runtime).
## A flake
Everything is defined in the `flake.nix` in the source root. It looks
like this:
```nix
{
description = "A docspell addon for basic audio file support";
inputs = {
utils.url = "github:numtide/flake-utils";
# Nixpkgs / NixOS version to use.
nixpkgs.url = "nixpkgs/nixos-21.11";
};
outputs = { self, nixpkgs, utils }:
utils.lib.eachSystem ["x86_64-linux"] (system:
let
pkgs = import nixpkgs {
inherit system;
overlays = [
];
};
name = "audio-files-addon";
in rec {
packages.${name} = pkgs.callPackage ./nix/addon.nix {
inherit name;
};
defaultPackage = packages.${name};
apps.${name} = utils.lib.mkApp {
inherit name;
drv = packages.${name};
};
defaultApp = apps.${name};
## … omitted for brevity
}
);
}
```
First sad thing is, that only `x86_64` systems are supported. This is
due to `stt` not being available on other platforms currently (as
provided by nixpkgs).
The rest is a bit magic: A package and "defaultPackage" is defined
with a reference to `nix/addon.nix`. The important part is the line
```nix
inputs = {
# Nixpkgs / NixOS version to use.
nixpkgs.url = "nixpkgs/nixos-21.11";
};
```
It says that as input for "building" the script, we take all of
[nixpkgs](https://github.com/NixOS/nixpkgs) which is a package
collection defined for (and in) nix - including thousands of software
packages. We can pick and choose from these. No surprise, all external
tools we need are included!
A flake defines the inputs and outputs of a package. With all of
nixpkgs as inputs, we can create a definition to elevate this script
into a *package*.
## Package definition
The definition for "building" the script is in `nix/addon.nix`:
```nix
{ stdenv, bash, cacert, curl, stt, wkhtmltopdf, ffmpeg, guile, guile-json, lib, name }:
stdenv.mkDerivation {
inherit name;
src = lib.sources.cleanSource ../.;
buildInputs = [ guile guile-json ];
patchPhase = ''
TARGET=src/addon.scm
sed -i 's,\*curl\* "curl",\*curl\* "${curl}/bin/curl",g' $TARGET
sed -i 's,\*ffmpeg\* "ffmpeg",\*ffmpeg\* "${ffmpeg}/bin/ffmpeg",g' $TARGET
sed -i 's,\*stt\* "stt",\*stt\* "${stt}/bin/stt",g' $TARGET
sed -i 's,\*wkhtmltopdf\* "wkhtmltopdf",\*wkhtmltopdf\* "${wkhtmltopdf}/bin/wkhtmltopdf",g' $TARGET
'';
buildPhase = ''
guild compile -o ${name}.go src/addon.scm
'';
# module name must be same as <filename>.go
installPhase = ''
mkdir -p $out/{bin,lib}
cp ${name}.go $out/lib/
cat > $out/bin/${name} <<-EOF
#!${bash}/bin/bash
export SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt"
exec -a "${name}" ${guile}/bin/guile -C ${guile-json}/share/guile/ccache -C $out/lib -e '(${name}) main' -c "" \$@
EOF
chmod +x $out/bin/${name}
'';
}
```
With a bit of handwaving - this is a bash script that modifies
slightly the scheme script and runs a compile on it. We simply declare
all packages we need in the first line of `{ … }` - these are
arguments that are automatically filled by nix by searching the
corresponding package in nixpkgs.
First the `patchPhase` is executed. It will replace the variables
containing the external tools with an absolute path to the version
that we currently get from nixpkgs. With this step nix takes care that
all these packages are available *at runtime* when executing the
script. All versions are finally fixed in `flake.lock` and can be
upgraded manually.
The `buildPhase` runs the guile compiler that produces some
intermediate code that will be loaded instead of compiling the script
on-the-fly.
At last, `installPhase` creates a wrapper script that runs guile with
the correct load-path pointing to `guile-json` and to our pre-compiled
script. Additionally, trusted root certificates are exported to make
the curl commands work. This script will be created in `$out`
directory that is provided by nix.
If you now run `nix build` in the source root, it will execute all
these phases and produce a symlink pointing to the result. You can
then `cat` the resulting file if you are curious.
This way the script is completely isolated from the system it runs
on - as long as the nix package manager is available. It includes all
the external tools, as well as the underlying runtime (guile)! The
result is a tiny wrapper bash script that can be run "everywhere"
(modulo all the restrictions, like non-x86_64 platforms, of course
:)).
## Addon Descriptor
At last, a small yaml file is needed to tell Docspell a little about
the addon.
```yaml
meta:
name: "audio-files-addon"
version: "0.1.0"
description: |
This addon adds support for audio files. Audio files are processed
by a speech-to-text engine and a pdf is generated.
It doesn't expect any user arguments at the moment. It requires
internet access to download model files.
triggers:
- final-process-item
- final-reprocess-item
- existing-item
runner:
nix:
enable: true
docker:
enable: false
trivial:
enable: true
exec: src/addon.scm
options:
networking: true
collectOutput: true
```
This tells Docspell via `triggers` when this addon may be run. This
one only makes sense for an item. Thus it can be hooked up to run with
every file-processing job or a user can manually trigger it on an
item.
It also tells via `runner:` that it can be built and run via nix, but
not via docker (I gave up after an hour to create a Dockerfile…). It
could also be run "as-is" but the user then needs to install all these
tools and guile manually.
# Done
That's it. You can install this addon in Docspell and create a run
configuration to let it execute when you want.

View File

@ -0,0 +1,11 @@
+++
title = "Addons"
insert_anchor_links = "right"
description = "Describes how addons work."
weight = 55
template = "pages.html"
sort_by = "weight"
redirect_to = "docs/addons/basics"
+++
No content here.

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 164 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 238 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 242 KiB

View File

@ -0,0 +1,149 @@
+++
title = "Basics"
insert_anchor_links = "right"
description = "Docspell Addons."
weight = 10
template = "docs.html"
+++
# Addons
Addons allow to execute custom software within a defined context in
Docspell. The idea is to be able to support new features and amend
existing ones.
{% warningbubble(title="Experimental") %} Addons are considered
experimental. The interaction between addons and Docspell is still
subject to change.
The intended audience for addons are developers (to create addons) and
technically inclined users to install, configure and use them.
{% end %}
Despite the warning above, addons are a nice way to amend your
docspell server with new things, you are encouraged to try it out and
give feedback ;-).
{% infobubble(title="Enable addons manually") %}
Addons are disabled by default. They must be enabled in the config
file of the restserver!
{% end %}
## What is an Addon?
An addon is a zip file that contains a `docspell-addon.yml` (or .yaml
or .json) file in its root. The `docspell-addon.yml` is the *addon
descriptor* telling how to run and optionally build the addon. In the
ZIP file, an addon provides a program that expects one argument which
is a file containing the user input for the addon. Addons can
communicate back to docspell via their stdout and/or via directly
calling the docspell server as part of their program.
## What can Addons do?
Addons can accept user input and are arbitrary external programs that
can do whatever they want. However, Docspell can embed running addons
in restricted environments, where they don't have network for example.
Addons can safely communicate to Docspell via their stdout output
returning instructions that Docspell will realise.
Running addons is managed by docspell. Currently they can be executed:
- as the final step when processing or re-processing an item. They then
have access to all the item data that has been collected during
processing (id, extracted text, converted pdfs, etc) and it can work
with that. It may, for example, set more tags or custom fields.
- triggered manually on some existing item
- periodically defined by a schedule. This executes the addons only
with the configured user input.
- … (maybe more to come)
Since an addon may not make sense to run on all these situations, it
must define a sensible subset via the `triggers` option in its
descriptor.
## How are they run
Addons are always executed by the joex component as an external
process, therefore they can be written in any programming or scripting
language.
That means the machine running joex possibly needs to match the
requirements of each addon. To ease this, addons can provide a [nix
description](https://nixos.wiki/wiki/Flakes) or a `Dockerfile`. Then
you need to prepare the machine only with two things (nix and docker)
to have the prerequisites for running many addons.
# More …
Addons are a flexible way to extend Docspell and require some
technical affinity. However, only "using" addons should not be that
hard, but it will always depend on the documentation of the addon and
its own complexity.
As the user, you may have different views: preparing the server to be
able to run addons, writing your own addons and finally using them.
The following sections are divided along these perspectives:
## Using Addons
Addons must be installed and then configured before they can
be used. [Using Addons](@/docs/addons/using.md) describes this
perspective.
{{ buttonright(href="/docs/addons/using", text="More…") }}
## Control how addons are run
As the owner of your server, you want to [control how addons are
run](@/docs/addons/control.md). Since addons are arbitrary programs,
potentially downloaded from the internet, they can be run in a
restricted environment.
{{ buttonright(href="/docs/addons/control", text="More…") }}
## Write custom addons
Finally, [writing addons](@/docs/addons/writing.md) requires (among
other things) to know how to interact with Docspell and what package
format is expected.
{{ buttonright(href="/docs/addons/writing", text="More…") }}
<!-- ## Goals -->
<!-- - Convenient for addon creators. Addons can be written in any -->
<!-- programming language and have a very light contract: they receive -->
<!-- one input argument and _may_ return structured data to instruct -->
<!-- docspell what to do. If not they can execute arbitrary code to call -->
<!-- the server directly. -->
<!-- - Server administrators control how they are executed. Since addons -->
<!-- may run anything, the execution should be able to be locked down when -->
<!-- wanted. -->
<!-- - Users can install and configure addons via the web interface easily. -->
<!-- It should be easy for addon creators to document how users can use -->
<!-- them. -->
<!-- # TODOs -->
<!-- - what if joex is running inside a container already? -->
<!-- - some use cases: -->
<!-- - I want an addon to do some stuff when processing files -->
<!-- - my files named "something_bla" are always this specific document -->
<!-- and so very specific processing would be great -->
<!-- - I want XYZ files to work (e.g. mp3?) -->
<!-- - I want to generate previews for video files -->
<!-- - Example Addons: -->
<!-- - swiss qr code detection on invoices -->
<!-- - tags via regexes -->
<!-- - text extraction from audio? -->
<!-- - preview generation for video? -->

View File

@ -0,0 +1,238 @@
+++
title = "Control Runtime"
insert_anchor_links = "right"
description = "Control how addons are run"
weight = 30
template = "docs.html"
+++
# Control runtime of addons
Addons are run by the joex component as background tasks in an
external process. Depending on the machine it is running on, the addon
can be run
- inside a docker container
- inside a systemd-nspawn container
- directly on the machine
Addons can be provided as source packages, where the final program may
need to be built. They also can depend on other software. In order to
not prepare for each addon, it is recommended to install
[nix](https://nixos.org) with [flakes](https://nixos.wiki/wiki/Flakes)
and docker on the machine running joex.
Please also look at the addon section in the [default
configuration](@/docs/configure/main.md#joex) for joex.
You need to explicitly enable addons in the restserver config file.
Docspell uses "runners" to execute an addon. This includes building it
if necessary. The following runners exist:
- `docker`: uses docker to build and run the addon
- `nix-flake`: builds via `nix build` and runs the executable in
`$out/bin`
- `trivial`: simply executes a file inside the addon (as specified in
the descriptor)
In the joex configuration you can specify which runners your system
supports.
## Prepare for *running* addons
Depending on how you want addons to be run, you need to install either
docker and/or systemd-nspawn on the machine running joex.
Additionally, the user running joex must be able to use these tools.
For docker it usually means to add the user to some group. For
systemd-nspawn you most likely want to configure `sudo` to run
passwordless the `systemd-nspawn` command.
Without this, an addon can only be run "directly" on the machine that
hosts joex (which might be perfectly fine). The addon then "sees" all
files on the machine and could potentially do harm.
It is recommended to install `nix` and `docker`, if possible. Addons
may only run with docker or only without, so supporting both leaves
more options.
## Prepare for *building* addons
Addons can be packaged as source or binary packages. For the former,
joex will build the addon first. There are two supported ways to do
so:
- via `docker build` when the addon provides a `Dockerfile` (use
runner `docker`)
- via `nix build` when the addon provides a `flake.nix` file (use
runner `nix-flake`)
Both build strategies will cache the resulting artifact, so subsequent
builds will be (almost) no-ops.
{% infobubble(title="Note") %}
*Building* addons requires to be connected to the internet! Running
them may not require a network connection.
{% end %}
If the addon is packaged as a binary, then usually the `trivial`
runner (possibly in combination with `systemd-nspawn`) can be used.
# Runtime
## Cache directory
Addons can use a "cache directory" to store data between runs. This
directory is not cleaned by docspell. If you have concerns about
space, use a cron job or systemd-timer to periodically clean this
directory.
## "Pure" vs "Impure"
Addons can talk back to Docspell in these ways: they can use the http
api, for example with [dsc](@/docs/tools/cli.md), or they can return
data to instruct Docspell to apply changes.
The former requires the addon to be connected to the network to reach
the Docspell *restserver*. This allows the addon to do arbitrary
changes at any time - this is the "impure" variant.
The second approach can be run without network connectivity. When
using docker or systemd-nspawn, Docspell will run these addons without
any network. Thus they can't do anything really, except return data
back to Docspell.
The pure way is much preferred! It allows for more consistent
behaviour, because Docspell is in charge of applying any changes.
Docspell can apply changes *only if* the addon returned successfully.
Addons can also be retried on error, because no changes happened yet.
It's the decision of the addon author, how the addon will work. It
should document whether it is pure or impure. You can also look into
the descriptor and check for a `networking: false` setting. As the
server administrator, you can configure Docspell to only accept pure
addons.
## Runners
### nix flake runner
For addons providing a `flake.nix` this runner can build it and find
the file to execute. With this `flake.nix` file addons can declare how
they should be built and what dependencies are required to run them.
The resulting executable can be executed via `systemd-nspawn` in a
restricted environment or directly on the machine.
{% infobubble(title="Requires") %}
You need to install [nix](https://nixos.org) and enable
[flakes](https://nixos.wiki/wiki/Flakes) to use this runner.
{% end %}
### docker
Addons can provide a Dockerfile or an image. If no image is given,
`docker build` will be run to build an image from the `Dockerfile`.
Then `docker run` is used to run the addon.
{% infobubble(title="Requires") %}
You need to install `docker` to use this runner.
{% end %}
### trivial
Addons can simply declare a file to execute. Docspell can use
`systemd-nspawn` to run it in a restricted environment, or it can be
run directly on the machine. This variant is only useful for very
simple addons, that don't require any special dependencies.
{% infobubble(title="Requires") %}
You need to check each addon for its requirements and prepare the
machine accordingly.
{% end %}
### Choosing runners
The config `addons.executor-config.runners` accepts a list of runners.
It specifies the preferred runner first. If an addon can be executed
via docker and nix, Docspell will choose the runner first in the list.
If you don't have nix installed, remove the `nix-flake` runner from
this list and same for docker, of course.
### systemd-nspawn
The `systemd-nspawn` can be used to run programs in a lightweight
ad-hoc container. It is available on most linux distributions (it is
part of systemd…). It doesn't require an image to exist first; this
makes it very convenient for running addons in a restricted
environment.
If you enable it in the config file, then all addons are either run
via `systemd-nspawn` or docker - and thus always in a restricted
environment, where they can only access their own files and the files
provided by Docspell.
The downside is that `systemd-nspawn` needs to be run as root (as far
as I know). Therefore, configure `sudo` to allow the user that is
running joex to execute `systemd-nspawn` non-interactively.
{% infobubble(title="Requires") %}
Install `systemd-nspawn` and enable the user running joex to use it
password-less via sudo.
{% end %}
# Within Docker
If joex itself is run as a docker container, things get a bit
complicated. The default image for joex does not contain `nix`, so the
`nix-flake` runner cannot be used out of the box.
In order to use the `docker` runner, the container must be configured
to access the host's docker daemon. On most systems this can be
achieved by bind-mounting the unix socket (usually at
`/var/run/docker.sock`) into the container. Here is a snippet from the
provided `docker-compose` file:
```yaml
joex:
image: docspell/joex:latest
# ... left out for brevity
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /tmp:/tmp
```
Additionally to `/var/run/docker.sock`, it also bind mounts the `/tmp`
directory. This is necessary, because docker will be invoked with bind
mounts from inside the container - but these must be available on the
host, because the docker client in the container actually runs the
command on the host.
The addon executor uses the system's temp-directory (which is usually
`/tmp`) as a base for creating a working and cache directory. Should
you change this in joex config file (or your system uses a different
default temp-dir), then the bind mount must be adapted as well.
Another variant is to extend the default joex image and add more
programs as needed by addons and then use the `trivial` runner.
# Summary / tl;dr
When joex is not inside a container:
- (optional) Install `systemd-nspawn` - it is provided on many
GNU/Linux distributions
- Configure `sudo` to allow the user running the joex component to
execute `systemd-nspawn` non-interactively (without requiring a
password)
- Install docker
- Install [nix](https://nixos.org) and enable
[flakes](https://nixos.wiki/wiki/Flakes)
- Allow the user who runs the joex component to use docker and nix. If
you install nix as multi-user, then this is already done.
- Check the section on addons in the [default
configuration](@/docs/configure/main.md#joex) for joex

View File

@ -0,0 +1,103 @@
+++
title = "Usage"
insert_anchor_links = "right"
description = "How to use addons"
weight = 20
template = "docs.html"
+++
# Using Addons
This shows with an example, how to install and use an addon. If the ui
doesn't show these forms, addons are probably disabled. Addons need to
be enabled in the config file of the rest server.
## Discovering
Addons can be installed from any URL to a zip file. One way is to use
URLs generated by forges like github or gitlab. They provide zip files
containing the repository contents. Alternatively an addon may provide
specific files in their release section.
For example, this is the url to the first release of the rotate-pdf
addon:
- <https://github.com/docspell/rotate-pdf-addon/archive/refs/tags/v0.1.0.zip>
This url points to a fixed version. It is also possible to use urls
that are "moving targets":
- <https://github.com/docspell/rotate-pdf-addon/archive/refs/heads/master.zip>
The contents behind the above url will very likely change over time.
For better discoverability, repositories for addons on public forges
can be tagged with *docspell-addon*.
## Install
With a URL like above, you can go to *Manage Data -> Addons -> New*
and insert the url:
{{ figure2(light="addon-install-01.png", dark="addon-install-01_dark.png") }}
It might take a while for Docspell to download, extract and verify the
addon. The addon will be downloaded into the database. Once installed,
the given URL is not used anymore, unless a manual update is issued.
After this finishes, you cannot change the URL anymore:
{{ figure2(light="addon-install-02.png", dark="addon-install-02_dark.png") }}
When using URLs pointing to "moving targets", you could click the
*Update Addon* button to re-download the contents at the url. This
doesn't make much sense for URLs to fixed versions (in *theory* these
could change as well, of course) and it is not without risk. It can be
useful for own addons to have them quickly updated.
Now the addon is installed. It can now be used by creating a *run configuration*.
## Run Configuration
A run configuration is comprised of one or more addons, their inputs
and some settings regarding their runtime environment.
The name is used for displaying in the webapp. You can disable/enable
a run configuration.
It is possible that addons use [dsc](@/docs/tools/cli.md) or call the
rest-server otherwise. Usually a valid session is required (to set
tags or do searches). When selecting to run *on behalf of a user*, a
valid authenticator for that user is injected into the environment of
the addon run.
The *Trigger Run* setting specifies when this run configuration should
be executed. You can choose from options that all addons in the list
must support. In this example, only `existing-item` is used. This
means the run configuration can be selected to run on any item.
Other options include:
- `final-process-item`: executes automatically as the last step when
processing uploaded files
- `final-reprocess-item`: like `final-process-item` but applies when
an existing item is reprocessed.
- `scheduled`: runs periodically based on a schedule (and independent
from any item)
Each addon may require arguments. Click on *Configure* to enable the
*Arguments* section and add arguments for the corresponding addon.
What to insert here is completely specific to the addon. In this case,
it expects a JSON object with only one field `"degree"` that indicates
how to rotate. In this example, it should be rotated by 90°
counter-clockwise. You need to click *Update* to set it into the addon
and then *Submit* to save everything.
{{ figure2(light="addon-install-03.png", dark="addon-install-03_dark.png") }}
With this run configuration in place, you can try it out on some item:
{{ figure2(light="addon-install-04.png", dark="addon-install-04_dark.png") }}
This example configured the *rotate-pdf-addon* to rotate left by 90°.
Create a similar run configuration to rotate to the right.

View File

@ -0,0 +1,376 @@
+++
title = "Writing"
insert_anchor_links = "right"
description = "How to write addons"
weight = 20
template = "docs.html"
+++
# Writing Addons
Writing an addon can be divided into two things:
- create the program
- define how to package and run it
The next sections describe both parts. For a quick start, check out
the example addons.
As previously written, you can choose a language. The interaction with
docspell happens by exchanging JSON data. So, whatever you choose, it
should be possible to read and produce JSON with some convenience.
# Writing the program
## Interface to Docspell
The interface to Docspell is JSON data. The addon receives all inputs
as JSON and may return a JSON object as output (via stdout).
An addon can be executed in different contexts. Depending on this, the
available inputs differ. The addon always receives one argument, which
is a file containing the user supplied data (it may be empty). A user
is able to provide data to every addon from the web-ui.
All other things are provided as environment variables. There are
environment variables that are always provided and some are only
available for specific contexts.
For example, an addon that is executed in the context of an item
(maybe after processing or when a user selects an addon to run "on an
item"), Docspell prepares all data for the corresponding item and
makes it available to the addon. In contrast, an addon executed
periodically by a schedule, won't have this data available.
## Basic Environment
The following environment variables are always provided by Docspell:
- `ADDON_DIR` points to the directory containing the extracted addon
zip file
- `TMPDIR` / `TMP_DIR` a directory for storing temporary data
- `OUTPUT_DIR` a directory for storing files that should be processed
by docspell
- `CACHE_DIR` a directory for storing data that should stay between
addon runs
It is very much recommended to always use these environment variables
when reading and writing data. This keeps Docspell in control about
the exact location.
The working directory will be set to a directory that is also
temporary, but please don't rely on that. Use the environment
variables.
## Item data
The following data is available when the addon is executed in the
context of an item, that is for the triggers `final-process-item`,
`final-reprocess-item` and `existing-item`.
### `ITEM_DATA_JSON`
This environment variable points to a JSON file containing information
about the current item. If it is run at processing time, it includes
all information gathered so far by Docspell.
**Example**
{{ incl_json(path="templates/shortcodes/item-data") }}
### `ITEM_ARGS_JSON`
This environment variable points to a JSON file that contains the user
supplied information with an upload request. That is, a user may
specify tags or a language when uploading files. This would be in this
file.
*This is only available for uploads. Trigger `final-process-item`.*
**Example**
{{ incl_json(path="templates/shortcodes/item-args") }}
### `ITEM_ORIGINAL_JSON` and `ITEM_PDF_JSON`
These JSON files contain a list of objects. Each object provides
properties about a file - either the original file or the converted
pdf. The structure is the same.
**Example**
{{ incl_json(path="templates/shortcodes/file-meta") }}
### Directories
These environment variables point to directories that contain the attachment files.
- `ITEM_PDF_DIR` contains all converted pdf files, the attachment id is the filename
- `ITEM_ORIGINAL_DIR` contains all original files, the attachment id is the filename
For example, to obtain a converted pdf file, lookup the id in
`ITEM_PDF_JSON` and then construct the file name via
`ITEM_PDF_DIR/{id}`.
## Session for dsc
An addon may use [dsc](@/docs/tools/cli.md) which requires for many
commands a valid session identifier. Usually this is obtained by
logging in (i.e. using `dsc login`). This is not really feasible from
inside an addon, of course. Therefore you can configure an addon to
run on behalf of some user when creating the run configuration.
Docspell then generates a valid session identifier and puts it into
the environment. The [dsc](@/docs/tools/cli.md) tool will pick them up
automatically.
It will also setup the URL to connect to some restserver. (If you have
multiple rest-servers running, it will pick one randomly).
- `DSC_SESSION` env variable containing a session identifier. Its
validity is coupled to the configured timeout.
- `DSC_DOCSPELL_URL` the base url to some rest server
That means when using an addon in this way, you can simply use `dsc`
without worrying about authentication or the correct URL to connect
to.
## Output
Docspell doesn't interpret the returncode of an addon, except checking
for being equal to `0` which indicates a successful run.
In order to change data in Docspell, the addon program can run
`dsc` (for example) to change some state - like setting tags etc. But
the preferred approach would be to return instructions for Docspell.
Docspell will execute the instructions when the addon terminates
successfully - that is with return code `0`.
These instructions are in a JSON object which needs to go to stdout.
You can use stderr in an addon for logging/debugging purposes. But if
you specify `collectOutput: true` in the descriptor, then stdout must
only return this specific JSON (or nothing, empty output is ignored).
You find the complete structure below. It consists of these parts:
- `commands`: lets you declare actions to do for an item or attachment
- `files`: defines files relative to `OUTPUT_DIR` that should be
processed
- `newItems`: declares files relative to `OUTPUT_DIR` that should be
processed as new uploads
The `commands` allows to set tags, fields and other things. All parts
are optional, you don't need to return the complete structure. Just
returning `commands` or only `files` is ok.
**Example**
{{ incl_json(path="templates/shortcodes/addon-output") }}
# Descriptor
An addon must provide an *addon descriptor*, which is a yaml or json
file looking like this:
```yaml
# The meta section is required. Name and version must not contain
# whitespace
meta:
name: "name-of-addon"
version: "2.21"
description: |
Describe the purpose and how it must be used here
# Defining when this addon is run. This is used to guide the user
# interface in selecting an addon. At least one is required to specify.
#
# Possible values:
# - scheduled: requires to enter a timer to run this addon periodically
# - final-process-item: the final step when processing an item
# - final-reprocess-item: the final step when reprocessing an item
# - existing-item: A user selects the addon to run on an item
triggers:
- final-process-item
- final-reprocess-item
- existing-item
# How to build and run this addon (optional). If missing, auto
# detection will enable a nix runner if a `flake.nix` is found in the
# source root and docker if a `Dockerfile` is found.
#
# Both runners are compared to what is enabled at the server.
runner:
# Building the program using nix flakes. This requires a flake.nix
# file in the source root with a default package and a flake-enabled
# nix on the joex machine.
#
# The program is built via `nix build`. If the joex machine has
# systemd-nspawn installed, it is used to run the addon inside a
# container. Otherwise the addon is run directly on the machine.
nix:
enable: true
# Docker based runner can define a custom image to use. If a `build`
# key exists pointing to a Dockerfile, the image is built first. If
# the docker image is complex, you can build it independently and
# provide the pre-built image.
#
# The program is run via `docker run` passing the arguments to the
# addon. Thus it expects the entrypoint to be correctly configured
# to the executable. You may use `args` in order to prepend
# additional arguments, like the path to an executable if the image
# requires that. The joex machine must have docker installed and the
# user running joex must be allowed to use docker. You must either
# define an image with an appropriate entry point or a dockerfile.
docker:
enable: false
#image: myorg/myimage:latest
build: Dockerfile
# Trivial runner that simply executes the file specified with
# `exec`. Nothing is built beforehand. This runner usually requires that
# the joex machine contains all dependencies needed to run the
# addon. You may need to install additional software on the machine
# running joex.
trivial:
enable: false
exec: src/addon.sh
# Optional arguments/options given to the program. The program
# receives at least one argument, which is a file to the user input as
# supplied in the application. The arguments here are prepended.
args:
options:
# If false, the program is run inside a private network, blocking
# traffic to the host and networks reachable from there. This only
# applies if the addon can be run inside a container.
#
# If the addon runs side effects (such as using dsc to set tags),
# this must be set to `true`.
#
# Default is false.
networking: true
# If true, the stdout of the program is parsed into a JSON structure
# that is interpreted as actions executed by the task that runs the
# addon. If the addon runs side effects only, set this to `false`
# and the output is ignored.
#
# It is recommended to use this approach, if possible. It allows
# docspell itself to apply any changes and the addon can run
# completely isolated.
#
# Default is false.
collectOutput: true
```
# Packaging
Docspell can use different ways to build and run the addon:
`nix-flake`, `docker` and `trivial`. The first two allow to package
the addon in a defined way (with a single dependency, either nix or
docker) and then execute it independently from the underlying system.
This makes it possible to execute the addon on a variety of systems.
This is especially useful for addons that are meant to be public and
reusable by different people.
The "trivial" runner is only executing some program specified in
`docspell-addon.yaml`, directly on the joex machine (or via
`systemd-nspawn`). The machine running joex must then provide all
necessary dependencies and it must be compatible to run the addon. It
may be useful especially for personal addons.
## nix flake
Using [nix](https://nixos.org) with
[flakes](https://nixos.wiki/wiki/Flakes) enabled, is the recommended
approach. It is very flexible and reproducible while sharing most
dependencies (in contrast to docker where each image contains the same
packages again and again).
Docspell runs `nix build` to build the addon and then executes the
file produced to `$out/bin`.
## docker
For docker it is recommended to provide pre-built images. Docspell can
build images from a provided `Dockerfile`, but for larger images it
might be better to do this a priori.
Docspell will run the addon using `docker run …` passing it only the
user-input file as argument. Thus the image must define an appropriate
`ENTRYPOINT`.
# Examples
## Minimal Addon
The steps below create a minimal addon:
1. Create a bash script `addon.sh` with this content:
```bash
#!/usr/bin/env bash
echo "Hello world!"
```
2. Make it executable:
```bash
chmod +x addon.sh
```
3. Create a yaml file `docspell-addon.yaml` with this content:
```yaml
meta:
name: "minimal-addon"
version: "0.1.0"
triggers:
- existing-item
- scheduled
runner:
trivial:
enable: true
exec: addon.sh
```
4. Create a zip file containing these two files:
```bash
zip addon.zip docspell-addon.yaml addon.sh
```
The addon is now ready. Make it available via an url (use some file
sharing tool, upload it somewhere etc) and then it can be installed
and run.
## Non-Minimal Addon
The minimal example above is good to see what is required, but it is
not very useful…. Please see this post about the [audio file
addon](@/blog/2022-05-16_audio_file_addon.md) that walks through a
more useful addon.
# Misc
## Advantages of "pure" addons
Although the output structure is not set in stone, it is recommended
to use this in contrast to directly changing state via `dsc`.
- outputs of all addons are collected and only applied if all were
successful; in contrast side effects are always applied even if the
addon fails shortly after
- since addons are executed as joex tasks, their result can be sent as
events to another http server for further processing.
- addons can run in an isolated environment without network (no data
can go out)
## Use addons in other addons?
This can be achieved very conveniently by using `nix`. If addons are
defined as a nix flake, they can be easily consumed by each other.

View File

@ -82,6 +82,7 @@ template = "docs.html"
- zip
- [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions)
(e-mail files in plain text MIME)
- Extend Docspell via [addons](@/docs/addons/basics.md)
- Tooling:
- [Command Line Interface](@/docs/tools/cli.md) allowing to upload
files, watch folders and many more!

View File

@ -1,4 +1,4 @@
``` bash
{% set data = load_data(path=path) %}
``` bash
{{ data | safe }}
```

View File

@ -0,0 +1,4 @@
{% set data = load_data(path=path) %}
``` json
{{ data | safe }}
```

View File

@ -0,0 +1,86 @@
package docspell.website
import cats.syntax.all._
import docspell.addons.out.{AddonOutput, ItemFile, NewFile, NewItem}
import docspell.addons.out.NewFile.{Meta => FileMeta}
import docspell.addons.out.NewItem.{Meta => ItemMeta}
import docspell.common._
import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction}
import io.circe.syntax._
/** Example of the JSON structure an addon may return to Docspell.
  *
  * The value uses all three parts of an `AddonOutput`: `commands`, `files`
  * and `newItems`. All ids, names and file names are made up; they only
  * illustrate the shape of the data.
  */
object AddonOutputExample extends Helper {

  // Both example commands refer to this item.
  private val targetItem = id("XabZ-item-id")

  // A command carrying a selection of item actions.
  private val itemUpdate = BackendCommand.ItemUpdate(
    itemId = targetItem,
    actions = List(
      ItemAction.AddTags(Set("tag1", "tag2")),
      ItemAction.ReplaceTags(Set("tagX", "tagY")),
      ItemAction.RemoveTags(Set("tag0", "tag9")),
      ItemAction.RemoveTagsCategory(Set("doc-type")),
      ItemAction.SetFolder("folder-name".some),
      ItemAction.SetCorrOrg(id("OaIy-org-ID").some),
      ItemAction.SetCorrPerson(id("OaIy-person-ID").some),
      ItemAction.SetConcPerson(id("AEiae-person-ID").some),
      ItemAction.SetConcEquipment(id("AEiae-equipment-ID").some),
      ItemAction.SetField(id("eur"), "12.99"),
      ItemAction.SetName("new item name"),
      ItemAction.SetNotes("replace notes with this".some),
      ItemAction.AddNotes("More notes appended", Some("-----"))
    )
  )

  // A command that changes a single attachment of the item.
  private val attachmentUpdate = BackendCommand.AttachmentUpdate(
    itemId = targetItem,
    attachId = id("Atca-attach-id"),
    actions = List(
      AttachmentAction.SetExtractedText("replace extracted text with this".some)
    )
  )

  // The two new files only differ in language and file name.
  private def newFile(lang: Language, name: String) =
    NewFile(
      metadata = FileMeta(
        language = Some(lang),
        skipDuplicate = Some(true),
        attachmentsOnly = Some(false)
      ),
      file = name
    )

  // Files that replace or extend what is stored for an existing item.
  private val changedFiles = ItemFile(
    id("iZtb-item-id"),
    textFiles = Map("attach-id" -> "newtext.txt"),
    pdfFiles = Map("attach-id" -> "better.pdf"),
    previewImages = Map("attach-id" -> "better-preview.png"),
    newFiles = List(
      newFile(Language.English, "new-file1.docx"),
      newFile(Language.German, "new-file2.pdf")
    )
  )

  // Files that should be submitted as a new item.
  private val upload = NewItem(
    metadata = ItemMeta(
      language = Some(Language.English),
      direction = Direction.Incoming.some,
      folderId = id("my-folder").some,
      source = "the-addon-x".some,
      skipDuplicate = true.some,
      tags = List("tag1", "tag2").some,
      attachmentsOnly = None
    ).some,
    files = List("a-file.pdf", "another.jpg")
  )

  val example = AddonOutput(
    commands = List(itemUpdate, attachmentUpdate),
    files = List(changedFiles),
    newItems = List(upload)
  )

  /** The example rendered as JSON, indented by two spaces. */
  def exampleJson: String =
    example.asJson.spaces2
}

View File

@ -0,0 +1,23 @@
package docspell.website
import docspell.addons.out._
import docspell.common.bc._
import io.circe.syntax._
/** Minimal addon output example: a single command that adds two tags to
  * one item. Only the `commands` part of `AddonOutput` is given.
  */
object AddonOutputMiniExample extends Helper {

  // The one and only command of this example.
  private val addTags = BackendCommand.ItemUpdate(
    itemId = id("XabZ-item-id"),
    actions = List(ItemAction.AddTags(Set("tag1", "tag2")))
  )

  val example = AddonOutput(commands = List(addTags))

  /** The example rendered as JSON, indented by two spaces. */
  def exampleJson: String =
    example.asJson.spaces2
}

View File

@ -0,0 +1,37 @@
package docspell.website
import cats.syntax.option._
import docspell.common.{ByteSize, Language, MimeType}
import docspell.store.queries.AttachedFile
import io.circe.syntax._
import scodec.bits.ByteVector
/** Example list of file metadata, one entry per attachment file.
  *
  * `Main` emits it for the `file-meta` example. Ids are generated randomly
  * on every run; the checksums are sha256 digests of arbitrary bytes.
  */
object FileMetaExample extends Helper {

  /** A png file at position 0. */
  val example1 = AttachedFile(
    id = randomId,
    name = "the filename.png".some,
    position = 0,
    language = Language.English.some,
    mimetype = MimeType.png,
    length = ByteSize(454654L),
    checksum = ByteVector.fromValidHex("caffe0caffe").digest("sha256")
  )

  /** A pdf file at position 1. The file name extension matches the mime
    * type, so the rendered example is not self-contradictory.
    */
  val example2 = AttachedFile(
    id = randomId,
    name = "other filename.pdf".some,
    position = 1,
    language = Language.English.some,
    mimetype = MimeType.pdf,
    length = ByteSize(1232214L),
    checksum = ByteVector.fromValidHex("eff0eff0eff").digest("sha256")
  )

  val example = List(
    example1,
    example2
  )

  /** The example list rendered as JSON, indented by two spaces. */
  val exampleJson = example.asJson.spaces2
}

View File

@ -0,0 +1,29 @@
package docspell.website
import docspell.common.{IdRef, Ident, Timestamp}
import scodec.bits.ByteVector
import java.time.LocalDate
import scala.util.Random
/** Helpers shared by the example objects for creating ids and dates. */
trait Helper {

  /** Creates an `Ident` from the given string via `Ident.unsafe`. */
  def id(str: String): Ident = Ident.unsafe(str)

  /** Fixed point in time (2022-05-14, 11:22:12) created via `Timestamp.atUtc`. */
  val date20220514: Timestamp =
    Timestamp.atUtc(LocalDate.of(2022, 5, 14).atTime(11, 22, 12))

  /** The collective id used throughout the examples. */
  val cid: Ident = id("collective")

  /** Allows writing `"some-name".id` instead of `id("some-name")`. */
  implicit final class StringExt(value: String) {
    def id: Ident = Ident.unsafe(value)
  }

  /** A reference with the given name and a freshly generated random id. */
  def idRef(name: String): IdRef = IdRef(randomId, name)

  /** Generates a new id from 6 random bytes, encoded as base58. The result
    * differs on every call.
    */
  def randomId: Ident = {
    val bytes = new Array[Byte](6)
    new Random().nextBytes(bytes)
    Ident.unsafe(ByteVector.view(bytes).toBase58)
  }
}

View File

@ -0,0 +1,25 @@
package docspell.website
import cats.syntax.option._
import docspell.common.{Language, ProcessItemArgs}
import io.circe.syntax._
/** Example of the metadata given with an upload request. `Main` emits it
  * for the `item-args` example.
  */
object ItemArgsExample extends Helper {

  // Tags the (made up) user supplied with the upload.
  private val uploadTags = List("given-tag-1")

  val example = ProcessItemArgs.ProcessMeta(
    collective = cid,
    itemId = None,
    language = Language.English,
    direction = None,
    sourceAbbrev = "scanner",
    folderId = None,
    validFileTypes = Seq.empty,
    skipDuplicate = true,
    fileFilter = None,
    tags = uploadTags.some,
    reprocess = false,
    attachmentsOnly = None
  )

  /** The example rendered as JSON, indented by two spaces. */
  val exampleJson: String = example.asJson.spaces2
}

View File

@ -0,0 +1,75 @@
package docspell.website
import cats.syntax.option._
import docspell.common.MetaProposal.Candidate
import docspell.common._
import docspell.joex.process.ItemData
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
import io.circe.syntax._
/** Example of the item data handed to an addon that runs in the context of
  * an item. `Main` emits it for the `item-data` example.
  *
  * The item has one attachment with extracted text and two proposals. The
  * ids inside the proposals are random and change on every run.
  */
object ItemDataExample extends Helper {

  // Ids that occur at several places in the example.
  private val exampleItemId = id("UyZ-item-id")
  private val exampleAttachId = id("Apa-attach-id")

  // A proposal with a single candidate that carries no labels.
  private def proposal(kind: MetaProposalType, name: String): MetaProposal =
    MetaProposal(kind, Candidate(idRef(name), Set.empty))

  private val proposals: MetaProposalList = MetaProposalList(
    List(
      proposal(MetaProposalType.CorrOrg, "Acme AG"),
      proposal(MetaProposalType.ConcPerson, "Derek Jeter")
    )
  )

  // The item record; it is still in state `Processing`.
  private val item = RItem(
    id = exampleItemId,
    cid = cid,
    name = "yearly report 2021",
    itemDate = date20220514.some,
    source = "webapp",
    direction = Direction.Incoming,
    state = ItemState.Processing,
    corrOrg = None,
    corrPerson = None,
    concPerson = None,
    concEquipment = None,
    inReplyTo = None,
    dueDate = None,
    created = date20220514,
    updated = date20220514,
    notes = None,
    folderId = None
  )

  // The single attachment of the item, pointing to the converted file.
  private val attachment = RAttachment(
    id = exampleAttachId,
    itemId = exampleItemId,
    fileId = FileKey(cid, FileCategory.AttachmentConvert, id("abcxyz")),
    position = 0,
    created = date20220514,
    name = "report_year_2021.pdf".some
  )

  // What has been extracted from the attachment.
  private val attachmentMeta = RAttachmentMeta(
    id = exampleAttachId,
    content = "this is the extracted text …".some,
    nerlabels = Nil,
    proposals = proposals,
    pages = 2.some,
    language = Language.English.some
  )

  val example = ItemData(
    item = item,
    attachments = Vector(attachment),
    metas = Vector(attachmentMeta),
    dateLabels = Vector.empty,
    originFile = Map(
      exampleAttachId -> FileKey(cid, FileCategory.AttachmentSource, id("yanetar"))
    ),
    givenMeta = MetaProposalList.empty,
    tags = List("tag-1"),
    classifyProposals = MetaProposalList.empty,
    classifyTags = List("invoice")
  )

  /** The example rendered as JSON, indented by two spaces. */
  val exampleJson: String = example.asJson.spaces2
}

View File

@ -0,0 +1,64 @@
package docspell.website
import cats.effect.{ExitCode, IO, IOApp}
import fs2.io.file.{Files, Path}
import fs2.Stream
import io.circe.Encoder
import io.circe.syntax._
/** Command line entry point that renders the documentation examples.
  *
  * Usage: `<example-name> [file]`. If `file` is missing or empty, the JSON
  * is printed to stdout. Otherwise it is written to that file. Unknown
  * example names and any other argument list end with an error exit code.
  */
object Main extends IOApp {

  // Names accepted as first argument; must match the cases in `run`.
  private val knownExamples: Set[String] =
    Set("addon-output", "addon-output-tags", "item-data", "item-args", "file-meta")

  override def run(args: List[String]): IO[ExitCode] =
    args match {
      case "addon-output" :: file :: Nil =>
        if (file.isEmpty) ok(stdout(AddonOutputExample.exampleJson))
        else ok(AddonOutputExample.example.writeFile(file))

      case "addon-output-tags" :: file :: Nil =>
        if (file.isEmpty) ok(stdout(AddonOutputMiniExample.exampleJson))
        else ok(AddonOutputMiniExample.example.writeFile(file))

      case "item-data" :: file :: Nil =>
        if (file.isEmpty) ok(stdout(ItemDataExample.exampleJson))
        else ok(ItemDataExample.example.writeFile(file))

      case "item-args" :: file :: Nil =>
        if (file.isEmpty) ok(stdout(ItemArgsExample.exampleJson))
        else ok(ItemArgsExample.example.writeFile(file))

      case "file-meta" :: file :: Nil =>
        if (file.isEmpty) ok(stdout(FileMetaExample.exampleJson))
        else ok(FileMetaExample.example.writeFile(file))

      // A known example without a file argument is printed to stdout
      // instead of being reported as "unknown".
      case name :: Nil if knownExamples.contains(name) =>
        run(name :: "" :: Nil)

      case v :: Nil =>
        err(stderr(s"Unknown example: $v"))

      case _ =>
        err(stderr("Specify what example to print"))
    }

  /** Prints a line to stdout. `str` is only treated as a format string if
    * `args` are given; otherwise it is printed verbatim.
    */
  def stdout(str: String, args: Any*): Unit =
    Console.out.println(render(str, args))

  /** Prints a line to stderr. `str` is only treated as a format string if
    * `args` are given; otherwise it is printed verbatim.
    */
  def stderr(str: String, args: Any*): Unit =
    Console.err.println(render(str, args))

  // Without arguments the text must not go through `format`: a `%` inside
  // it (e.g. from a command line argument or JSON content) would otherwise
  // raise a format exception or alter the output.
  private def render(str: String, args: Seq[Any]): String =
    if (args.isEmpty) str else str.format(args: _*)

  /** Runs the effect and maps its result to a successful exit code. */
  def ok(f: IO[Unit]): IO[ExitCode] =
    f.as(ExitCode.Success)

  /** Suspends the side effect and maps it to a successful exit code. */
  def ok(p: => Unit): IO[ExitCode] =
    ok(IO(p))

  /** Suspends the side effect and maps it to an error exit code. */
  def err(p: => Unit): IO[ExitCode] =
    IO(p).as(ExitCode.Error)

  /** Adds `writeFile` to every value that has a JSON encoder. */
  implicit class WriteOps[A: Encoder](self: A) {

    /** Writes the value as JSON (utf8, indented by two spaces) to `file`. */
    def writeFile(file: String): IO[Unit] =
      Stream
        .emit(self.asJson.spaces2)
        .covary[IO]
        .through(fs2.text.utf8.encode)
        .through(Files[IO].writeAll(Path(file)))
        .compile
        .drain
  }
}