Addon docs
5
.gitignore
vendored
@ -15,6 +15,11 @@ _site/
|
||||
/website/site/static/syntax-*.css
|
||||
/website/site/static/webfonts/
|
||||
/website/site/static/files/*.woff*
|
||||
/website/site/static/examples/
|
||||
/website/site/templates/shortcodes/addon-output
|
||||
/website/site/templates/shortcodes/item-data
|
||||
/website/site/templates/shortcodes/item-args
|
||||
/website/site/templates/shortcodes/file-meta
|
||||
/website/site/templates/shortcodes/server.conf
|
||||
/website/site/templates/shortcodes/sample-exim.conf
|
||||
/website/site/templates/shortcodes/joex.conf
|
||||
|
22
build.sbt
@ -966,8 +966,28 @@ val website = project
|
||||
)
|
||||
IO.append(target, IO.readBytes(changelog))
|
||||
Seq(target)
|
||||
}.taskValue
|
||||
}.taskValue,
|
||||
zolaPrepare := {
|
||||
val log = streams.value.log
|
||||
log.info("Generating examples…")
|
||||
val templateOut = baseDirectory.value / "site" / "templates" / "shortcodes"
|
||||
IO.createDirectory(templateOut)
|
||||
|
||||
// sbt crashes when interpolating values into the string in `toTask`
|
||||
// this is the reason for the following construct…
|
||||
(Compile / run).toTask(s" addon-output /tmp/addon-output.json").value
|
||||
(Compile / run).toTask(s" item-data /tmp/item-data.json").value
|
||||
(Compile / run).toTask(s" item-args /tmp/item-args.json").value
|
||||
(Compile / run).toTask(s" file-meta /tmp/file-meta.json").value
|
||||
|
||||
val inputs = List("addon-output", "item-data", "item-args", "file-meta")
|
||||
|
||||
inputs.foreach { name =>
|
||||
IO.move(file(s"/tmp/$name.json"), templateOut / name)
|
||||
}
|
||||
}
|
||||
)
|
||||
.dependsOn(addonlib, joex)
|
||||
|
||||
val root = project
|
||||
.in(file("."))
|
||||
|
@ -23,6 +23,7 @@ object ZolaPlugin extends AutoPlugin {
|
||||
"'python -m SimpleHTTPServer 1234' for example."
|
||||
)
|
||||
val zolaCheck = taskKey[Unit]("Runs zola check to check links")
|
||||
val zolaPrepare = taskKey[Unit]("Some task to run before generating docs")
|
||||
}
|
||||
|
||||
import autoImport._
|
||||
@ -33,10 +34,12 @@ object ZolaPlugin extends AutoPlugin {
|
||||
zolaOutputDir := target.value / "zola-site",
|
||||
zolaCommand := "zola",
|
||||
zolaTestBaseUrl := "http://localhost:1234",
|
||||
zolaPrepare := {},
|
||||
zolaBuild := {
|
||||
val logger = streams.value.log
|
||||
logger.info("Building web site using zola ...")
|
||||
(Compile / resources).value
|
||||
zolaPrepare.value
|
||||
buildSite(zolaCommand.value, zolaRootDir.value, zolaOutputDir.value, None, logger)
|
||||
logger.info("Website built")
|
||||
},
|
||||
@ -45,6 +48,7 @@ object ZolaPlugin extends AutoPlugin {
|
||||
val baseurl = zolaTestBaseUrl.value
|
||||
logger.info("Building web site (test) using zola ...")
|
||||
(Compile / resources).value
|
||||
zolaPrepare.value
|
||||
buildSite(
|
||||
zolaCommand.value,
|
||||
zolaRootDir.value,
|
||||
|
@ -19,7 +19,7 @@ out_base="$1"
|
||||
work_dir=$(mktemp -dt screenshot2-script.XXXXXX)
|
||||
export HOME=$work_dir
|
||||
export RATIO="16:9"
|
||||
export WAIT_SEC=4
|
||||
export WAIT_SEC=${WAIT_SEC:-4}
|
||||
#export TOP_CUT=400
|
||||
|
||||
dsc write-default-config
|
||||
|
581
website/site/content/blog/2022-05-16_audio_file_addon.md
Normal file
@ -0,0 +1,581 @@
|
||||
+++
|
||||
title = "Addon for audio file support"
|
||||
[extra]
|
||||
author = "eikek"
|
||||
+++
|
||||
|
||||
# 1st Addon: Audio file support
|
||||
|
||||
Since version 0.36.0 Docspell can be extended by
|
||||
[addons](@/docs/addons/basics.md) - external programs that are
|
||||
executed at some defined point in Docspell. This is a walk through the
|
||||
first addon that was created, mainly as an example: providing support
|
||||
for audio files.
|
||||
|
||||
<!-- more -->
|
||||
|
||||
I think it is interesting to provide support for audio files for a
|
||||
DMS, although admittedly I don't have much of a use :). But this is
|
||||
the kind of use-case that addons are for.
|
||||
|
||||
# The idea
|
||||
|
||||
The idea is very simple: the real work is done by external programs,
|
||||
most notably [coqui's stt](https://github.com/coqui-ai/STT), a deep
|
||||
learning toolkit originally created at Mozilla. It provides a command
|
||||
line tool that accepts a WAV file and spits out text. Perfect!
|
||||
|
||||
With this text, a PDF file can be created and a preview image which is
|
||||
already enough for basic support. You can see the pdf in the web-ui
|
||||
and search for the text via SOLR or PostgreSQL.
|
||||
|
||||
Because a WAV file is not the most popular format today, `ffmpeg` can
|
||||
be used to transform any other audio to WAV.
|
||||
|
||||
The only thing now is to create a program that checks the uploaded
|
||||
files, filters out all audio files and runs them through the mentioned
|
||||
programs. So let's do this.
|
||||
|
||||
# Preparation
|
||||
|
||||
Addons are external programs and can be written in whatever language….
|
||||
For me this is a good opportunity to refresh my rusty scheme know-how
|
||||
a bit. So this addon is written in Scheme, in particular
|
||||
[guile](https://www.gnu.org/software/guile/). Programming in scheme is
|
||||
fun and guile provides good integration into the (posix) OS and also
|
||||
has a nice JSON module. I had the [reference
|
||||
docs](https://www.gnu.org/software/guile/docs/docs-2.2/guile-ref/index.html)
|
||||
open all the time - look at them for further details on the used
|
||||
functions.
|
||||
|
||||
It's usually good to play around with the tools at first. For stt, we
|
||||
first need to download a *model*. This will be used to "detect" the
|
||||
text in the audio data. They have a [page](https://coqui.ai/models)
|
||||
where we can download model files for any supported language. For the
|
||||
addon, we will implement English and German.
|
||||
|
||||
When creating a PDF with wkhtmltopdf, we prettify it a little by
|
||||
embedding the plain text into some html template. This will also take
|
||||
care to specify UTF-8 as default encoding directly in the HTML
|
||||
template.
|
||||
|
||||
FFMpeg just works as usual. It figures out the input format
|
||||
automatically and knows from the extension of the output file what to
|
||||
do.
|
||||
|
||||
You can find the full code
|
||||
[here](https://github.com/docspell/audio-files-addon/blob/master/src/addon.scm).
|
||||
The following shows excerpts from it with some explanation.
|
||||
|
||||
# The script
|
||||
|
||||
## Helpers
|
||||
|
||||
After the preamble, there are two helper functions.
|
||||
|
||||
```lisp
|
||||
(define* (errln formatstr . args)
|
||||
(apply format (current-error-port) formatstr args)
|
||||
(newline))
|
||||
|
||||
;; Macro for executing system commands and making this program exit in
|
||||
;; case of failure.
|
||||
(define-syntax sysexec
|
||||
(syntax-rules ()
|
||||
((sysexec exp ...)
|
||||
(let ((rc (apply system* (list exp ...))))
|
||||
(unless (eqv? rc EXIT_SUCCESS)
|
||||
(format (current-error-port) "> '~a …' failed with: ~#*~:*~d~%" exp ... rc)
|
||||
(exit 1))
|
||||
#t))))
|
||||
```
|
||||
|
||||
As this addon wants to pass data back to Docspell via stdout, we use
|
||||
the stderr for logging and printing general information. The function
|
||||
`errln` (short for "error line" :)) allows us to conveniently print to
|
||||
stderr and the second wraps the `system*` procedure such that the
|
||||
script fails whenever the external program fails. It is somewhat
|
||||
similar to `set -e` in bash.
|
||||
|
||||
## Dependencies
|
||||
|
||||
Next is the declaration of external dependencies. At first all
|
||||
external programs are listed. This is important for later, when the
|
||||
script is packaged via nix. Nix will substitute these commands with
|
||||
absolute paths. Then it's good to not have them scattered around.
|
||||
|
||||
It also reads in the expected environment variables (only those we
|
||||
need) that are provided by Docspell. Since this addon only makes sense
|
||||
to work on an item, it quits early should some env vars be missing.
|
||||
|
||||
```lisp
|
||||
(define *curl* "curl")
|
||||
(define *ffmpeg* "ffmpeg")
|
||||
(define *stt* "stt")
|
||||
(define *wkhtmltopdf* "wkhtmltopdf")
|
||||
|
||||
;; Getting some environment variables
|
||||
(define *output-dir* (getenv "OUTPUT_DIR"))
|
||||
(define *tmp-dir* (getenv "TMP_DIR"))
|
||||
(define *cache-dir* (getenv "CACHE_DIR"))
|
||||
|
||||
(define *item-data-json* (getenv "ITEM_DATA_JSON"))
|
||||
(define *original-files-json* (getenv "ITEM_ORIGINAL_JSON"))
|
||||
(define *original-files-dir* (getenv "ITEM_ORIGINAL_DIR"))
|
||||
|
||||
;; fail early if not in the right context
|
||||
(when (not *item-data-json*)
|
||||
(errln "No item data json file found.")
|
||||
(exit 1))
|
||||
```
|
||||
|
||||
## Input/Output
|
||||
|
||||
The input and output schemas can be defined now. This uses the
|
||||
[guile-json](https://github.com/aconchillo/guile-json) module. It
|
||||
provides very convenient features for reading and writing json.
|
||||
|
||||
It is possible to define a record via `define-json-type` that
|
||||
generates readers and writers to/from JSON. For example, the record
|
||||
`<itemdata>` is defined to be an object with only one field `id`. The
|
||||
function `json->scm` reads in json into scheme datastructures and then
|
||||
the generated function `scm->itemdata` creates the record from it. For
|
||||
every record, accessor functions exist. For example: `(itemdata-id
|
||||
data)` would look up the field `id` in the given itemdata record
|
||||
`data`.
|
||||
|
||||
Here we need it to get the item-id and the list of file properties
|
||||
belonging to the original uploaded files.
|
||||
|
||||
Another interesting definition is the `<output>` record. This captures
|
||||
(a subset of) the schema of what Docspell receives from this addon as
|
||||
a result. A full example of this data is
|
||||
[here](@/docs/addons/writing.md#output). We don't need `commands` or
|
||||
`newItems`, so this schema only cares about the `files` attribute.
|
||||
|
||||
|
||||
```lisp
|
||||
(define-json-type <itemdata>
|
||||
(id))
|
||||
|
||||
;; The array of original files
|
||||
(define-json-type <original-file>
|
||||
(id)
|
||||
(name)
|
||||
(position)
|
||||
(language)
|
||||
(mimetype)
|
||||
(length)
|
||||
(checksum))
|
||||
|
||||
;; The output record, what is returned to docspell
|
||||
(define-json-type <itemfiles>
|
||||
(itemId)
|
||||
(textFiles)
|
||||
(pdfFiles))
|
||||
(define-json-type <output>
|
||||
(files "files" #(<itemfiles>)))
|
||||
|
||||
;; Parses the JSON containing the item information
|
||||
(define *itemdata-json*
|
||||
(scm->itemdata (call-with-input-file *item-data-json* json->scm)))
|
||||
|
||||
;; The JSON file containing meta data for all source files as vector.
|
||||
(define *original-meta-json*
|
||||
(let ((props (vector->list (call-with-input-file *original-files-json* json->scm))))
|
||||
(map scm->original-file props)))
|
||||
```
|
||||
|
||||
|
||||
## Finding the audio file
|
||||
|
||||
The previously parsed json array `*original-meta-json*` can now be
|
||||
used to find any audio files within the original uploaded files, as
|
||||
done in `find-audio-files`. It simply goes through the list and keeps
|
||||
those files whose mimetype starts with `audio/`. The mimetype is
|
||||
provided by Docspell in the file properties in `ITEM_ORIGINAL_JSON`.
|
||||
|
||||
Before converting to wav with ffmpeg, it is quickly checked if it's
|
||||
not a wav already.
|
||||
|
||||
|
||||
```lisp
|
||||
(define (is-wav? mime)
|
||||
"Test whether the mimetype MIME is denoting a wav file."
|
||||
(or (string-suffix? "/wav" mime)
|
||||
(string-suffix? "/x-wav" mime)
|
||||
(string-suffix? "/vnd.wav" mime)))
|
||||
|
||||
(define (find-audio-files)
|
||||
"Find all source files that are audio files."
|
||||
(filter! (lambda (el)
|
||||
(string-prefix?
|
||||
"audio/"
|
||||
(original-file-mimetype el)))
|
||||
*original-meta-json*))
|
||||
|
||||
(define (convert-wav id mime)
|
||||
"Run ffmpeg to convert to wav."
|
||||
(let ((src-file (format #f "~a/~a" *original-files-dir* id))
|
||||
(out-file (format #f "~a/in.wav" *tmp-dir*)))
|
||||
(if (is-wav? mime)
|
||||
src-file
|
||||
(begin
|
||||
(errln "Running ffmpeg to convert wav file...")
|
||||
(sysexec *ffmpeg* "-loglevel" "error" "-y" "-i" src-file out-file)
|
||||
out-file))))
|
||||
```
|
||||
|
||||
## Speech to text
|
||||
|
||||
Once we have a wav file, we can run speech-to-text recognition on it.
|
||||
As said above, we need to download a model first, which depends
|
||||
on a language. Luckily, Docspell provides the language of the file.
|
||||
This is the language either given directly by the user when uploading
|
||||
or it's the collective's default language.
|
||||
|
||||
In the following snippet, we get the language as an argument. We will
|
||||
get it later from the file properties.
|
||||
|
||||
As seen below, the model file is stored to the `CACHE_DIR`. This is
|
||||
provided by Docspell and will survive the execution of this script.
|
||||
All other directories involved will be deleted eventually. The
|
||||
`CACHE_DIR` is the place to store intermediate results you don't want
|
||||
to lose between addon runs. But as with any cache, it may not exist the
|
||||
next time the addon is run. Docspell doesn't clear it automatically,
|
||||
though.
|
||||
|
||||
The last function simply executes the `stt` external command and puts
|
||||
stdout into a file.
|
||||
|
||||
```lisp
|
||||
(define (get-model language)
|
||||
(let* ((lang (or language "eng"))
|
||||
(file (format #f "~a/model_~a.pbmm" *cache-dir* lang)))
|
||||
(unless (file-exists? file)
|
||||
(download-model lang file))
|
||||
file))
|
||||
|
||||
(define (download-model lang file)
|
||||
"Download model files per language. Nix has currently stt 0.9.3 packaged."
|
||||
(let ((url (cond
|
||||
((string= lang "eng") "https://coqui.gateway.scarf.sh/english/coqui/v0.9.3/model.pbmm")
|
||||
((string= lang "deu") "https://coqui.gateway.scarf.sh/german/AASHISHAG/v0.9.0/model.pbmm")
|
||||
(else (error "Unsupported language: " lang)))))
|
||||
(errln "Downloading model file for language: ~a" lang)
|
||||
(sysexec *curl* "-SsL" "-o" file url)
|
||||
file))
|
||||
|
||||
(define (extract-text model input out)
|
||||
"Runs stt for speech-to-text and writes the text into the file OUT."
|
||||
(errln "Extracting text from audio…")
|
||||
(with-output-to-file out
|
||||
(lambda ()
|
||||
(sysexec *stt* "--model" model "--audio" input))))
|
||||
```
|
||||
|
||||
|
||||
## Create PDF
|
||||
|
||||
Creating the PDF is straightforward. The extracted text is embedded
|
||||
into an HTML file which is then passed to `wkhtmltopdf`. Since we don't
|
||||
need this file for anything else, it is stored to the `TMP_DIR`.
|
||||
|
||||
```lisp
|
||||
(define (create-pdf txt-file out)
|
||||
(define (line str)
|
||||
(format #t "~a\n" str))
|
||||
(errln "Creating pdf file…")
|
||||
(let ((tmphtml (format #f "~a/text.html" *tmp-dir*)))
|
||||
(with-output-to-file tmphtml
|
||||
(lambda ()
|
||||
(line "<!DOCTYPE html>")
|
||||
(line "<html>")
|
||||
(line " <head><meta charset=\"UTF-8\"></head>")
|
||||
(line " <body style=\"padding: 2em; font-size: large;\">")
|
||||
(line " <div style=\"padding: 0.5em; font-size:normal; font-weight: bold; border: 1px solid black;\">")
|
||||
(line " Extracted from audio using stt on ")
|
||||
(display (strftime "%c" (localtime (current-time))))
|
||||
(line " </div>")
|
||||
(line " <p>")
|
||||
(display (call-with-input-file txt-file read-string))
|
||||
(line " </p>")
|
||||
(line "</body></html>")))
|
||||
(sysexec *wkhtmltopdf* tmphtml out)))
|
||||
```
|
||||
|
||||
|
||||
## Putting it together
|
||||
|
||||
The main function now puts everything together. The `process-file`
|
||||
function is called for every file that is returned from
|
||||
`(find-audio-files)`. It will extract the necessary information (like
|
||||
the language) from the json document via record accessors (e.g.
|
||||
`(original-file-language file)`) and then calls the functions defined
|
||||
above. At last it creates an `<itemfiles>` record with `make-itemfiles`.
|
||||
|
||||
An `<itemfiles>` record now contains the important information for
|
||||
Docspell. It requires the item-id and a mapping from attachment-ids to
|
||||
files in `OUTPUT_DIR`. For each attachment identified by its ID,
|
||||
Docspell replaces the extracted text with the contents of the given
|
||||
file and replaces the converted PDF file, respectively. In the code
|
||||
below, two lists of such mappings are defined - the first for the text
|
||||
files, the second for the converted pdf. The files must be specified
|
||||
relative to `OUTPUT_DIR`.
|
||||
|
||||
That means `process-all` returns a list of `<itemfiles>` records which
|
||||
is then used to create the `<output>` record. And finally, a
|
||||
`output->json` function will turn the record into proper JSON which is
|
||||
sent to stdout.
|
||||
|
||||
```lisp
|
||||
(define (process-file itemid file)
|
||||
"Processing a single audio file."
|
||||
(let* ((id (original-file-id file))
|
||||
(mime (original-file-mimetype file))
|
||||
(lang (original-file-language file))
|
||||
(txt-file (format #f "~a/~a.txt" *output-dir* id))
|
||||
(pdf-file (format #f "~a/~a.pdf" *output-dir* id))
|
||||
(wav (convert-wav id mime))
|
||||
(model (get-model lang)))
|
||||
(extract-text model wav txt-file)
|
||||
(create-pdf txt-file pdf-file)
|
||||
(make-itemfiles itemid
|
||||
`((,id . ,(format #f "~a.txt" id)))
|
||||
`((,id . ,(format #f "~a.pdf" id))))))
|
||||
|
||||
(define (process-all)
|
||||
(let ((item-id (itemdata-id *itemdata-json*)))
|
||||
(map (lambda (file)
|
||||
(process-file item-id file))
|
||||
(find-audio-files))))
|
||||
|
||||
(define (main args)
|
||||
(let ((out (make-output (process-all))))
|
||||
(format #t "~a" (output->json out))))
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```json
|
||||
{
|
||||
"files": [
|
||||
{
|
||||
"itemId":"qZDnyGIAJsXr",
|
||||
"textFiles": { "HPFvIDib6eA": "HPFvIDib6eA.txt" },
|
||||
"pdfFiles": { "HPFvIDib6eA": "HPFvIDib6eA.pdf"}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
# Packaging
|
||||
|
||||
Now with that script some additional plumbing is needed to make it an
|
||||
"Addon" for Docspell.
|
||||
|
||||
The external tools - stt, ffmpeg, curl and wkhtmltopdf are required as
|
||||
well as guile to compile and interpret the script. Also the guile-json
|
||||
module must be installed.
|
||||
|
||||
This can turn into a quite tedious task. Luckily, there is
|
||||
[nix](https://nixos.org) that has an answer to this. A user who wants
|
||||
to use this script only needs to install nix. This package manager
|
||||
then takes care of providing the exact dependencies we need (down to
|
||||
the correct version and including guile as the language and runtime).
|
||||
|
||||
## A flake
|
||||
|
||||
Everything is defined in the `flake.nix` in the source root. It looks
|
||||
like this:
|
||||
|
||||
```nix
|
||||
{
|
||||
description = "A docspell addon for basic audio file support";
|
||||
|
||||
inputs = {
|
||||
utils.url = "github:numtide/flake-utils";
|
||||
|
||||
# Nixpkgs / NixOS version to use.
|
||||
nixpkgs.url = "nixpkgs/nixos-21.11";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, utils }:
|
||||
utils.lib.eachSystem ["x86_64-linux"] (system:
|
||||
let
|
||||
pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
overlays = [
|
||||
|
||||
];
|
||||
};
|
||||
name = "audio-files-addon";
|
||||
in rec {
|
||||
packages.${name} = pkgs.callPackage ./nix/addon.nix {
|
||||
inherit name;
|
||||
};
|
||||
|
||||
defaultPackage = packages.${name};
|
||||
|
||||
apps.${name} = utils.lib.mkApp {
|
||||
inherit name;
|
||||
drv = packages.${name};
|
||||
};
|
||||
defaultApp = apps.${name};
|
||||
|
||||
## … omitted for brevity
|
||||
}
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
First sad thing is, that only `x86_64` systems are supported. This is
|
||||
due to `stt` not being available on other platforms currently (as
|
||||
provided by nixpkgs).
|
||||
|
||||
The rest is a bit magic: A package and "defaultPackage" are defined
|
||||
with a reference to `nix/addon.nix`. The important part is the line
|
||||
|
||||
```nix
|
||||
inputs = {
|
||||
# Nixpkgs / NixOS version to use.
|
||||
nixpkgs.url = "nixpkgs/nixos-21.11";
|
||||
};
|
||||
```
|
||||
|
||||
It says that as input for "building" the script, we take all of
|
||||
[nixpkgs](https://github.com/NixOS/nixpkgs) which is a package
|
||||
collection defined for (and in) nix - including thousands of software
|
||||
packages. We can pick and choose from these. No surprise, all external
|
||||
tools we need are included!
|
||||
|
||||
A flake defines the inputs and outputs of a package. With all of
|
||||
nixpkgs as inputs, we can create a definition to elevate this script
|
||||
into a *package*.
|
||||
|
||||
## Package definition
|
||||
|
||||
The definition for "building" the script is in `nix/addon.nix`:
|
||||
|
||||
```nix
|
||||
{ stdenv, bash, cacert, curl, stt, wkhtmltopdf, ffmpeg, guile, guile-json, lib, name }:
|
||||
|
||||
stdenv.mkDerivation {
|
||||
inherit name;
|
||||
src = lib.sources.cleanSource ../.;
|
||||
|
||||
buildInputs = [ guile guile-json ];
|
||||
|
||||
patchPhase = ''
|
||||
TARGET=src/addon.scm
|
||||
sed -i 's,\*curl\* "curl",\*curl\* "${curl}/bin/curl",g' $TARGET
|
||||
sed -i 's,\*ffmpeg\* "ffmpeg",\*ffmpeg\* "${ffmpeg}/bin/ffmpeg",g' $TARGET
|
||||
sed -i 's,\*stt\* "stt",\*stt\* "${stt}/bin/stt",g' $TARGET
|
||||
sed -i 's,\*wkhtmltopdf\* "wkhtmltopdf",\*wkhtmltopdf\* "${wkhtmltopdf}/bin/wkhtmltopdf",g' $TARGET
|
||||
'';
|
||||
|
||||
buildPhase = ''
|
||||
guild compile -o ${name}.go src/addon.scm
|
||||
'';
|
||||
|
||||
# module name must be same as <filename>.go
|
||||
installPhase = ''
|
||||
mkdir -p $out/{bin,lib}
|
||||
cp ${name}.go $out/lib/
|
||||
|
||||
cat > $out/bin/${name} <<-EOF
|
||||
#!${bash}/bin/bash
|
||||
export SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt"
|
||||
exec -a "${name}" ${guile}/bin/guile -C ${guile-json}/share/guile/ccache -C $out/lib -e '(${name}) main' -c "" \$@
|
||||
EOF
|
||||
chmod +x $out/bin/${name}
|
||||
'';
|
||||
}
|
||||
```
|
||||
|
||||
With a bit of handwaving - this is a bash script that modifies
|
||||
slightly the scheme script and runs a compile on it. We simply declare
|
||||
all packages we need in the first line of `{ … }` - these are
|
||||
arguments that are automatically filled by nix by searching the
|
||||
corresponding package in nixpkgs.
|
||||
|
||||
First the `patchPhase` is executed. It will replace the variables
|
||||
containing the external tools with an absolute path to the version
|
||||
that we currently get from nixpkgs. With this step nix takes care that
|
||||
all these packages are available *at runtime* when executing the
|
||||
script. All versions are finally fixed in `flake.lock` and can be
|
||||
upgraded manually.
|
||||
|
||||
The `buildPhase` runs the guile compiler that produces some
|
||||
intermediate code that will be loaded instead of compiling the script
|
||||
on-the-fly.
|
||||
|
||||
At last, `installPhase` creates a wrapper script that runs guile with
|
||||
the correct load-path pointing to `guile-json` and to our pre-compiled
|
||||
script. Additionally, trusted root certificates are exported to make
|
||||
the curl commands work. This script will be created in `$out`
|
||||
directory that is provided by nix.
|
||||
|
||||
If you now run `nix build` in the source root, it will execute all
|
||||
these phases and produce a symlink pointing to the result. You can
|
||||
then `cat` the resulting file if you are curious.
|
||||
|
||||
This way the script is completely isolated from the system it runs
|
||||
on - as long as the nix package manager is available. It includes all
|
||||
the external tools, as well as the underlying runtime (guile)! The
|
||||
result is a tiny wrapper bash script that can be run "everywhere"
|
||||
(modulo all the restrictions, like non-x86_64 platforms, of course
|
||||
:)).
|
||||
|
||||
|
||||
## Addon Descriptor
|
||||
|
||||
At last, a small yaml file is needed to tell Docspell a little about
|
||||
the addon.
|
||||
|
||||
```yaml
|
||||
meta:
|
||||
name: "audio-files-addon"
|
||||
version: "0.1.0"
|
||||
description: |
|
||||
This addon adds support for audio files. Audio files are processed
|
||||
by a speech-to-text engine and a pdf is generated.
|
||||
|
||||
It doesn't expect any user arguments at the moment. It requires
|
||||
internet access to download model files.
|
||||
|
||||
triggers:
|
||||
- final-process-item
|
||||
- final-reprocess-item
|
||||
- existing-item
|
||||
|
||||
runner:
|
||||
nix:
|
||||
enable: true
|
||||
|
||||
docker:
|
||||
enable: false
|
||||
|
||||
trivial:
|
||||
enable: true
|
||||
exec: src/addon.scm
|
||||
|
||||
options:
|
||||
networking: true
|
||||
collectOutput: true
|
||||
```
|
||||
|
||||
This tells Docspell via `triggers` when this addon may be run. This
|
||||
one only makes sense for an item. Thus it can be hooked up to run with
|
||||
every file-processing job or a user can manually trigger it on an
|
||||
item.
|
||||
|
||||
It also tells via `runner:` that it can be built and run via nix, but
|
||||
not via docker (I gave up after an hour to create a Dockerfile…). It
|
||||
could also be run "as-is" but the user then needs to install all these
|
||||
tools and guile manually.
|
||||
|
||||
# Done
|
||||
|
||||
That's it. You can install this addon in Docspell and create a run
|
||||
configuration to let it execute when you want.
|
11
website/site/content/docs/addons/_index.md
Normal file
@ -0,0 +1,11 @@
|
||||
+++
|
||||
title = "Addons"
|
||||
insert_anchor_links = "right"
|
||||
description = "Describes how addons work."
|
||||
weight = 55
|
||||
template = "pages.html"
|
||||
sort_by = "weight"
|
||||
redirect_to = "docs/addons/basics"
|
||||
+++
|
||||
|
||||
No content here.
|
BIN
website/site/content/docs/addons/addon-install-01.png
Normal file
After Width: | Height: | Size: 105 KiB |
BIN
website/site/content/docs/addons/addon-install-01_dark.png
Normal file
After Width: | Height: | Size: 109 KiB |
BIN
website/site/content/docs/addons/addon-install-02.png
Normal file
After Width: | Height: | Size: 155 KiB |
BIN
website/site/content/docs/addons/addon-install-02_dark.png
Normal file
After Width: | Height: | Size: 164 KiB |
BIN
website/site/content/docs/addons/addon-install-03.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
website/site/content/docs/addons/addon-install-03_dark.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
website/site/content/docs/addons/addon-install-04.png
Normal file
After Width: | Height: | Size: 238 KiB |
BIN
website/site/content/docs/addons/addon-install-04_dark.png
Normal file
After Width: | Height: | Size: 242 KiB |
149
website/site/content/docs/addons/basics.md
Normal file
@ -0,0 +1,149 @@
|
||||
+++
|
||||
title = "Basics"
|
||||
insert_anchor_links = "right"
|
||||
description = "Docspell Addons."
|
||||
weight = 10
|
||||
template = "docs.html"
|
||||
+++
|
||||
|
||||
# Addons
|
||||
|
||||
Addons allow executing custom software within a defined context in
|
||||
Docspell. The idea is to be able to support new features and amend
|
||||
existing ones.
|
||||
|
||||
{% warningbubble(title="Experimental") %} Addons are considered
|
||||
experimental. The interaction between addons and Docspell is still
|
||||
subject to change.
|
||||
|
||||
The intended audience for addons are developers (to create addons) and
|
||||
technically inclined users to install, configure and use them.
|
||||
{% end %}
|
||||
|
||||
Despite the warning above, addons are a nice way to amend your
|
||||
docspell server with new things, you are encouraged to try it out and
|
||||
give feedback ;-).
|
||||
|
||||
{% infobubble(title="Enable addons manually") %}
|
||||
Addons are disabled by default. They must be enabled in the config
|
||||
file of the restserver!
|
||||
{% end %}
|
||||
|
||||
|
||||
## What is an Addon?
|
||||
|
||||
An addon is a zip file that contains a `docspell-addon.yml` (or .yaml
|
||||
or .json) file in its root. The `docspell-addon.yml` is the *addon
|
||||
descriptor* telling how to run and optionally build the addon. In the
|
||||
ZIP file, an addon provides a program that expects one argument which
|
||||
is a file containing the user input for the addon. Addons can
|
||||
communicate back to docspell via their stdout and/or via directly
|
||||
calling the docspell server as part of their program.
|
||||
|
||||
|
||||
## What can Addons do?
|
||||
|
||||
Addons can accept user input and are arbitrary external programs that
|
||||
can do whatever they want. However, Docspell can embed running addons
|
||||
in restricted environments, where they don't have network for example.
|
||||
Addons can safely communicate to Docspell via their stdout output
|
||||
returning instructions that Docspell will realise.
|
||||
|
||||
Running addons is managed by docspell. Currently they can be executed:
|
||||
|
||||
- as the final step when processing or re-processing an item. They then
|
||||
have access to all the item data that has been collected during
|
||||
processing (id, extracted text, converted pdfs, etc) and it can work
|
||||
with that. It may, for example, set more tags or custom fields.
|
||||
- triggered manually on some existing item
|
||||
- periodically defined by a schedule. This executes the addons only
|
||||
with the configured user input.
|
||||
- … (maybe more to come)
|
||||
|
||||
Since an addon may not make sense to run in all these situations, it
|
||||
must define a sensible subset via the `triggers` option in its
|
||||
descriptor.
|
||||
|
||||
|
||||
## How are they run
|
||||
|
||||
Addons are always executed by the joex component as an external
|
||||
process, therefore they can be written in any programming or scripting
|
||||
language.
|
||||
|
||||
That means the machine running joex possibly needs to match the
|
||||
requirements of each addon. To ease this, addons can provide a [nix
|
||||
description](https://nixos.wiki/wiki/Flakes) or a `Dockerfile`. Then
|
||||
you need to prepare the machine only with two things (nix and docker)
|
||||
to have the prerequisites for running many addons.
|
||||
|
||||
|
||||
# More …
|
||||
|
||||
Addons are a flexible way to extend Docspell and require some
|
||||
technical affinity. However, only "using" addons should not be that
|
||||
hard, but it will always depend on the documentation of the addon and
|
||||
its own complexity.
|
||||
|
||||
As the user, you may have different views: preparing the server to be
|
||||
able to run addons, writing your own addons and finally using them.
|
||||
|
||||
The following sections are divided by these perspectives:
|
||||
|
||||
## Using Addons
|
||||
|
||||
Addons must be installed and then configured before they can
|
||||
be used. [Using Addons](@/docs/addons/using.md) describes this
|
||||
perspective.
|
||||
|
||||
{{ buttonright(href="/docs/addons/using", text="More…") }}
|
||||
|
||||
## Control how addons are run
|
||||
|
||||
As the owner of your server, you want to [control how addons are
|
||||
run](@/docs/addons/control.md). Since addons are arbitrary programs,
|
||||
potentially downloaded from the internet, they can be run in a
|
||||
restricted environment.
|
||||
|
||||
{{ buttonright(href="/docs/addons/control", text="More…") }}
|
||||
|
||||
|
||||
## Write custom addons
|
||||
|
||||
Finally, [writing addons](@/docs/addons/writing.md) requires (among
|
||||
other things) to know how to interact with Docspell and what package
|
||||
format is expected.
|
||||
|
||||
{{ buttonright(href="/docs/addons/writing", text="More…") }}
|
||||
|
||||
|
||||
|
||||
<!-- ## Goals -->
|
||||
|
||||
<!-- - Convenient for addon creators. Addons can be written in any -->
|
||||
<!-- programming language and have a very light contract: they receive -->
|
||||
<!-- one input argument and _may_ return structured data to instruct -->
|
||||
<!-- docspell what to do. If not they can execute arbitrary code to call -->
|
||||
<!-- the server directly. -->
|
||||
<!-- - Server administrators control how they are executed. Since addons -->
|
||||
<!-- may run anything, the execution should be able to be locked down when -->
|
||||
<!-- wanted. -->
|
||||
<!-- - Users can install and configure addons via the web interface easily. -->
|
||||
<!-- It should be easy for addon creators to document how users can use -->
|
||||
<!-- them. -->
|
||||
|
||||
|
||||
<!-- # TODOs -->
|
||||
|
||||
<!-- - what if joex is running inside a container already? -->
|
||||
<!-- - some use cases: -->
|
||||
<!-- - I want an addon to do some stuff when processing files -->
|
||||
<!-- - my files named "something_bla" are always this specific document -->
|
||||
<!-- and so very specific processing would be great -->
|
||||
<!-- - I want XYZ files to work (e.g. mp3?) -->
|
||||
<!-- - I want to generate previews for video files -->
|
||||
<!-- - Example Addons: -->
|
||||
<!-- - swiss qr code detection on invoices -->
|
||||
<!-- - tags via regexes -->
|
||||
<!-- - text extraction from audio? -->
|
||||
<!-- - preview generation for video? -->
|
238
website/site/content/docs/addons/control.md
Normal file
@ -0,0 +1,238 @@
|
||||
+++
|
||||
title = "Control Runtime"
|
||||
insert_anchor_links = "right"
|
||||
description = "Control how addons are run"
|
||||
weight = 30
|
||||
template = "docs.html"
|
||||
+++
|
||||
|
||||
# Control runtime of addons
|
||||
|
||||
Addons are run by the joex component as background tasks in an
|
||||
external process. Depending on the machine it is running on, the addon
|
||||
can be run
|
||||
|
||||
- inside a docker container
|
||||
- inside a systemd-nspawn container
|
||||
- directly on the machine
|
||||
|
||||
Addons can be provided as source packages, where the final program may
|
||||
need to be built. They also can depend on other software. In order to
|
||||
not prepare for each addon, it is recommended to install
|
||||
[nix](https://nixos.org) with [flakes](https://nixos.wiki/wiki/Flakes)
|
||||
and docker on the machine running joex.
|
||||
|
||||
Please also look at the addon section in the [default
|
||||
configuration](@/docs/configure/main.md#joex) for joex.
|
||||
|
||||
You need to explicitly enable addons in the restserver config file.
|
||||
|
||||
Docspell uses "runners" to execute an addon. This includes building it
|
||||
if necessary. The following runners exist:
|
||||
|
||||
- `docker`: uses docker to build and run the addon
|
||||
- `nix-flake`: builds via `nix build` and runs the executable in
|
||||
`$out/bin`
|
||||
- `trivial`: simply executes a file inside the addon (as specified in
|
||||
the descriptor)
|
||||
|
||||
In the joex configuration you can specify which runners your system
|
||||
supports.
|
||||
|
||||
## Prepare for *running* addons
|
||||
|
||||
Depending on how you want addons to be run, you need to install either
|
||||
docker and/or systemd-nspawn on the machine running joex.
|
||||
Additionally, the user running joex must be able to use these tools.
|
||||
For docker it usually means to add the user to some group. For
|
||||
systemd-nspawn you most likely want to configure `sudo` to run
|
||||
passwordless the `systemd-nspawn` command.
|
||||
|
||||
Without this, an addon can only be run "directly" on the machine that
|
||||
hosts joex (which might be perfectly fine). The addon then "sees" all
|
||||
files on the machine and could potentially do harm.
|
||||
|
||||
It is recommended to install `nix` and `docker`, if possible. Addons
|
||||
may only run with docker or only without, so supporting both leaves
|
||||
more options.
|
||||
|
||||
|
||||
## Prepare for *building* addons
|
||||
|
||||
Addons can be packaged as source or binary packages. For the former,
|
||||
joex will build the addon first. There are two supported ways to do
|
||||
so:
|
||||
|
||||
- via `docker build` when the addon provides a `Dockerfile` (use
|
||||
runner `docker`)
|
||||
- via `nix build` when the addon provides a `flake.nix` file (use
|
||||
runner `nix-flake`)
|
||||
|
||||
Both build strategies will cache the resulting artifact, so subsequent
|
||||
builds will be (almost) no-ops.
|
||||
|
||||
{% infobubble(title="Note") %}
|
||||
*Building* addons requires to be connected to the internet! Running
|
||||
them may not require a network connection.
|
||||
{% end %}
|
||||
|
||||
If the addon is packaged as a binary, then usually the `trivial`
|
||||
runner (possibly in combination with `systemd-nspawn`) can be used.
|
||||
|
||||
# Runtime
|
||||
|
||||
## Cache directory
|
||||
|
||||
Addons can use a "cache directory" to store data between runs. This
|
||||
directory is not cleaned by docspell. If you have concerns about
|
||||
space, use a cron job or systemd-timer to periodically clean this
|
||||
directory.
|
||||
|
||||
## "Pure" vs "Impure"
|
||||
|
||||
Addons can talk back to Docspell in these ways: they can use the http
|
||||
api, for example with [dsc](@/docs/tools/cli.md), or they can return
|
||||
data to instruct Docspell to apply changes.
|
||||
|
||||
The former requires the addon to be connected to the network to reach
|
||||
the Docspell *restserver*. This allows the addon to do arbitrary
|
||||
changes at any time - this is the "impure" variant.
|
||||
|
||||
The second approach can be run without network connectivity. When
|
||||
using docker or systemd-nspawn, Docspell will run these addons without
|
||||
any network. Thus they can't do anything really, except return data
|
||||
back to Docspell.
|
||||
|
||||
The pure way is much preferred! It allows for more consistent
|
||||
behaviour, because Docspell is in charge for applying any changes.
|
||||
Docspell can apply changes *only if* the addon returned successfully.
|
||||
Addons can also be retried on error, because no changes happened yet.
|
||||
|
||||
It's the decision of the addon author, how the addon will work. It
|
||||
should document whether it is pure or impure. You can also look into
|
||||
the descriptor and check for a `networking: false` setting. As the
|
||||
server administrator, you can configure Docspell to only accept pure
|
||||
addons.
|
||||
|
||||
|
||||
## Runners
|
||||
|
||||
### nix flake runner
|
||||
|
||||
For addons providing a `flake.nix` this runner can build it and find
|
||||
the file to execute. With this `flake.nix` file addons can declare how
|
||||
they should be built and what dependencies are required to run them.
|
||||
|
||||
The resulting executable can be executed via `systemd-nspawn` in a
|
||||
restricted environment or directly on the machine.
|
||||
|
||||
{% infobubble(title="Requires") %}
|
||||
You need to install [nix](https://nixos.org) and enable
|
||||
[flakes](https://nixos.wiki/wiki/Flakes) to use this runner.
|
||||
{% end %}
|
||||
|
||||
### docker
|
||||
|
||||
Addons can provide a Dockerfile or an image. If no image is given,
|
||||
`docker build` will be run to build an image from the `Dockerfile`.
|
||||
Then `docker run` is used to run the addon.
|
||||
|
||||
{% infobubble(title="Requires") %}
|
||||
You need to install `docker` to use this runner.
|
||||
{% end %}
|
||||
|
||||
### trivial
|
||||
|
||||
Addons can simply declare a file to execute. Docspell can use
|
||||
`systemd-nspawn` to run it in a restricted environment, or it can be
|
||||
run directly on the machine. This variant is only useful for very
|
||||
simple addons, that don't require any special dependencies.
|
||||
|
||||
{% infobubble(title="Requires") %}
|
||||
You need to check each addon for its requirements and prepare the
|
||||
machine accordingly.
|
||||
{% end %}
|
||||
|
||||
### Choosing runners
|
||||
|
||||
The config `addons.executor-config.runners` accepts a list of runners.
|
||||
It specifies the preferred runner first. If an addon can be executed
|
||||
via docker and nix, Docspell will choose the runner first in the list.
|
||||
|
||||
If you don't have nix installed, remove the `nix-flake` runner from
|
||||
this list and same for docker, of course.
|
||||
|
||||
|
||||
### systemd-nspawn
|
||||
|
||||
The `systemd-nspawn` can be used to run programs in a lightweight
|
||||
ad-hoc container. It is available on most linux distributions (it is
|
||||
part of systemd…). It doesn't require an image to exist first; this
|
||||
makes it very convenient for running addons in a restricted
|
||||
environment.
|
||||
|
||||
If you enable it in the config file, then all addons are either run
|
||||
via `systemd-nspawn` or docker - and thus always in a restricted
|
||||
environment, where they can only access their own files and the files
|
||||
provided by Docspell.
|
||||
|
||||
The downside is that `systemd-nspawn` needs to be run as root (as far
|
||||
as I know). Therefore, configure `sudo` to allow the user that is
|
||||
running joex to execute `systemd-nspawn` non-interactively.
|
||||
|
||||
{% infobubble(title="Requires") %}
|
||||
Install `systemd-nspawn` and enable the user running joex to use it
|
||||
password-less via sudo.
|
||||
{% end %}
|
||||
|
||||
# Within Docker
|
||||
|
||||
If joex itself is run as a docker container, things get a bit
|
||||
complicated. The default image for joex does not contain `nix`, so the
|
||||
`nix-flake` runner cannot be used out of the box.
|
||||
|
||||
In order to use the `docker` runner, the container must be configured
|
||||
to access the host's docker daemon. On most systems this can be
|
||||
achieved by bind-mounting the unix socket (usually at
|
||||
`/var/run/docker.sock`) into the container. Here is a snippet from the
|
||||
provided `docker-compose` file:
|
||||
|
||||
```yaml
|
||||
joex:
|
||||
image: docspell/joex:latest
|
||||
# ... left out for brevity
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /tmp:/tmp
|
||||
```
|
||||
|
||||
Additionally to `/var/run/docker.sock`, it also bind mounts the `/tmp`
|
||||
directory. This is necessary, because docker will be invoked with bind
|
||||
mounts from inside the container - but these must be available on the
|
||||
host, because the docker client in the container actually runs the
|
||||
command on the host.
|
||||
|
||||
The addon executor uses the system's temp-directory (which is usually
|
||||
`/tmp`) as a base for creating a working and cache directory. Should
|
||||
you change this in joex config file (or your system uses a different
|
||||
default temp-dir), then the bind mount must be adapted as well.
|
||||
|
||||
Another variant is to extend the default joex image and add more
|
||||
programs as needed by addons and then use the `trivial` runner.
|
||||
|
||||
# Summary / tl;dr
|
||||
|
||||
When joex is not inside a container:
|
||||
|
||||
- (optional) Install `systemd-nspawn` - it is provided on many
|
||||
GNU/Linux distributions
|
||||
- Configure `sudo` to allow the user running the joex component to
|
||||
execute `systemd-nspawn` non-interactively (without requiring a
|
||||
password)
|
||||
- Install docker
|
||||
- Install [nix](https://nixos.org) and enable
|
||||
[flakes](https://nixos.wiki/wiki/Flakes)
|
||||
- Allow the user who runs the joex component to use docker and nix. If
|
||||
you install nix as multi-user, then this is already done.
|
||||
- Check the section on addons in the [default
|
||||
configuration](@/docs/configure/main.md#joex) for joex
|
103
website/site/content/docs/addons/using.md
Normal file
@ -0,0 +1,103 @@
|
||||
+++
|
||||
title = "Usage"
|
||||
insert_anchor_links = "right"
|
||||
description = "How to use addons"
|
||||
weight = 20
|
||||
template = "docs.html"
|
||||
+++
|
||||
|
||||
# Using Addons
|
||||
|
||||
This shows with an example, how to install and use an addon. If the ui
|
||||
doesn't show these forms, addons are probably disabled. Addons need to
|
||||
be enabled in the config file of the rest server.
|
||||
|
||||
## Discovering
|
||||
|
||||
Addons can be installed from any URL to a zip file. One way is to use
|
||||
URLs generated by forges like github or gitlab. They provide zip files
|
||||
containing the repository contents. Alternatively an addon may provide
|
||||
specific files in their release section.
|
||||
|
||||
For example, this is the url to the first release of the rotate-pdf
|
||||
addon:
|
||||
|
||||
- <https://github.com/docspell/rotate-pdf-addon/archive/refs/tags/v0.1.0.zip>
|
||||
|
||||
This url points to a fixed version. It is also possible to use urls
|
||||
that are "moving targets":
|
||||
|
||||
- <https://github.com/docspell/rotate-pdf-addon/archive/refs/heads/master.zip>
|
||||
|
||||
The contents behind the above url will very likely change over time.
|
||||
|
||||
For better discoverability, repositories for addons on public forges
|
||||
can be tagged with *docspell-addon*.
|
||||
|
||||
## Install
|
||||
|
||||
With a URL like above, you can go to *Manage Data -> Addons -> New*
|
||||
and insert the url:
|
||||
|
||||
{{ figure2(light="addon-install-01.png", dark="addon-install-01_dark.png") }}
|
||||
|
||||
It might take a while for Docspell to download, extract and verify the
|
||||
addon. The addon will be downloaded into the database. Once installed,
|
||||
the given URL is not used anymore, unless a manual update is issued.
|
||||
|
||||
After this finishes, you cannot change the URL anymore:
|
||||
|
||||
{{ figure2(light="addon-install-02.png", dark="addon-install-02_dark.png") }}
|
||||
|
||||
When using URLs pointing to "moving targets", you could click the
|
||||
*Update Addon* button to re-download the contents at the url. This
|
||||
doesn't make much sense for URLs to fixed versions (in *theory* these
|
||||
could change as well, of course) and it is not without risk. It can be
|
||||
useful for your own addons to have them quickly updated.
|
||||
|
||||
Now the addon is installed. It can now be used by creating a *run configuration*.
|
||||
|
||||
## Run Configuration
|
||||
|
||||
A run configuration is comprised of one or more addons, their inputs
|
||||
and some settings regarding their runtime environment.
|
||||
|
||||
The name is used for displaying in the webapp. You can disable/enable
|
||||
a run configuration.
|
||||
|
||||
It is possible that addons use [dsc](@/docs/tools/cli.md) or call the
|
||||
rest-server otherwise. Usually a valid session is required (to set
|
||||
tags or do searches). When selecting to run *on behalf of a user*, a
|
||||
valid authenticator for that user is injected into the environment of
|
||||
the addon run.
|
||||
|
||||
The *Trigger Run* setting specifies when this run configuration should
|
||||
be executed. You can choose from options that all addons in the list
|
||||
must support. In this example, only `existing-item` is used. This
|
||||
means the run configuration can be selected to run on any item.
|
||||
|
||||
Other options include:
|
||||
- `final-process-item`: executes automatically as the last step when
|
||||
processing uploaded files
|
||||
- `final-reprocess-item`: like `final-process-item` but applies when
|
||||
an existing item is reprocessed.
|
||||
- `scheduled`: runs periodically based on a schedule (and independent
|
||||
from any item)
|
||||
|
||||
Each addon may require arguments. Click on *Configure* to enable the
|
||||
*Arguments* section and add arguments for the corresponding addon.
|
||||
What to insert here is completely specific to the addon. In this case,
|
||||
it expects a JSON object with only one field `"degree"` that indicates
|
||||
how to rotate. In this example, it should be rotated by 90°
|
||||
counter-clockwise. You need to click *Update* to set it into the addon
|
||||
and then *Submit* to save everything.
|
||||
|
||||
{{ figure2(light="addon-install-03.png", dark="addon-install-03_dark.png") }}
|
||||
|
||||
|
||||
With this run configuration in place, you can try it out on some item:
|
||||
|
||||
{{ figure2(light="addon-install-04.png", dark="addon-install-04_dark.png") }}
|
||||
|
||||
This example configured the *rotate-pdf-addon* to rotate left by 90°.
|
||||
Create a similar run configuration to rotate to the right.
|
376
website/site/content/docs/addons/writing.md
Normal file
@ -0,0 +1,376 @@
|
||||
+++
|
||||
title = "Writing"
|
||||
insert_anchor_links = "right"
|
||||
description = "How to write addons"
|
||||
weight = 20
|
||||
template = "docs.html"
|
||||
+++
|
||||
|
||||
# Writing Addons
|
||||
|
||||
Writing an addon can be divided into two things:
|
||||
|
||||
- create the program
|
||||
- define how to package and run it
|
||||
|
||||
The next sections describe both parts. For a quick start, check out
|
||||
the example addons.
|
||||
|
||||
As previously written, you can choose a language. The interaction with
|
||||
docspell happens by exchanging JSON data. So, whatever you choose, it
|
||||
should be possible to read and produce JSON with some convenience.
|
||||
|
||||
|
||||
# Writing the program
|
||||
|
||||
## Interface to Docspell
|
||||
|
||||
The interface to Docspell is JSON data. The addon receives all inputs
|
||||
as JSON and may return a JSON object as output (via stdout).
|
||||
|
||||
An addon can be executed in different contexts. Depending on this, the
|
||||
available inputs differ. The addon always receives one argument, which
|
||||
is a file containing the user supplied data (it may be empty). A user
|
||||
is able to provide data to every addon from the web-ui.
|
||||
|
||||
All other things are provided as environment variables. There are
|
||||
environment variables that are always provided and some are only
|
||||
available for specific contexts.
|
||||
|
||||
For example, an addon that is executed in the context of an item
|
||||
(maybe after processing or when a user selects an addon to run "on an
|
||||
item"), Docspell prepares all data for the corresponding item and
|
||||
makes it available to the addon. In contrast, an addon executed
|
||||
periodically by a schedule, won't have this data available.
|
||||
|
||||
|
||||
## Basic Environment
|
||||
|
||||
The following environment variables are always provided by Docspell:
|
||||
|
||||
- `ADDON_DIR` points to the directory containing the extracted addon
|
||||
zip file
|
||||
- `TMPDIR` / `TMP_DIR` a directory for storing temporary data
|
||||
- `OUTPUT_DIR` a directory for storing files that should be processed
|
||||
by docspell
|
||||
- `CACHE_DIR` a directory for storing data that should stay between
|
||||
addon runs
|
||||
|
||||
It is very much recommended to always use these environment variables
|
||||
when reading and writing data. This keeps Docspell in control about
|
||||
the exact location.
|
||||
|
||||
The working directory will be set to a directory that is also
|
||||
temporary, but please don't rely on that. Use the environment
|
||||
variables.
|
||||
|
||||
## Item data
|
||||
|
||||
The following is available when executed in the context of an item, meaning for triggers:
|
||||
`final-process-item`, `final-reprocess-item`, `existing-item`.
|
||||
|
||||
### `ITEM_DATA_JSON`
|
||||
|
||||
This environment variable points to a JSON file containing information
|
||||
about the current item. If it is run at processing time, it includes
|
||||
all information gathered so far by Docspell.
|
||||
|
||||
**Example**
|
||||
{{ incl_json(path="templates/shortcodes/item-data") }}
|
||||
|
||||
|
||||
### `ITEM_ARGS_JSON`
|
||||
|
||||
This environment variable points to a JSON file that contains the user
|
||||
supplied information with an upload request. That is, a user may
|
||||
specify tags or a language when uploading files. This would be in this
|
||||
file.
|
||||
|
||||
*This is only available for uploads. Trigger `final-process-item`.*
|
||||
|
||||
**Example**
|
||||
{{ incl_json(path="templates/shortcodes/item-args") }}
|
||||
|
||||
|
||||
### `ITEM_ORIGINAL_JSON` and `ITEM_PDF_JSON`
|
||||
|
||||
These JSON files contain a list of objects. Each object provides
|
||||
properties about a file - either the original file or the converted
|
||||
pdf. The structure is the same.
|
||||
|
||||
**Example**
|
||||
{{ incl_json(path="templates/shortcodes/file-meta") }}
|
||||
|
||||
|
||||
|
||||
### Directories
|
||||
|
||||
These environment variables point to directories that contain the attachment files.
|
||||
|
||||
- `ITEM_PDF_DIR` contains all converted pdf files, the attachment id is the filename
|
||||
- `ITEM_ORIGINAL_DIR` contains all original files, the attachment id is the filename
|
||||
|
||||
For example, to obtain a converted pdf file, lookup the id in
|
||||
`ITEM_PDF_JSON` and then construct the file name via
|
||||
`ITEM_PDF_DIR/{id}`.
|
||||
|
||||
|
||||
## Session for dsc
|
||||
|
||||
An addon may use [dsc](@/docs/tools/cli.md) which requires for many
|
||||
commands a valid session identifier. Usually this is obtained by
|
||||
logging in (i.e. using `dsc login`). This is not really feasible from
|
||||
inside an addon, of course. Therefore you can configure an addon to
|
||||
run on behalf of some user when creating the run configuration.
|
||||
Docspell then generates a valid session identifier and puts it into
|
||||
the environment. The [dsc](@/docs/tools/cli.md) tool will pick them up
|
||||
automatically.
|
||||
|
||||
It will also setup the URL to connect to some restserver. (If you have
|
||||
multiple rest-servers running, it will pick one randomly).
|
||||
|
||||
- `DSC_SESSION` env variable containing a session identifier. Its
|
||||
validity is coupled to the configured timeout.
|
||||
- `DSC_DOCSPELL_URL` the base url to some rest server
|
||||
|
||||
That means when using an addon in this way, you can simply use `dsc`
|
||||
without worrying about authentication or the correct URL to connect
|
||||
to.
|
||||
|
||||
|
||||
## Output
|
||||
|
||||
Docspell doesn't interpret the returncode of an addon, except checking
|
||||
for being equal to `0` which indicates a successful run.
|
||||
|
||||
In order to change data in Docspell, the addon program can run
|
||||
`dsc` (for example) to change some state - like setting tags etc. But
|
||||
the preferred approach would be to return instructions for Docspell.
|
||||
Docspell will execute the instructions when the addon terminates
|
||||
successfully - that is with return code `0`.
|
||||
|
||||
These instructions are in a JSON object which needs to go to stdout.
|
||||
You can use stderr in an addon for logging/debugging purposes. But if
|
||||
you specify `collectOutput: true` in the descriptor, then stdout must
|
||||
only return this specific JSON (or nothing, empty output is ignored).
|
||||
|
||||
You find the complete structure below. It consists of these parts:
|
||||
|
||||
- `commands`: lets you declare actions to do for an item or attachment
|
||||
- `files`: defines files relative to `OUTPUT_DIR` that should be
|
||||
processed
|
||||
- `newItems`: declares files relative to `OUTPUT_DIR` that should be
|
||||
processed as new uploads
|
||||
|
||||
The `commands` allows to set tags, fields and other things. All parts
|
||||
are optional, you don't need to return the complete structure. Just
|
||||
returning `commands` or only `files` is ok.
|
||||
|
||||
**Example**
|
||||
{{ incl_json(path="templates/shortcodes/addon-output") }}
|
||||
|
||||
|
||||
# Descriptor
|
||||
|
||||
An addon must provide an *addon descriptor*, which is a yaml or json
|
||||
file looking like this:
|
||||
|
||||
```yaml
|
||||
# The meta section is required. Name and version must not contain
|
||||
# whitespace
|
||||
meta:
|
||||
name: "name-of-addon"
|
||||
version: "2.21"
|
||||
description: |
|
||||
Describe the purpose and how it must be used here
|
||||
|
||||
# Defining when this addon is run. This is used to guide the user
|
||||
# interface in selecting an addon. At least one is required to specify.
|
||||
#
|
||||
# Possible values:
|
||||
# - scheduled: requires to enter a timer to run this addon periodically
|
||||
# - final-process-item: the final step when processing an item
|
||||
# - final-reprocess-item: the final step when reprocessing an item
|
||||
# - existing-item: A user selects the addon to run on an item
|
||||
triggers:
|
||||
- final-process-item
|
||||
- final-reprocess-item
|
||||
- existing-item
|
||||
|
||||
# How to build and run this addon (optional). If missing, auto
|
||||
# detection will enable a nix runner if a `flake.nix` is found in the
|
||||
# source root and docker if a `Dockerfile` is found.
|
||||
#
|
||||
# Both runners are compared to what is enabled at the server.
|
||||
runner:
|
||||
# Building the program using nix flakes. This requires a flake.nix
|
||||
# file in the source root with a default package and a flake-enabled
|
||||
# nix on the joex machine.
|
||||
#
|
||||
# The program is built via `nix build`. If the joex machine has
|
||||
# systemd-nspawn installed, it is used to run the addon inside a
|
||||
# container. Otherwise the addon is run directly on the machine.
|
||||
nix:
|
||||
enable: true
|
||||
|
||||
# Docker based runner can define a custom image to use. If a `build`
|
||||
# key exists pointing to a Dockerfile, the image is built before. If
|
||||
# the docker image is complex, you can build it independently and
|
||||
# provide the pre-built image.
|
||||
#
|
||||
# The program is run via `docker run` passing the arguments to the
|
||||
# addon. Thus it expects the entrypoint to be correctly configured
|
||||
# to the executable. You may use `args` in order to prepend
|
||||
# additional arguments, like the path to an executable if the image
|
||||
# requires that. The joex machine must have docker installed and the
|
||||
# user running joex must be allowed to use docker. You must either
|
||||
# define an image with an appropriate entry point or a dockerfile.
|
||||
docker:
|
||||
enable: false
|
||||
#image: myorg/myimage:latest
|
||||
build: Dockerfile
|
||||
|
||||
# Trivial runner that simply executes the file specified with
|
||||
# `exec`. Nothing is built before. This runner usually requires that
|
||||
# the joex machine contains all dependencies needed to run the
|
||||
# addon. You may need to install additional software on the machine
|
||||
# running joex.
|
||||
trivial:
|
||||
enable: false
|
||||
exec: src/addon.sh
|
||||
|
||||
# Optional arguments/options given to the program. The program
|
||||
# receives at least one argument, which is a file to the user input as
|
||||
# supplied in the application. The arguments here are prepended.
|
||||
args:
|
||||
|
||||
|
||||
options:
|
||||
# If false, the program is run inside a private network, blocking
|
||||
# traffic to the host and networks reachable from there. This only
|
||||
# applies if the addon can be run inside a container.
|
||||
#
|
||||
# If the addon runs side effects (such as using dsc to set tags),
|
||||
# this must be set to `true`.
|
||||
#
|
||||
# Default is false.
|
||||
networking: true
|
||||
|
||||
# If true, the stdout of the program is parsed into a JSON structure
|
||||
# that is interpreted as actions executed by the task that runs the
|
||||
# addon. If the addon runs side effects only, set this to `false`
|
||||
# and the output is ignored.
|
||||
#
|
||||
# It is recommended to use this approach, if possible. It allows
|
||||
# docspell itself to apply any changes and the addon can run
|
||||
# completely isolated.
|
||||
#
|
||||
# Default is false.
|
||||
collectOutput: true
|
||||
```
|
||||
|
||||
|
||||
# Packaging
|
||||
|
||||
Docspell can use different ways to build and run the addon:
|
||||
`nix-flake`, `docker` and `trivial`. The first two allow to package
|
||||
the addon in a defined way (with a single dependency, either nix or
|
||||
docker) and then execute it independently from the underlying system.
|
||||
This makes it possible to execute the addon on a variety of systems.
|
||||
This is especially useful for addons that are meant to be public and
|
||||
reusable by different people.
|
||||
|
||||
The "trivial" runner is only executing some program specified in
|
||||
`docspell-addon.yaml`, directly on the joex machine (or via
|
||||
`systemd-nspawn`). The machine running joex must then provide all
|
||||
necessary dependencies and it must be compatible to run the addon. It
|
||||
may be useful especially for personal addons.
|
||||
|
||||
|
||||
## nix flake
|
||||
|
||||
Using [nix](https://nixos.org) with
|
||||
[flakes](https://nixos.wiki/wiki/Flakes) enabled, is the recommended
|
||||
approach. It is very flexible and reproducible while sharing most
|
||||
dependencies (in contrast to docker where each image contains the same
|
||||
packages again and again).
|
||||
|
||||
Docspell runs `nix build` to build the addon and then executes the
|
||||
file produced to `$out/bin`.
|
||||
|
||||
|
||||
## docker
|
||||
|
||||
For docker it is recommended to provide pre-built images. Docspell can
|
||||
build images from provided `Dockerfile`, but for larger images it
|
||||
might be better to do this beforehand.
|
||||
|
||||
Docspell will run the addon using `docker run …` passing it only the
|
||||
user-input file as argument. Thus the image must define an appropriate
|
||||
`ENTRYPOINT`.
|
||||
|
||||
# Examples
|
||||
## Minimal Addon
|
||||
|
||||
The steps below create a minimal addon:
|
||||
|
||||
1. Create a bash script `addon.sh` with this content:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
echo "Hello world!"
|
||||
```
|
||||
2. Make it executable:
|
||||
```bash
|
||||
chmod +x addon.sh
|
||||
```
|
||||
3. Create a yaml file `docspell-addon.yaml` with this content:
|
||||
|
||||
```yaml
|
||||
meta:
|
||||
name: "minimal-addon"
|
||||
version: "0.1.0"
|
||||
triggers:
|
||||
- existing-item
|
||||
- scheduled
|
||||
runner:
|
||||
trivial:
|
||||
enable: true
|
||||
exec: addon.sh
|
||||
```
|
||||
4. Create a zip file containing these two files:
|
||||
```bash
|
||||
zip addon.zip docspell-addon.yaml addon.sh
|
||||
```
|
||||
|
||||
The addon is now ready. Make it available via an url (use some file
|
||||
sharing tool, upload it somewhere etc) and then it can be installed
|
||||
and run.
|
||||
|
||||
## Non-Minimal Addon
|
||||
|
||||
The minimal example above is good to see what is required, but it is
|
||||
not very useful…. Please see this post about the [audio file
|
||||
addon](@/blog/2022-05-16_audio_file_addon.md) that walks through a
|
||||
more useful addon.
|
||||
|
||||
# Misc
|
||||
|
||||
## Advantages of "pure" addons
|
||||
|
||||
Although the output structure is not set in stone, it is recommended
|
||||
to use this in contrast to directly changing state via `dsc`.
|
||||
|
||||
- outputs of all addons are collected and only applied if all were
|
||||
successful; in contrast side effects are always applied even if the
|
||||
addon fails shortly after
|
||||
- since addons are executed as joex tasks, their result can be sent as
|
||||
events to another http server for further processing.
|
||||
- addons can run in an isolated environment without network (no data
|
||||
can go out)
|
||||
|
||||
## Use addons in other addons?
|
||||
|
||||
This can be achieved very conveniently by using `nix`. If addons are
|
||||
defined as a nix flake, they can be easily consumed by each other.
|
@ -82,6 +82,7 @@ template = "docs.html"
|
||||
- zip
|
||||
- [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions)
|
||||
(e-mail files in plain text MIME)
|
||||
- Extend Docspell via [addons](@/docs/addons/basics.md)
|
||||
- Tooling:
|
||||
- [Command Line Interface](@/docs/tools/cli.md) allowing to upload
|
||||
files, watch folders and many more!
|
||||
|
@ -1,4 +1,4 @@
|
||||
``` bash
|
||||
{% set data = load_data(path=path) %}
|
||||
``` bash
|
||||
{{ data | safe }}
|
||||
```
|
||||
|
4
website/site/templates/shortcodes/incl_json.md
Normal file
@ -0,0 +1,4 @@
|
||||
{% set data = load_data(path=path) %}
|
||||
``` json
|
||||
{{ data | safe }}
|
||||
```
|
@ -0,0 +1,86 @@
|
||||
package docspell.website
|
||||
|
||||
import cats.syntax.all._
|
||||
import docspell.addons.out.{AddonOutput, ItemFile, NewFile, NewItem}
|
||||
import docspell.addons.out.NewFile.{Meta => FileMeta}
|
||||
import docspell.addons.out.NewItem.{Meta => ItemMeta}
|
||||
import docspell.common._
|
||||
import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction}
|
||||
import io.circe.syntax._
|
||||
|
||||
/** A comprehensive `AddonOutput` sample that is rendered to JSON.
  *
  * The value combines backend commands, files belonging to an existing item
  * and one new item. The individual parts are built separately and are
  * assembled in [[example]].
  */
object AddonOutputExample extends Helper {

  // Command carrying a broad selection of actions against a single item.
  private val updateItem =
    BackendCommand.ItemUpdate(
      itemId = id("XabZ-item-id"),
      actions = List(
        ItemAction.AddTags(Set("tag1", "tag2")),
        ItemAction.ReplaceTags(Set("tagX", "tagY")),
        ItemAction.RemoveTags(Set("tag0", "tag9")),
        ItemAction.RemoveTagsCategory(Set("doc-type")),
        ItemAction.SetFolder(Option("folder-name")),
        ItemAction.SetCorrOrg(Option(id("OaIy-org-ID"))),
        ItemAction.SetCorrPerson(Option(id("OaIy-person-ID"))),
        ItemAction.SetConcPerson(Option(id("AEiae-person-ID"))),
        ItemAction.SetConcEquipment(Option(id("AEiae-equipment-ID"))),
        ItemAction.SetField(id("eur"), "12.99"),
        ItemAction.SetName("new item name"),
        ItemAction.SetNotes(Option("replace notes with this")),
        ItemAction.AddNotes("More notes appended", "-----".some)
      )
    )

  // Command that changes a single attachment of the same item.
  private val updateAttachment =
    BackendCommand.AttachmentUpdate(
      itemId = id("XabZ-item-id"),
      attachId = id("Atca-attach-id"),
      actions = List(
        AttachmentAction.SetExtractedText(Option("replace extracted text with this"))
      )
    )

  // Both new files share every metadata setting except the language.
  private def newFile(lang: Language, fileName: String) =
    NewFile(
      metadata = FileMeta(
        language = Some(lang),
        skipDuplicate = Some(true),
        attachmentsOnly = Some(false)
      ),
      file = fileName
    )

  // Files that refer to an already existing item.
  private val filesForItem =
    ItemFile(
      id("iZtb-item-id"),
      textFiles = Map("attach-id" -> "newtext.txt"),
      pdfFiles = Map("attach-id" -> "better.pdf"),
      previewImages = Map("attach-id" -> "better-preview.png"),
      newFiles = List(
        newFile(Language.English, "new-file1.docx"),
        newFile(Language.German, "new-file2.pdf")
      )
    )

  // A completely new item made of two files.
  private val createdItem =
    NewItem(
      metadata = Option(
        ItemMeta(
          language = Some(Language.English),
          direction = Some(Direction.Incoming),
          folderId = Some(id("my-folder")),
          source = Some("the-addon-x"),
          skipDuplicate = Some(true),
          tags = Some(List("tag1", "tag2")),
          attachmentsOnly = None
        )
      ),
      files = List("a-file.pdf", "another.jpg")
    )

  val example = AddonOutput(
    commands = List(updateItem, updateAttachment),
    files = List(filesForItem),
    newItems = List(createdItem)
  )

  /** The sample encoded as JSON, indented with two spaces. */
  def exampleJson: String =
    example.asJson.spaces2
}
|
@ -0,0 +1,23 @@
|
||||
package docspell.website
|
||||
|
||||
import docspell.addons.out._
|
||||
import docspell.common.bc._
|
||||
import io.circe.syntax._
|
||||
|
||||
/** The smallest `AddonOutput` sample: one command that adds two tags to an
  * item.
  */
object AddonOutputMiniExample extends Helper {

  // The only command of this sample.
  private val addTags =
    BackendCommand.ItemUpdate(
      itemId = id("XabZ-item-id"),
      actions = List(ItemAction.AddTags(Set("tag1", "tag2")))
    )

  val example = AddonOutput(commands = List(addTags))

  /** The sample encoded as JSON, indented with two spaces. */
  def exampleJson: String =
    example.asJson.spaces2

}
|
@ -0,0 +1,37 @@
|
||||
package docspell.website
|
||||
|
||||
import cats.syntax.option._
|
||||
import docspell.common.{ByteSize, Language, MimeType}
|
||||
import docspell.store.queries.AttachedFile
|
||||
import io.circe.syntax._
|
||||
import scodec.bits.ByteVector
|
||||
|
||||
/** Sample list of two `AttachedFile` entries that is rendered to JSON. */
object FileMetaExample extends Helper {

  // Builds one entry. The id is random on every evaluation, the language is
  // always English and the checksum is the sha256 digest of the bytes given
  // as a hex string.
  private def attachedFile(
      fileName: String,
      pos: Int,
      mime: MimeType,
      sizeInBytes: Long,
      checksumSeedHex: String
  ) =
    AttachedFile(
      id = randomId,
      name = Option(fileName),
      position = pos,
      language = Option(Language.English),
      mimetype = mime,
      length = ByteSize(sizeInBytes),
      checksum = ByteVector.fromValidHex(checksumSeedHex).digest("sha256")
    )

  val example1 =
    attachedFile("the filename.png", 0, MimeType.png, 454654L, "caffe0caffe")

  // NOTE(review): the name ends in `.png` while the mimetype is pdf — confirm
  // whether this mismatch is intended for the documentation.
  val example2 =
    attachedFile("other filename.png", 1, MimeType.pdf, 1232214L, "eff0eff0eff")

  val example = List(example1, example2)

  /** The list encoded as JSON, indented with two spaces. */
  val exampleJson = example.asJson.spaces2
}
|
29
website/src/main/scala/docspell/website/Helper.scala
Normal file
@ -0,0 +1,29 @@
|
||||
package docspell.website
|
||||
|
||||
import docspell.common.{IdRef, Ident, Timestamp}
|
||||
import scodec.bits.ByteVector
|
||||
|
||||
import java.time.LocalDate
|
||||
import scala.util.Random
|
||||
|
||||
/** Small utilities shared by the example objects of the website module. */
trait Helper {

  /** Creates an `Ident` from the given string via `Ident.unsafe`. */
  def id(str: String): Ident = Ident.unsafe(str)

  /** Fixed timestamp (2022-05-14 11:22:12, UTC) used wherever a date is needed. */
  val date20220514 = {
    val day = LocalDate.of(2022, 5, 14)
    Timestamp.atUtc(day.atTime(11, 22, 12))
  }

  /** The collective id used throughout the examples. */
  val cid = id("collective")

  /** Adds `.id` to strings as an alternative spelling of [[id]]. */
  implicit final class StringExt(underlying: String) {
    def id: Ident = Ident.unsafe(underlying)
  }

  /** Pairs a freshly generated random id with the given name. */
  def idRef(name: String): IdRef = IdRef(randomId, name)

  /** Generates a new id from 6 random bytes, encoded as base58. */
  def randomId = {
    val bytes = new Array[Byte](6)
    new Random().nextBytes(bytes)
    id(ByteVector.view(bytes).toBase58)
  }

}
|
@ -0,0 +1,25 @@
|
||||
package docspell.website
|
||||
|
||||
import cats.syntax.option._
|
||||
import docspell.common.{Language, ProcessItemArgs}
|
||||
import io.circe.syntax._
|
||||
|
||||
/** Sample `ProcessItemArgs.ProcessMeta` value that is rendered to JSON. */
object ItemArgsExample extends Helper {
  import ProcessItemArgs.ProcessMeta

  val example = ProcessMeta(
    // where the item belongs to and where it came from
    collective = cid,
    itemId = None,
    language = Language.English,
    direction = None,
    sourceAbbrev = "scanner",
    folderId = None,
    // processing options
    validFileTypes = Seq.empty,
    skipDuplicate = true,
    fileFilter = None,
    tags = Option(List("given-tag-1")),
    reprocess = false,
    attachmentsOnly = None
  )

  /** The sample encoded as JSON, indented with two spaces. */
  val exampleJson = example.asJson.spaces2
}
|
@ -0,0 +1,75 @@
|
||||
package docspell.website
|
||||
|
||||
import cats.syntax.option._
|
||||
import docspell.common.MetaProposal.Candidate
|
||||
import docspell.common._
|
||||
import docspell.joex.process.ItemData
|
||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
|
||||
import io.circe.syntax._
|
||||
|
||||
/** Sample `ItemData` value that is rendered to JSON.
  *
  * It describes one item with a single attachment; the attachment's metadata
  * carries two proposals.
  */
object ItemDataExample extends Helper {

  // Ids that are referenced from several places below.
  private val sampleItemId = id("UyZ-item-id")
  private val sampleAttachId = id("Apa-attach-id")

  // Proposals attached to the attachment metadata. Each candidate gets a
  // random id from `idRef`.
  private val proposals: MetaProposalList = {
    val corrOrg =
      MetaProposal(MetaProposalType.CorrOrg, Candidate(idRef("Acme AG"), Set.empty))
    val concPerson =
      MetaProposal(
        MetaProposalType.ConcPerson,
        Candidate(idRef("Derek Jeter"), Set.empty)
      )
    MetaProposalList(List(corrOrg, concPerson))
  }

  // The item record; all optional references are left empty.
  private val item =
    RItem(
      id = sampleItemId,
      cid = cid,
      name = "yearly report 2021",
      itemDate = Option(date20220514),
      source = "webapp",
      direction = Direction.Incoming,
      state = ItemState.Processing,
      corrOrg = None,
      corrPerson = None,
      concPerson = None,
      concEquipment = None,
      inReplyTo = None,
      dueDate = None,
      created = date20220514,
      updated = date20220514,
      notes = None,
      folderId = None
    )

  // The single attachment of the item.
  private val attachment =
    RAttachment(
      id = sampleAttachId,
      itemId = sampleItemId,
      fileId = FileKey(cid, FileCategory.AttachmentConvert, id("abcxyz")),
      position = 0,
      created = date20220514,
      name = Option("report_year_2021.pdf")
    )

  // Metadata that belongs to the attachment above.
  private val attachmentMeta =
    RAttachmentMeta(
      id = sampleAttachId,
      content = Option("this is the extracted text …"),
      nerlabels = Nil,
      proposals = proposals,
      pages = Option(2),
      language = Option(Language.English)
    )

  val example = ItemData(
    item = item,
    attachments = Vector(attachment),
    metas = Vector(attachmentMeta),
    dateLabels = Vector.empty,
    originFile = Map(
      sampleAttachId -> FileKey(cid, FileCategory.AttachmentSource, id("yanetar"))
    ),
    givenMeta = MetaProposalList.empty,
    tags = List("tag-1"),
    classifyProposals = MetaProposalList.empty,
    classifyTags = List("invoice")
  )

  /** The sample encoded as JSON, indented with two spaces. */
  val exampleJson = example.asJson.spaces2
}
|
64
website/src/main/scala/docspell/website/Main.scala
Normal file
@ -0,0 +1,64 @@
|
||||
package docspell.website
|
||||
|
||||
import cats.effect.{ExitCode, IO, IOApp}
|
||||
import fs2.io.file.{Files, Path}
|
||||
import fs2.Stream
|
||||
import io.circe.Encoder
|
||||
import io.circe.syntax._
|
||||
|
||||
/** Command line entry point that renders the documentation examples as JSON.
  *
  * Usage: `<example-name> [file]`
  *
  * The first argument selects the example. If a non-empty second argument is
  * given, the JSON is written to that file; if it is missing or empty, the
  * JSON is printed to stdout. Any problem with the arguments is reported on
  * stderr and results in `ExitCode.Error`.
  */
object Main extends IOApp {

  override def run(args: List[String]): IO[ExitCode] =
    args match {
      case Nil =>
        err(stderr("Specify what example to print"))

      case name :: target =>
        (exampleFor(name), target) match {
          case (None, _) =>
            err(stderr(s"Unknown example: $name"))

          // No file (or an empty file name) means: print to stdout.
          case (Some(json), Nil | ("" :: Nil)) =>
            ok(stdout(json))

          case (Some(json), file :: Nil) =>
            ok(writeText(json, file))

          case (Some(_), _) =>
            err(stderr(s"Too many arguments for example: $name"))
        }
    }

  /** Looks up the rendered JSON of the example with the given name.
    *
    * Only the selected example object is evaluated. Returns `None` for names
    * that do not denote an example.
    */
  private def exampleFor(name: String): Option[String] =
    name match {
      case "addon-output"      => Some(AddonOutputExample.exampleJson)
      case "addon-output-tags" => Some(AddonOutputMiniExample.exampleJson)
      case "item-data"         => Some(ItemDataExample.exampleJson)
      case "item-args"         => Some(ItemArgsExample.exampleJson)
      case "file-meta"         => Some(FileMetaExample.exampleJson)
      case _                   => None
    }

  /** Prints a line to stdout.
    *
    * `str` is treated as a format string only if `args` are given; without
    * arguments it is printed verbatim.
    */
  def stdout(str: String, args: Any*): Unit =
    Console.out.println(interpolate(str, args))

  /** Prints a line to stderr; same formatting rules as [[stdout]]. */
  def stderr(str: String, args: Any*): Unit =
    Console.err.println(interpolate(str, args))

  // Running `format` on text without arguments fails (or alters the text) as
  // soon as it contains a `%`, which can appear in JSON or in a user supplied
  // example name. Formatting is therefore applied only when arguments exist.
  private def interpolate(template: String, args: Seq[Any]): String =
    if (args.isEmpty) template
    else template.format(args: _*)

  /** Runs the effect and reports success. */
  def ok(f: IO[Unit]): IO[ExitCode] =
    f.as(ExitCode.Success)

  /** Suspends the side effect, runs it and reports success. */
  def ok(p: => Unit): IO[ExitCode] =
    ok(IO(p))

  /** Suspends the side effect, runs it and reports an error. */
  def err(p: => Unit): IO[ExitCode] =
    IO(p).as(ExitCode.Error)

  // Writes the text UTF-8 encoded to the given file.
  private def writeText(text: String, file: String): IO[Unit] =
    Stream
      .emit(text)
      .covary[IO]
      .through(fs2.text.utf8.encode)
      .through(Files[IO].writeAll(Path(file)))
      .compile
      .drain

  /** Adds `writeFile` to every value that has a JSON encoder. */
  implicit class WriteOps[A: Encoder](self: A) {

    /** Encodes the value as JSON (two-space indentation) and writes it to `file`. */
    def writeFile(file: String): IO[Unit] =
      writeText(self.asJson.spaces2, file)
  }
}
|