Add support for integrating into nix/nixos

This commit is contained in:
Eike Kettner 2020-01-19 22:53:58 +01:00
parent 2454f358b1
commit 23af8acff8
9 changed files with 929 additions and 5 deletions

3
.gitignore vendored
View File

@ -3,4 +3,5 @@ target/
dev.conf
elm-stuff
result
_site/
_site/
*.qcow2

View File

@ -80,6 +80,46 @@ docspell.joex {
}
```
## Nix Expressions
The directory `/nix` contains nix expressions to install docspell via
the nix package manager and to integrate it into NixOS.
### Testing NixOS Modules
The modules can be built by building the `configuration-test.nix` file
together with some nixpkgs version. For example:
``` shell
nixos-rebuild build-vm -I nixos-config=./configuration-test.nix \
-I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/nixos-19.09.tar.gz
```
This will build all modules imported in `configuration-test.nix` and
create a virtual machine containing the system. After that completes,
the system configuration can be found behind the `./result/system`
symlink. So it is possible to look at the generated systemd config for
example:
``` shell
cat result/system/etc/systemd/system/docspell-joex.service
```
And with some more commands (there probably is an easier way…) the
config file can be checked:
``` shell
cat result/system/etc/systemd/system/docspell-joex.service | grep ExecStart | cut -d'=' -f2 | xargs cat | tail -n1 | awk '{print $NF}'| sed 's/.$//' | xargs cat | jq
```
To see the module in action, the vm can be started (the first line
sets more memory for the vm):
``` shell
export QEMU_OPTS="-m 2048"
./result/bin/run-docspelltest-vm
```
## ADRs
Some early information about certain details can be found in the few

View File

@ -23,7 +23,7 @@ docspell.server {
# The secret for this server that is used to sign the authenticator
# tokens. If multiple servers are running, all must share the same
# secret. You can use base64 or hex strings (prefix with b64: and
# hex:, respectively)
# hex:, respectively).
server-secret = "hex:caffee"
# How long an authentication token is valid. The web application
@ -68,9 +68,10 @@ docspell.server {
}
files {
# Defines the chunk size used to store bytes. This will affect
# the memory footprint when uploading and downloading files. At
# most this amount is loaded into RAM for down- and uploading.
# Defines the chunk size (in bytes) used to store the files.
# This will affect the memory footprint when uploading and
# downloading files. At most this amount is loaded into RAM for
# down- and uploading.
#
# It also defines the chunk size used for the blobs inside the
# database.

View File

@ -0,0 +1,52 @@
# NixOS configuration for trying out the docspell modules, e.g. inside
# a VM created with `nixos-rebuild build-vm`.
{ config, pkgs, ... }:

let
  release = import ./release.nix;
in
{
  # Load every docspell module listed in release.nix.
  imports = release.modules;

  i18n.consoleKeyMap = "neo";
  i18n.defaultLocale = "en_US.UTF-8";

  # Fixed, well-known root password; only sensible for a test machine.
  users.users.root.password = "root";

  # Adds the `docspell` attribute (server, joex, tools) to `pkgs`, which
  # is what the modules reference.
  nixpkgs.config.packageOverrides = pkgs:
    let
      overrides = {
        docspell = callPackage (release.pkg "0.2.0") {};
      };
      callPackage = pkgs.lib.callPackageWith (overrides // pkgs);
    in
      overrides;

  services.docspell-joex.enable = true;
  services.docspell-joex.base-url = "http://docspelltest:7878";

  services.docspell-restserver.enable = true;

  services.docspell-consumedir.enable = true;
  services.docspell-consumedir.watchDirs = [ "/tmp/test" ];
  services.docspell-consumedir.urls = [
    "http://localhost:7880/api/v1/open/upload/item/blabla"
  ];

  services.xserver.enable = false;

  networking.hostName = "docspelltest";

  system.stateVersion = "19.09";
}

84
nix/module-consumedir.nix Normal file
View File

@ -0,0 +1,84 @@
# NixOS module that runs docspell's `consumedir.sh` as a systemd
# service: it watches directories and uploads new files to the given
# upload urls.
{config, lib, pkgs, ...}:

with lib;
let
  cfg = config.services.docspell-consumedir;
  # A dedicated user is used unless `runAs` names another one.
  user = if cfg.runAs == null then "docspell-consumedir" else cfg.runAs;
in {

  ## interface
  options = {
    services.docspell-consumedir = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Whether to enable docspell consume directory.";
      };

      runAs = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = ''
          The user that runs the consumedir process.
        '';
      };

      watchDirs = mkOption {
        type = types.listOf types.str;
        description = "The directories to watch for new files.";
      };

      verbose = mkOption {
        type = types.bool;
        default = false;
        description = "Run in verbose mode";
      };

      deleteFiles = mkOption {
        type = types.bool;
        default = false;
        description = "Whether to delete successfully uploaded files.";
      };

      distinct = mkOption {
        type = types.bool;
        default = true;
        description = "Check for duplicates and update only if the file is not already present.";
      };

      urls = mkOption {
        type = types.listOf types.str;
        example = [ "http://localhost:7880/api/v1/open/upload/item/abced-12345-abcde-12345" ];
        description = "A list of upload urls.";
      };
    };
  };

  ## implementation
  config = mkIf cfg.enable {

    # The user is only created when no existing one was requested.
    users.users."${user}" = mkIf (cfg.runAs == null) {
      name = user;
      isSystemUser = true;
      description = "Docspell consumedir user";
    };

    systemd.services.docspell-consumedir =
      let
        # Argument vector for consumedir.sh: one `--path` per watched
        # directory, the optional flags, then the upload urls.
        args = (builtins.concatMap (dir: [ "--path" dir ]) cfg.watchDirs) ++
               (optional cfg.verbose "-v") ++
               (optional cfg.deleteFiles "-d") ++
               (optional cfg.distinct "-m") ++
               cfg.urls;
        # Every argument is shell-quoted here ...
        cmd = "${pkgs.docspell.tools}/bin/consumedir.sh ${escapeShellArgs args}";
      in
      {
        description = "Docspell Consumedir";
        # `network.target` is the systemd unit; `networking.target` does
        # not exist, so ordering after it had no effect.
        after = [ "network.target" ];
        wantedBy = [ "multi-user.target" ];
        path = [ pkgs.utillinux pkgs.curl pkgs.coreutils ];

        # ... and the whole command is quoted once more, because it is
        # handed as a single string to the shell started by `su -c`.
        script =
          "${pkgs.su}/bin/su -s ${pkgs.bash}/bin/sh ${user} -c ${escapeShellArg cmd}";
      };
  };
}

368
nix/module-joex.nix Normal file
View File

@ -0,0 +1,368 @@
# NixOS module for the docspell job executor (joex).
#
# All options below `services.docspell-joex` are serialised to JSON and
# passed to the program as its config file.
{config, lib, pkgs, ...}:

with lib;
let
  cfg = config.services.docspell-joex;
  # A user named "docspell" is used unless `runAs` names another one.
  user = if cfg.runAs == null then "docspell" else cfg.runAs;

  # The complete option set (including `enable` and `runAs`) wrapped
  # into the `docspell.joex` namespace.
  configFile = pkgs.writeText "docspell-joex.conf" ''
    {"docspell": { "joex":
    ${builtins.toJSON cfg}
    }}
  '';

  # Single place for all default values; referenced by the option
  # declarations below.
  defaults = {
    app-id = "docspell-joex1";
    base-url = "http://localhost:7878";
    bind = {
      address = "localhost";
      port = 7878;
    };
    jdbc = {
      url = "jdbc:h2:///tmp/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE";
      user = "sa";
      password = "";
    };
    scheduler = {
      pool-size = 2;
      counting-scheme = "4,1";
      retries = 5;
      retry-delay = "1 minute";
      log-buffer-size = 500;
      wakeup-period = "30 minutes";
    };
    extraction = {
      page-range = {
        begin = 10;
      };
      ghostscript = {
        working-dir = "/tmp/docspell-extraction";
        command = {
          program = "${pkgs.ghostscript}/bin/gs";
          args = [ "-dNOPAUSE" "-dBATCH" "-dSAFER" "-sDEVICE=tiffscaled8" "-sOutputFile={{outfile}}" "{{infile}}" ];
          timeout = "5 minutes";
        };
      };
      unpaper = {
        command = {
          program = "${pkgs.unpaper}/bin/unpaper";
          args = [ "{{infile}}" "{{outfile}}" ];
          timeout = "5 minutes";
        };
      };
      tesseract = {
        command = {
          program = "${pkgs.tesseract4}/bin/tesseract";
          args = ["{{file}}" "stdout" "-l" "{{lang}}" ];
          timeout = "5 minutes";
        };
      };
    };
  };

  # Declares a `command` option (program, args, timeout) whose defaults
  # are taken from `cmdDefaults`. Shared by ghostscript, unpaper and
  # tesseract, which all use the same shape.
  mkCommandOption = cmdDefaults: mkOption {
    type = types.submodule({
      options = {
        program = mkOption {
          type = types.str;
          default = cmdDefaults.program;
          description = "The path to the executable.";
        };
        args = mkOption {
          type = types.listOf types.str;
          default = cmdDefaults.args;
          description = "The arguments to the program";
        };
        timeout = mkOption {
          type = types.str;
          default = cmdDefaults.timeout;
          description = "The timeout when executing the command";
        };
      };
    });
    default = cmdDefaults;
    description = "The system command";
  };
in {

  ## interface
  options = {
    services.docspell-joex = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Whether to enable the docspell job executor.";
      };

      runAs = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = ''
          Specify a user for running the application. If null, a new
          user is created.
        '';
      };

      app-id = mkOption {
        type = types.str;
        default = defaults.app-id;
        description = "The node id. Must be unique across all docspell nodes.";
      };

      base-url = mkOption {
        type = types.str;
        default = defaults.base-url;
        description = "The base url where this joex instance is deployed.";
      };

      bind = mkOption {
        type = types.submodule({
          options = {
            address = mkOption {
              type = types.str;
              default = defaults.bind.address;
              description = "The address to bind the REST server to.";
            };
            port = mkOption {
              type = types.int;
              default = defaults.bind.port;
              description = "The port to bind the REST server";
            };
          };
        });
        default = defaults.bind;
        description = "Address and port bind the rest server.";
      };

      jdbc = mkOption {
        type = types.submodule ({
          options = {
            url = mkOption {
              type = types.str;
              default = defaults.jdbc.url;
              description = ''
                The URL to the database. By default a file-based database is
                used. It should also work with mariadb and postgresql.
                Examples:
                "jdbc:mariadb://192.168.1.172:3306/docspell"
                "jdbc:postgresql://localhost:5432/docspell"
                "jdbc:h2:///home/dbs/docspell.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
              '';
            };
            user = mkOption {
              type = types.str;
              default = defaults.jdbc.user;
              description = "The user name to connect to the database.";
            };
            password = mkOption {
              type = types.str;
              default = defaults.jdbc.password;
              description = "The password to connect to the database.";
            };
          };
        });
        default = defaults.jdbc;
        description = "Database connection settings";
      };

      scheduler = mkOption {
        type = types.submodule({
          options = {
            pool-size = mkOption {
              type = types.int;
              default = defaults.scheduler.pool-size;
              description = "Number of processing allowed in parallel.";
            };
            counting-scheme = mkOption {
              type = types.str;
              default = defaults.scheduler.counting-scheme;
              description = ''
                A counting scheme determines the ratio of how high- and low-prio
                jobs are run. For example: 4,1 means run 4 high prio jobs, then
                1 low prio and then start over.
              '';
            };
            retries = mkOption {
              type = types.int;
              default = defaults.scheduler.retries;
              description = ''
                How often a failed job should be retried until it enters failed
                state. If a job fails, it becomes "stuck" and will be retried
                after a delay.
              '';
            };
            retry-delay = mkOption {
              type = types.str;
              default = defaults.scheduler.retry-delay;
              description = ''
                The delay until the next try is performed for a failed job. This
                delay is increased exponentially with the number of retries.
              '';
            };
            log-buffer-size = mkOption {
              type = types.int;
              default = defaults.scheduler.log-buffer-size;
              description = ''
                The queue size of log statements from a job.
              '';
            };
            wakeup-period = mkOption {
              type = types.str;
              default = defaults.scheduler.wakeup-period;
              description = ''
                If no job is left in the queue, the scheduler will wait until a
                notify is requested (using the REST interface). To also retry
                stuck jobs, it will notify itself periodically.
              '';
            };
          };
        });
        default = defaults.scheduler;
        description = "Settings for the scheduler";
      };

      extraction = mkOption {
        type = types.submodule({
          options = {
            page-range = mkOption {
              type = types.submodule({
                options = {
                  begin = mkOption {
                    type = types.int;
                    default = defaults.extraction.page-range.begin;
                    description = "Specifies the first N pages of a file to process.";
                  };
                };
              });
              default = defaults.extraction.page-range;
              description = ''
                Defines what pages to process. If a PDF with 600 pages is
                submitted, it is probably not necessary to scan through all of
                them. This would take a long time and occupy resources for no
                value. The first few pages should suffice. The default is first
                10 pages.
                If you want all pages being processed, set this number to -1.
                Note: if you change the ghostscript command below, be aware that
                this setting (if not -1) will add another parameter to the
                beginning of the command.
              '';
            };

            ghostscript = mkOption {
              type = types.submodule({
                options = {
                  working-dir = mkOption {
                    type = types.str;
                    default = defaults.extraction.ghostscript.working-dir;
                    description = "Directory where the extraction processes can put their temp files";
                  };
                  command = mkCommandOption defaults.extraction.ghostscript.command;
                };
              });
              default = defaults.extraction.ghostscript;
              description = "The ghostscript command.";
            };

            unpaper = mkOption {
              type = types.submodule({
                options = {
                  command = mkCommandOption defaults.extraction.unpaper.command;
                };
              });
              default = defaults.extraction.unpaper;
              description = "The unpaper command.";
            };

            tesseract = mkOption {
              type = types.submodule({
                options = {
                  command = mkCommandOption defaults.extraction.tesseract.command;
                };
              });
              default = defaults.extraction.tesseract;
              description = "The tesseract command.";
            };
          };
        });
        default = defaults.extraction;
        description = ''
          Configuration of text extraction
          Extracting text currently only works for image and pdf files. It
          first runs ghostscript to create a gray image from a
          pdf. Then unpaper is run to optimize the image for the upcoming
          ocr, which will be done by tesseract. All these programs must be
          available in your PATH or the absolute path can be specified
          below.
        '';
      };
    };
  };

  ## implementation
  config = mkIf cfg.enable {

    # The user is only created when no existing one was requested.
    users.users."${user}" = mkIf (cfg.runAs == null) {
      name = user;
      isSystemUser = true;
      description = "Docspell user";
    };

    systemd.services.docspell-joex =
      let
        cmd = "${pkgs.docspell.joex}/bin/docspell-joex ${configFile}";
      in
      {
        description = "Docspell Joex";
        # `network.target` is the systemd unit; `networking.target` does
        # not exist, so ordering after it had no effect.
        after = [ "network.target" ];
        wantedBy = [ "multi-user.target" ];
        path = [ pkgs.gawk ];

        # The command is passed as one quoted string to the shell that
        # `su` starts for the service user.
        script =
          "${pkgs.su}/bin/su -s ${pkgs.bash}/bin/sh ${user} -c ${escapeShellArg cmd}";
      };
  };
}

273
nix/module-server.nix Normal file
View File

@ -0,0 +1,273 @@
# NixOS module for the docspell REST server.
#
# All options below `services.docspell-restserver` are serialised to
# JSON and passed to the program as its config file.
{config, lib, pkgs, ...}:

with lib;
let
  cfg = config.services.docspell-restserver;
  # A user named "docspell" is used unless `runAs` names another one.
  user = if cfg.runAs == null then "docspell" else cfg.runAs;

  # The complete option set (including `enable` and `runAs`) wrapped
  # into the `docspell.server` namespace.
  configFile = pkgs.writeText "docspell-server.conf" ''
    {"docspell": {"server":
    ${builtins.toJSON cfg}
    }}
  '';

  # Single place for all default values; referenced by the option
  # declarations below.
  defaults = {
    app-name = "Docspell";
    app-id = "rest1";
    base-url = "http://localhost:7880";
    bind = {
      address = "localhost";
      port = 7880;
    };
    auth = {
      server-secret = "hex:caffee";
      session-valid = "5 minutes";
    };
    backend = {
      jdbc = {
        url = "jdbc:h2:///tmp/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE";
        user = "sa";
        password = "";
      };
      signup = {
        mode = "open";
        new-invite-password = "";
        invite-time = "3 days";
      };
      files = {
        chunk-size = 524288;
        valid-mime-types = ["application/pdf"];
      };
    };
  };
in {

  ## interface
  options = {
    services.docspell-restserver = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Whether to enable docspell.";
      };

      runAs = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = ''
          Specify a user for running the application. If null, a new
          user is created.
        '';
      };

      app-name = mkOption {
        type = types.str;
        default = defaults.app-name;
        description = "The name used in the web ui and in notification mails.";
      };

      app-id = mkOption {
        type = types.str;
        default = defaults.app-id;
        description = ''
          This is the id of this node. If you run more than one server, you
          have to make sure to provide unique ids per node.
        '';
      };

      base-url = mkOption {
        type = types.str;
        default = defaults.base-url;
        description = ''
          This is the base URL this application is deployed to. This is used
          to create absolute URLs and to configure the cookie.
        '';
      };

      bind = mkOption {
        type = types.submodule({
          options = {
            address = mkOption {
              type = types.str;
              default = defaults.bind.address;
              description = "The address to bind the REST server to.";
            };
            port = mkOption {
              type = types.int;
              default = defaults.bind.port;
              description = "The port to bind the REST server";
            };
          };
        });
        default = defaults.bind;
        description = "Address and port bind the rest server.";
      };

      auth = mkOption {
        type = types.submodule({
          options = {
            server-secret = mkOption {
              type = types.str;
              default = defaults.auth.server-secret;
              description = ''
                The secret for this server that is used to sign the authenticator
                tokens. If multiple servers are running, all must share the same
                secret. You can use base64 or hex strings (prefix with b64: and
                hex:, respectively).
              '';
            };
            session-valid = mkOption {
              type = types.str;
              default = defaults.auth.session-valid;
              description = ''
                How long an authentication token is valid. The web application
                will get a new one periodically.
              '';
            };
          };
        });
        default = defaults.auth;
        description = "Authentication";
      };

      backend = mkOption {
        type = types.submodule({
          options = {
            jdbc = mkOption {
              type = types.submodule ({
                options = {
                  url = mkOption {
                    type = types.str;
                    default = defaults.backend.jdbc.url;
                    description = ''
                      The URL to the database. By default a file-based database is
                      used. It should also work with mariadb and postgresql.
                      Examples:
                      "jdbc:mariadb://192.168.1.172:3306/docspell"
                      "jdbc:postgresql://localhost:5432/docspell"
                      "jdbc:h2:///home/dbs/docspell.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
                    '';
                  };
                  user = mkOption {
                    type = types.str;
                    default = defaults.backend.jdbc.user;
                    description = "The user name to connect to the database.";
                  };
                  password = mkOption {
                    type = types.str;
                    default = defaults.backend.jdbc.password;
                    description = "The password to connect to the database.";
                  };
                };
              });
              default = defaults.backend.jdbc;
              description = "Database connection settings";
            };

            signup = mkOption {
              type = types.submodule ({
                options = {
                  mode = mkOption {
                    type = types.str;
                    default = defaults.backend.signup.mode;
                    description = ''
                      The mode defines if new users can signup or not. It can have
                      three values:
                      - open: every new user can sign up
                      - invite: new users can sign up only if they provide a correct
                      invitation key. Invitation keys can be generated by the
                      server.
                      - closed: signing up is disabled.
                    '';
                  };
                  new-invite-password = mkOption {
                    type = types.str;
                    default = defaults.backend.signup.new-invite-password;
                    description = ''
                      If mode == 'invite', a password must be provided to generate
                      invitation keys. It must not be empty.
                    '';
                  };
                  invite-time = mkOption {
                    type = types.str;
                    default = defaults.backend.signup.invite-time;
                    description = ''
                      If mode == 'invite', this is the period an invitation token is
                      considered valid.
                    '';
                  };
                };
              });
              # Must only use the option names declared above. The former
              # value used `newInvitePassword`/`inviteTime`, which are not
              # declared here, and a `mode` that differed from `defaults`.
              default = defaults.backend.signup;
              description = "Registration settings";
            };

            files = mkOption {
              type = types.submodule({
                options = {
                  chunk-size = mkOption {
                    type = types.int;
                    default = defaults.backend.files.chunk-size;
                    description = ''
                      Defines the chunk size (in bytes) used to store the files.
                      This will affect the memory footprint when uploading and
                      downloading files. At most this amount is loaded into RAM for
                      down- and uploading.
                      It also defines the chunk size used for the blobs inside the
                      database.
                    '';
                  };
                  valid-mime-types = mkOption {
                    type = types.listOf types.str;
                    default = defaults.backend.files.valid-mime-types;
                    description = ''
                      The file content types that are considered valid. Docspell
                      will only pass these files to processing. The processing code
                      itself has also checks for which files are supported and which
                      not. This affects the uploading part and is a first check to
                      avoid that 'bad' files get into the system.
                    '';
                  };
                };
              });
              default = defaults.backend.files;
              description= "Settings for how files are stored.";
            };
          };
        });
        default = defaults.backend;
        description = "Configuration for the backend";
      };
    };
  };

  ## implementation
  config = mkIf cfg.enable {

    # The user is only created when no existing one was requested.
    users.users."${user}" = mkIf (cfg.runAs == null) {
      name = user;
      isSystemUser = true;
      description = "Docspell user";
    };

    systemd.services.docspell-restserver =
      let
        cmd = "${pkgs.docspell.server}/bin/docspell-restserver ${configFile}";
      in
      {
        description = "Docspell Rest Server";
        # `network.target` is the systemd unit; `networking.target` does
        # not exist, so ordering after it had no effect.
        after = [ "network.target" ];
        wantedBy = [ "multi-user.target" ];
        path = [ pkgs.gawk ];

        # The command is passed as one quoted string to the shell that
        # `su` starts for the service user.
        script =
          "${pkgs.su}/bin/su -s ${pkgs.bash}/bin/sh ${user} -c ${escapeShellArg cmd}";
      };
  };
}

85
nix/pkg.nix Normal file
View File

@ -0,0 +1,85 @@
# Packages the docspell release artifacts of the given `version`.
#
# Usage: `callPackage (import ./pkg.nix "<version>") {}`. The result is
# an attribute set with the derivations `server`, `joex` and `tools`.
#
# NOTE(review): the sha256 values below are fixed while `version` is a
# parameter, so presumably only the release they were computed for can
# be fetched successfully -- confirm before using another version.
version: {stdenv, fetchzip, file, curl, inotifyTools, fetchurl, jre8_headless, bash}:
let
  meta = with stdenv.lib; {
    description = "Docspell helps to organize and archive your paper documents.";
    homepage = "https://github.com/eikek/docspell";
    license = licenses.gpl3;
    maintainers = [ maintainers.eikek ];
  };

  releaseUrl = "https://github.com/eikek/docspell/releases/download/v${version}";

  # Builds one of the two applications from its release zip:
  #   component -- suffix used for the derivation name
  #   program   -- name of the zip file and of the start script in it
  #   sha256    -- hash of the unpacked zip
  # The zip content is copied to $out/program and a wrapper script in
  # $out/bin starts the program with `-java-home` set to the JRE given
  # to this package.
  mkApp = { component, program, sha256 }: stdenv.mkDerivation {
    name = "docspell-${component}-${version}";

    src = fetchzip {
      url = "${releaseUrl}/${program}-${version}.zip";
      inherit sha256;
    };

    buildInputs = [ jre8_headless ];

    # Nothing to compile.
    buildPhase = "true";

    installPhase = ''
      mkdir -p $out/{bin,program}
      cp -R * $out/program/
      cat > $out/bin/${program} <<-EOF
      #!${bash}/bin/bash
      $out/program/bin/${program} -java-home ${jre8_headless} "\$@"
      EOF
      chmod 755 $out/bin/${program}
    '';

    inherit meta;
  };
in
{ server = mkApp {
    component = "server";
    program = "docspell-restserver";
    sha256 = "1mpyd66pcsd2q4wx9vszldqlamz9qgv6abrxh7xwzw23np61avy5";
  };

  joex = mkApp {
    component = "joex";
    program = "docspell-joex";
    sha256 = "1ycfcfcv24vvkdbzvnahj500gb5l9vdls4bxq0jd1zn72p4z765f";
  };

  # The shell scripts `consumedir.sh` and `ds` with the external
  # commands they call replaced by absolute store paths.
  tools = stdenv.mkDerivation {
    name = "docspell-tools-${version}";

    src = fetchzip {
      url = "${releaseUrl}/docspell-tools-${version}.zip";
      sha256 = "0hd93rlnnrq8xj7knp38x1jj2mv4y5lvbcv968bzk5f1az51qsvg";
    };

    # Nothing to compile.
    buildPhase = "true";

    installPhase = ''
      mkdir -p $out/bin
      cp $src/consumedir.sh $out/bin/
      cp $src/ds.sh $out/bin/ds
      sed -i 's,CURL_CMD="curl",CURL_CMD="${curl}/bin/curl",g' $out/bin/consumedir.sh
      sed -i 's,CURL_CMD="curl",CURL_CMD="${curl}/bin/curl",g' $out/bin/ds
      sed -i 's,INOTIFY_CMD="inotifywait",INOTIFY_CMD="${inotifyTools}/bin/inotifywait",g' $out/bin/consumedir.sh
      sed -i 's,FILE_CMD="file",FILE_CMD="${file}/bin/file",g' $out/bin/ds
      chmod 755 $out/bin/*
    '';

    inherit meta;
  };
}

20
nix/release.nix Normal file
View File

@ -0,0 +1,20 @@
# Entry point for using docspell from nix: exposes the package function
# and the paths of the NixOS modules.
let
  # The version is read from ../version.sbt: the text after the first
  # `:=`, with double quotes, newlines and spaces removed.
  currentVersion =
    let
      sbtText = builtins.readFile ../version.sbt;
      # `builtins.split` interleaves match groups (lists) with the text
      # pieces; only the text pieces are kept.
      pieces = builtins.filter builtins.isString (builtins.split ":=" sbtText);
      rhs = builtins.elemAt pieces 1;
    in
      builtins.replaceStrings ["\"" "\n" " "] ["" "" ""] rhs;
in
rec {
  # Package function for an explicit version.
  pkg = v: import ./pkg.nix v;
  # Package function for the version found in version.sbt.
  currentPkg = pkg currentVersion;

  module-joex = ./module-joex.nix;
  module-restserver = ./module-server.nix;
  module-consumedir = ./module-consumedir.nix;

  # All modules, ready to be used as an `imports` list.
  modules = [
    module-joex
    module-restserver
    module-consumedir
  ];
}