2019-07-17 20:03:10 +00:00
|
|
|
|
docspell.joex {
|
|
|
|
|
|
2019-07-22 22:53:30 +00:00
|
|
|
|
# This is the id of this node. If you run more than one server, you
|
|
|
|
|
# have to make sure to provide unique ids per node.
|
|
|
|
|
app-id = "joex1"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# This is the base URL this application is deployed to. This is used
|
|
|
|
|
# to register this joex instance such that docspell rest servers can
|
|
|
|
|
# reach them
|
|
|
|
|
base-url = "http://localhost:7878"
|
|
|
|
|
|
|
|
|
|
# Where the REST server binds to.
|
|
|
|
|
#
|
|
|
|
|
# JOEX provides a very simple REST interface to inspect its state.
|
|
|
|
|
bind {
|
|
|
|
|
address = "localhost"
|
|
|
|
|
port = 7878
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-19 13:00:47 +00:00
|
|
|
|
# Configures logging
|
|
|
|
|
logging {
|
|
|
|
|
# The format for the log messages. Can be one of:
|
|
|
|
|
# Json, Logfmt, Fancy or Plain
|
2022-02-23 22:26:11 +00:00
|
|
|
|
format = "Fancy"
|
2022-02-19 13:00:47 +00:00
|
|
|
|
|
|
|
|
|
# The minimum level to log. From lowest to highest:
|
|
|
|
|
# Trace, Debug, Info, Warn, Error
|
2022-04-30 16:26:19 +00:00
|
|
|
|
minimum-level = "Warn"
|
|
|
|
|
|
|
|
|
|
# Override the log level of specific loggers
|
|
|
|
|
levels = {
|
|
|
|
|
"docspell" = "Info"
|
|
|
|
|
"org.flywaydb" = "Info"
|
|
|
|
|
"binny" = "Info"
|
|
|
|
|
"org.http4s" = "Info"
|
|
|
|
|
}
|
2022-02-19 13:00:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
2019-07-22 22:53:30 +00:00
|
|
|
|
# The database connection.
|
|
|
|
|
#
|
|
|
|
|
# It must be the same connection as the rest server is using.
|
|
|
|
|
jdbc {
|
2021-10-24 22:21:18 +00:00
|
|
|
|
|
|
|
|
|
# The JDBC url to the database. By default a H2 file-based
|
|
|
|
|
# database is configured. You can provide a postgresql or mariadb
|
|
|
|
|
# connection here. When using H2 use the PostgreSQL compatibility
|
|
|
|
|
# mode and AUTO_SERVER feature.
|
2019-07-22 22:53:30 +00:00
|
|
|
|
url = "jdbc:h2://"${java.io.tmpdir}"/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
|
2021-10-24 22:21:18 +00:00
|
|
|
|
|
|
|
|
|
# The database user.
|
2019-07-22 22:53:30 +00:00
|
|
|
|
user = "sa"
|
2021-10-24 22:21:18 +00:00
|
|
|
|
|
|
|
|
|
# The database password.
|
2019-07-22 22:53:30 +00:00
|
|
|
|
password = ""
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-21 22:07:36 +00:00
|
|
|
|
# Additional settings related to schema migration.
|
|
|
|
|
database-schema = {
|
|
|
|
|
# Whether to run main database migrations.
|
|
|
|
|
run-main-migrations = true
|
|
|
|
|
|
|
|
|
|
# Whether to run the fixup migrations.
|
|
|
|
|
run-fixup-migrations = true
|
|
|
|
|
|
|
|
|
|
# Use with care. This repairs all migrations in the database by
|
|
|
|
|
# updating their checksums and removing failed migrations. Good
|
|
|
|
|
# for testing, not recommended for normal operation.
|
|
|
|
|
repair-schema = false
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-20 20:15:25 +00:00
|
|
|
|
# Enable or disable debugging for e-mail related functionality. This
|
|
|
|
|
# applies to both sending and receiving mails. For security reasons
|
|
|
|
|
# logging is not very extensive on authentication failures. Setting
|
|
|
|
|
# this to true, results in a lot of data printed to stdout.
|
|
|
|
|
mail-debug = false
|
|
|
|
|
|
2020-04-29 20:44:05 +00:00
|
|
|
|
send-mail {
|
|
|
|
|
# This is used as the List-Id e-mail header when mails are sent
|
|
|
|
|
# from docspell to its users (example: for notification mails). It
|
|
|
|
|
# is not used when sending to external recipients. If it is empty,
|
|
|
|
|
# no such header is added. Using this header is often useful when
|
|
|
|
|
# filtering mails.
|
|
|
|
|
#
|
|
|
|
|
# It should be a string in angle brackets. See
|
|
|
|
|
# https://tools.ietf.org/html/rfc2919 for a formal specification
|
|
|
|
|
# of this header.
|
|
|
|
|
list-id = ""
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-22 22:53:30 +00:00
|
|
|
|
# Configuration for the job scheduler.
|
|
|
|
|
scheduler {
|
|
|
|
|
|
|
|
|
|
# Each scheduler needs a unique name. This defaults to the node
|
|
|
|
|
# name, which must be unique, too.
|
|
|
|
|
name = ${docspell.joex.app-id}
|
|
|
|
|
|
|
|
|
|
# Number of processing allowed in parallel.
|
2021-01-11 20:20:08 +00:00
|
|
|
|
pool-size = 1
|
2019-07-22 22:53:30 +00:00
|
|
|
|
|
|
|
|
|
# A counting scheme determines the ratio of how high- and low-prio
|
|
|
|
|
# jobs are run. For example: 4,1 means run 4 high prio jobs, then
|
|
|
|
|
# 1 low prio and then start over.
|
|
|
|
|
counting-scheme = "4,1"
|
|
|
|
|
|
|
|
|
|
# How often a failed job should be retried until it enters failed
|
|
|
|
|
# state. If a job fails, it becomes "stuck" and will be retried
|
|
|
|
|
# after a delay.
|
2020-06-25 21:57:01 +00:00
|
|
|
|
retries = 2
|
2019-07-22 22:53:30 +00:00
|
|
|
|
|
|
|
|
|
# The delay until the next try is performed for a failed job. This
|
|
|
|
|
# delay is increased exponentially with the number of retries.
|
|
|
|
|
retry-delay = "1 minute"
|
|
|
|
|
|
|
|
|
|
# The queue size of log statements from a job.
|
|
|
|
|
log-buffer-size = 500
|
|
|
|
|
|
|
|
|
|
# If no job is left in the queue, the scheduler will wait until a
|
|
|
|
|
# notify is requested (using the REST interface). To also retry
|
|
|
|
|
# stuck jobs, it will notify itself periodically.
|
|
|
|
|
wakeup-period = "30 minutes"
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-08 01:48:47 +00:00
|
|
|
|
periodic-scheduler {
|
|
|
|
|
|
|
|
|
|
# Each scheduler needs a unique name. This defaults to the node
|
|
|
|
|
# name, which must be unique, too.
|
|
|
|
|
name = ${docspell.joex.app-id}
|
|
|
|
|
|
|
|
|
|
  # A fallback to start looking for due periodic tasks regularly.
|
|
|
|
|
# Usually joex instances should be notified via REST calls if
|
|
|
|
|
# external processes change tasks. But these requests may get
|
|
|
|
|
# lost.
|
|
|
|
|
wakeup-period = "10 minutes"
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-19 05:39:02 +00:00
|
|
|
|
# Configuration for the user-tasks.
|
|
|
|
|
user-tasks {
|
|
|
|
|
# Allows to import e-mails by scanning a mailbox.
|
|
|
|
|
scan-mailbox {
|
|
|
|
|
# A limit of how many folders to scan through. If a user
|
|
|
|
|
      # configures more than this, only up to this limit folders are
|
|
|
|
|
# scanned and a warning is logged.
|
|
|
|
|
max-folders = 50
|
|
|
|
|
|
|
|
|
|
# How many mails (headers only) to retrieve in one chunk.
|
2020-05-20 20:44:29 +00:00
|
|
|
|
#
|
|
|
|
|
# If this is greater than `max-mails' it is set automatically to
|
|
|
|
|
# the value of `max-mails'.
|
|
|
|
|
mail-chunk-size = 50
|
2020-05-19 05:39:02 +00:00
|
|
|
|
|
|
|
|
|
# A limit on how many mails to process in one job run. This is
|
2020-05-20 20:44:29 +00:00
|
|
|
|
# meant to avoid too heavy resource allocation to one
|
|
|
|
|
# user/collective.
|
2020-05-19 05:39:02 +00:00
|
|
|
|
#
|
|
|
|
|
# If more than this number of mails is encountered, a warning is
|
|
|
|
|
# logged.
|
2020-05-20 20:44:29 +00:00
|
|
|
|
max-mails = 500
|
2020-05-19 05:39:02 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-03-08 14:26:56 +00:00
|
|
|
|
# Docspell uses periodic house keeping tasks, like cleaning expired
|
|
|
|
|
# invites, that can be configured here.
|
|
|
|
|
house-keeping {
|
|
|
|
|
|
2020-03-09 19:24:00 +00:00
|
|
|
|
# When the house keeping tasks execute. Default is to run every
|
|
|
|
|
# week.
|
2022-03-01 20:40:23 +00:00
|
|
|
|
schedule = "Sun *-*-* 00:00:00 UTC"
|
2020-03-08 14:26:56 +00:00
|
|
|
|
|
|
|
|
|
# This task removes invitation keys that have been created but not
|
|
|
|
|
# used. The timespan here must be greater than the `invite-time'
|
|
|
|
|
# setting in the rest server config file.
|
|
|
|
|
cleanup-invites = {
|
2020-03-09 19:24:00 +00:00
|
|
|
|
|
|
|
|
|
# Whether this task is enabled.
|
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
# The minimum age of invites to be deleted.
|
|
|
|
|
older-than = "30 days"
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-03 23:59:08 +00:00
|
|
|
|
# This task removes expired remember-me tokens. The timespan
|
|
|
|
|
# should be greater than the `valid` time in the restserver
|
|
|
|
|
# config.
|
|
|
|
|
cleanup-remember-me = {
|
|
|
|
|
# Whether the job is enabled.
|
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
# The minimum age of tokens to be deleted.
|
|
|
|
|
older-than = "30 days"
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-09 19:24:00 +00:00
|
|
|
|
# Jobs store their log output in the database. Normally this data
|
|
|
|
|
# is only interesting for some period of time. The processing logs
|
|
|
|
|
# of old files can be removed eventually.
|
|
|
|
|
cleanup-jobs = {
|
|
|
|
|
|
|
|
|
|
# Whether this task is enabled.
|
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
# The minimum age of jobs to delete. It is matched against the
|
|
|
|
|
# `finished' timestamp.
|
2020-03-08 14:26:56 +00:00
|
|
|
|
older-than = "30 days"
|
2020-03-09 19:24:00 +00:00
|
|
|
|
|
|
|
|
|
# This defines how many jobs are deleted in one transaction.
|
|
|
|
|
# Since the data to delete may get large, it can be configured
|
|
|
|
|
# whether more or less memory should be used.
|
|
|
|
|
delete-batch = "100"
|
2020-03-08 14:26:56 +00:00
|
|
|
|
}
|
2021-02-17 23:34:37 +00:00
|
|
|
|
|
2022-04-09 12:01:36 +00:00
|
|
|
|
# Zip files created for downloading multiple files are cached and
|
|
|
|
|
# can be cleared periodically.
|
|
|
|
|
cleanup-downloads = {
|
|
|
|
|
|
|
|
|
|
# Whether to enable clearing old download archives.
|
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
# The minimum age of a download file to be deleted.
|
|
|
|
|
older-than = "14 days"
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-17 23:34:37 +00:00
|
|
|
|
# Removes node entries that are not reachable anymore.
|
|
|
|
|
check-nodes {
|
|
|
|
|
# Whether this task is enabled
|
|
|
|
|
enabled = true
|
|
|
|
|
# How often the node must be unreachable, before it is removed.
|
|
|
|
|
min-not-found = 2
|
|
|
|
|
}
|
2022-03-12 11:06:36 +00:00
|
|
|
|
|
|
|
|
|
# Checks all files against their checksum
|
|
|
|
|
integrity-check {
|
|
|
|
|
enabled = true
|
|
|
|
|
}
|
2020-03-08 14:26:56 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-08-19 20:24:35 +00:00
|
|
|
|
# A periodic task to check for new releases of docspell. It can
|
|
|
|
|
# inform about a new release via e-mail. You need to specify an
|
|
|
|
|
# account that has SMTP settings to use for sending.
|
|
|
|
|
update-check {
|
|
|
|
|
# Whether to enable this task
|
|
|
|
|
enabled = false
|
|
|
|
|
|
|
|
|
|
# Sends the mail without checking the latest release. Can be used
|
|
|
|
|
# if you want to see if mail sending works, but don't want to wait
|
|
|
|
|
# until a new release is published.
|
|
|
|
|
test-run = false
|
|
|
|
|
|
|
|
|
|
# When the update check should execute. Default is to run every
|
2022-03-01 20:40:23 +00:00
|
|
|
|
# week. You can specify a time zone identifier, like
|
|
|
|
|
# 'Europe/Berlin' at the end.
|
|
|
|
|
schedule = "Sun *-*-* 00:00:00 UTC"
|
2021-08-19 20:24:35 +00:00
|
|
|
|
|
|
|
|
|
# An account id in form of `collective/user` (or just `user` if
|
|
|
|
|
# collective and user name are the same). This user account must
|
|
|
|
|
# have at least one valid SMTP settings which are used to send the
|
|
|
|
|
# mail.
|
|
|
|
|
sender-account = ""
|
|
|
|
|
|
|
|
|
|
# The SMTP connection id that should be used for sending the mail.
|
|
|
|
|
smtp-id = ""
|
|
|
|
|
|
|
|
|
|
# A list of recipient e-mail addresses.
|
|
|
|
|
# Example: `[ "john.doe@gmail.com" ]`
|
|
|
|
|
recipients = []
|
|
|
|
|
|
|
|
|
|
# The subject of the mail. It supports the same variables as the
|
|
|
|
|
# body.
|
|
|
|
|
subject = "Docspell {{ latestVersion }} is available"
|
|
|
|
|
|
|
|
|
|
# The body of the mail. Subject and body can contain these
|
|
|
|
|
# variables which are replaced:
|
|
|
|
|
#
|
|
|
|
|
# - `latestVersion` the latest available version of Docspell
|
|
|
|
|
# - `currentVersion` the currently running (old) version of Docspell
|
|
|
|
|
# - `releasedAt` a date when the release was published
|
|
|
|
|
#
|
|
|
|
|
# The body is processed as markdown after the variables have been
|
|
|
|
|
# replaced.
|
|
|
|
|
body = """
|
|
|
|
|
Hello,
|
|
|
|
|
|
|
|
|
|
You are currently running Docspell {{ currentVersion }}. Version *{{ latestVersion }}*
|
|
|
|
|
is now available, which was released on {{ releasedAt }}. Check the release page at:
|
|
|
|
|
|
|
|
|
|
<https://github.com/eikek/docspell/releases/latest>
|
|
|
|
|
|
|
|
|
|
Have a nice day!
|
|
|
|
|
|
|
|
|
|
    Docspell Update Check
|
|
|
|
|
"""
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-22 22:53:30 +00:00
|
|
|
|
# Configuration of text extraction
|
|
|
|
|
extraction {
|
2020-02-19 22:27:00 +00:00
|
|
|
|
# For PDF files it is first tried to read the text parts of the
|
|
|
|
|
# PDF. But PDFs can be complex documents and they may contain text
|
|
|
|
|
# and images. If the returned text is shorter than the value
|
|
|
|
|
# below, OCR is run afterwards. Then both extracted texts are
|
|
|
|
|
# compared and the longer will be used.
|
2022-07-07 15:58:03 +00:00
|
|
|
|
#
|
|
|
|
|
# If you set this to 0 (or a negative value), then the text parts
|
|
|
|
|
# of a PDF are ignored and OCR is always run and its result used.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
pdf {
|
2020-08-01 13:44:46 +00:00
|
|
|
|
min-text-len = 500
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-11-09 00:18:48 +00:00
|
|
|
|
preview {
|
|
|
|
|
# When rendering a pdf page, use this dpi. This results in
|
|
|
|
|
# scaling the image. A standard A4 page rendered at 96dpi
|
|
|
|
|
# results in roughly 790x1100px image. Using 32 results in
|
|
|
|
|
# roughly 200x300px image.
|
|
|
|
|
#
|
|
|
|
|
# Note, when this is changed, you might want to re-generate
|
|
|
|
|
# preview images. Check the api for this, there is an endpoint
|
|
|
|
|
# to regenerate all for a collective.
|
|
|
|
|
dpi = 32
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-19 22:27:00 +00:00
|
|
|
|
# Extracting text using OCR works for image and pdf files. It will
|
|
|
|
|
# first run ghostscript to create a gray image from a pdf. Then
|
|
|
|
|
# unpaper is run to optimize the image for the upcoming ocr, which
|
|
|
|
|
# will be done by tesseract. All these programs must be available
|
|
|
|
|
# in your PATH or the absolute path can be specified below.
|
|
|
|
|
ocr {
|
|
|
|
|
|
|
|
|
|
# Images greater than this size are skipped. Note that every
|
2021-01-11 20:20:08 +00:00
|
|
|
|
# image is loaded completely into memory for doing OCR. This is
|
|
|
|
|
# the pixel count, `height * width` of the image.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
max-image-size = 14000000
|
|
|
|
|
|
|
|
|
|
# Defines what pages to process. If a PDF with 600 pages is
|
|
|
|
|
# submitted, it is probably not necessary to scan through all of
|
|
|
|
|
# them. This would take a long time and occupy resources for no
|
|
|
|
|
# value. The first few pages should suffice. The default is first
|
|
|
|
|
# 10 pages.
|
|
|
|
|
#
|
|
|
|
|
# If you want all pages being processed, set this number to -1.
|
|
|
|
|
#
|
|
|
|
|
# Note: if you change the ghostscript command below, be aware that
|
|
|
|
|
# this setting (if not -1) will add another parameter to the
|
|
|
|
|
# beginning of the command.
|
|
|
|
|
page-range {
|
|
|
|
|
begin = 10
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# The ghostscript command.
|
|
|
|
|
ghostscript {
|
|
|
|
|
command {
|
|
|
|
|
program = "gs"
|
|
|
|
|
args = [ "-dNOPAUSE"
|
|
|
|
|
, "-dBATCH"
|
|
|
|
|
, "-dSAFER"
|
|
|
|
|
, "-sDEVICE=tiffscaled8"
|
|
|
|
|
, "-sOutputFile={{outfile}}"
|
|
|
|
|
, "{{infile}}"
|
|
|
|
|
]
|
|
|
|
|
timeout = "5 minutes"
|
|
|
|
|
}
|
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-extraction"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# The unpaper command.
|
|
|
|
|
unpaper {
|
|
|
|
|
command {
|
|
|
|
|
program = "unpaper"
|
|
|
|
|
args = [ "{{infile}}", "{{outfile}}" ]
|
|
|
|
|
timeout = "5 minutes"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# The tesseract command.
|
|
|
|
|
tesseract {
|
|
|
|
|
command {
|
|
|
|
|
program = "tesseract"
|
|
|
|
|
args = ["{{file}}"
|
|
|
|
|
, "stdout"
|
|
|
|
|
, "-l"
|
|
|
|
|
, "{{lang}}"
|
|
|
|
|
]
|
|
|
|
|
timeout = "5 minutes"
|
|
|
|
|
}
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
2020-02-19 22:27:00 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-27 21:54:49 +00:00
|
|
|
|
# Settings for text analysis
|
|
|
|
|
text-analysis {
|
|
|
|
|
# Maximum length of text to be analysed.
|
|
|
|
|
#
|
|
|
|
|
# All text to analyse must fit into RAM. A large document may take
|
|
|
|
|
# too much heap. Also, most important information is at the
|
|
|
|
|
# beginning of a document, so in most cases the first two pages
|
2021-01-22 21:56:51 +00:00
|
|
|
|
# should suffice. Default is 5000, which are about 2 pages (just a
|
|
|
|
|
# rough guess, of course). For my data, more than 80% of the
|
|
|
|
|
# documents are less than 5000 characters.
|
|
|
|
|
#
|
|
|
|
|
    # This value applies to nlp and the classifier. If this value is
|
|
|
|
|
# <= 0, the limit is disabled.
|
|
|
|
|
max-length = 5000
|
2020-08-24 21:25:57 +00:00
|
|
|
|
|
|
|
|
|
# A working directory for the analyser to store temporary/working
|
|
|
|
|
# files.
|
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-analysis"
|
|
|
|
|
|
2021-01-16 22:43:24 +00:00
|
|
|
|
nlp {
|
|
|
|
|
# The mode for configuring NLP models:
|
2021-01-13 23:55:19 +00:00
|
|
|
|
#
|
2021-01-16 22:43:24 +00:00
|
|
|
|
# 1. full – builds the complete pipeline
|
|
|
|
|
# 2. basic - builds only the ner annotator
|
|
|
|
|
# 3. regexonly - matches each entry in your address book via regexps
|
|
|
|
|
# 4. disabled - doesn't use any stanford-nlp feature
|
|
|
|
|
#
|
|
|
|
|
# The full and basic variants rely on pre-build language models
|
2021-01-20 20:35:54 +00:00
|
|
|
|
# that are available for only a few languages. Memory usage
|
|
|
|
|
# varies among the languages. So joex should run with -Xmx1400M
|
|
|
|
|
# at least when using mode=full.
|
2021-01-13 23:55:19 +00:00
|
|
|
|
#
|
|
|
|
|
# The basic variant does a quite good job for German and
|
|
|
|
|
# English. It might be worse for French, always depending on the
|
2021-01-20 20:35:54 +00:00
|
|
|
|
# type of text that is analysed. Joex should run with about 500M
|
2021-01-16 22:43:24 +00:00
|
|
|
|
      # heap, here again language German uses the most.
|
|
|
|
|
#
|
|
|
|
|
# The regexonly variant doesn't depend on a language. It roughly
|
|
|
|
|
# works by converting all entries in your addressbook into
|
|
|
|
|
# regexps and matches each one against the text. This can get
|
|
|
|
|
# memory intensive, too, when the addressbook grows large. This
|
|
|
|
|
# is included in the full and basic by default, but can be used
|
|
|
|
|
      # independently by setting mode=regexonly.
|
|
|
|
|
#
|
|
|
|
|
# When mode=disabled, then the whole nlp pipeline is disabled,
|
|
|
|
|
# and you won't get any suggestions. Only what the classifier
|
|
|
|
|
# returns (if enabled).
|
2021-01-13 23:55:19 +00:00
|
|
|
|
mode = full
|
2021-01-05 23:35:58 +00:00
|
|
|
|
|
2021-01-16 22:43:24 +00:00
|
|
|
|
# The StanfordCoreNLP library caches language models which
|
|
|
|
|
# requires quite some amount of memory. Setting this interval to a
|
|
|
|
|
# positive duration, the cache is cleared after this amount of
|
|
|
|
|
# idle time. Set it to 0 to disable it if you have enough memory,
|
|
|
|
|
# processing will be faster.
|
2021-01-13 23:55:19 +00:00
|
|
|
|
#
|
2021-01-16 22:43:24 +00:00
|
|
|
|
# This has only any effect, if mode != disabled.
|
|
|
|
|
clear-interval = "15 minutes"
|
2020-08-24 21:25:57 +00:00
|
|
|
|
|
2021-01-20 18:17:29 +00:00
|
|
|
|
# Restricts proposals for due dates. Only dates earlier than this
|
|
|
|
|
# number of years in the future are considered.
|
|
|
|
|
max-due-date-years = 10
|
|
|
|
|
|
2021-01-16 22:43:24 +00:00
|
|
|
|
regex-ner {
|
|
|
|
|
# Whether to enable custom NER annotation. This uses the
|
|
|
|
|
# address book of a collective as input for NER tagging (to
|
|
|
|
|
# automatically find correspondent and concerned entities). If
|
|
|
|
|
# the address book is large, this can be quite memory
|
|
|
|
|
# intensive and also makes text analysis much slower. But it
|
|
|
|
|
# improves accuracy and can be used independent of the
|
|
|
|
|
        # language. If this is set to 0, it is effectively disabled
|
|
|
|
|
# and NER tagging uses only statistical models (that also work
|
|
|
|
|
# quite well, but are restricted to the languages mentioned
|
|
|
|
|
# above).
|
|
|
|
|
#
|
|
|
|
|
# Note, this is only relevant if nlp-config.mode is not
|
|
|
|
|
# "disabled".
|
|
|
|
|
max-entries = 1000
|
|
|
|
|
|
|
|
|
|
# The NER annotation uses a file of patterns that is derived
|
|
|
|
|
        # from a collective's address book. This is the time how
|
|
|
|
|
# long this data will be kept until a check for a state change
|
|
|
|
|
# is done.
|
|
|
|
|
file-cache-time = "1 minute"
|
|
|
|
|
}
|
2020-08-24 21:25:57 +00:00
|
|
|
|
}
|
2020-08-28 20:17:49 +00:00
|
|
|
|
|
|
|
|
|
# Settings for doing document classification.
|
|
|
|
|
#
|
2021-01-20 20:35:54 +00:00
|
|
|
|
# This works by learning from existing documents. This requires a
|
|
|
|
|
    # statistical model that is computed from all existing documents.
|
|
|
|
|
# This process is run periodically as configured by the
|
|
|
|
|
# collective. It may require more memory, depending on the amount
|
|
|
|
|
# of data.
|
2020-08-28 20:17:49 +00:00
|
|
|
|
#
|
|
|
|
|
# It utilises this NLP library: https://nlp.stanford.edu/.
|
|
|
|
|
classification {
|
|
|
|
|
# Whether to enable classification globally. Each collective can
|
2021-01-20 20:35:54 +00:00
|
|
|
|
# enable/disable auto-tagging. The classifier is also used for
|
|
|
|
|
# finding correspondents and concerned entities, if enabled
|
|
|
|
|
# here.
|
2020-08-28 20:17:49 +00:00
|
|
|
|
enabled = true
|
|
|
|
|
|
|
|
|
|
# If concerned with memory consumption, this restricts the
|
|
|
|
|
# number of items to consider. More are better for training. A
|
2021-01-20 20:35:54 +00:00
|
|
|
|
# negative value or zero means to train on all items.
|
2021-01-22 21:56:51 +00:00
|
|
|
|
#
|
|
|
|
|
# This limit and `text-analysis.max-length` define how much
|
|
|
|
|
# memory is required. On weaker hardware, it is advised to play
|
|
|
|
|
# with these values.
|
2021-01-21 16:46:39 +00:00
|
|
|
|
item-count = 600
|
2020-08-28 20:17:49 +00:00
|
|
|
|
|
|
|
|
|
# These settings are used to configure the classifier. If
|
|
|
|
|
# multiple are given, they are all tried and the "best" is
|
|
|
|
|
# chosen at the end. See
|
2020-08-31 20:35:27 +00:00
|
|
|
|
# https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/classify/ColumnDataClassifier.html
|
2020-09-01 21:57:27 +00:00
|
|
|
|
# for more info about these settings. The settings here yielded
|
|
|
|
|
# good results with *my* dataset.
|
2020-08-28 20:17:49 +00:00
|
|
|
|
#
|
|
|
|
|
# Enclose regexps in triple quotes.
|
|
|
|
|
classifiers = [
|
|
|
|
|
{ "useSplitWords" = "true"
|
|
|
|
|
"splitWordsTokenizerRegexp" = """[\p{L}][\p{L}0-9]*|(?:\$ ?)?[0-9]+(?:\.[0-9]{2})?%?|\s+|."""
|
|
|
|
|
"splitWordsIgnoreRegexp" = """\s+"""
|
|
|
|
|
"useSplitPrefixSuffixNGrams" = "true"
|
|
|
|
|
"maxNGramLeng" = "4"
|
|
|
|
|
"minNGramLeng" = "1"
|
|
|
|
|
"splitWordShape" = "chris4"
|
2020-09-01 21:57:27 +00:00
|
|
|
|
"intern" = "true" # makes it slower but saves memory
|
2020-08-28 20:17:49 +00:00
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
2020-03-27 21:54:49 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-02-19 22:27:00 +00:00
|
|
|
|
# Configuration for converting files into PDFs.
|
|
|
|
|
#
|
|
|
|
|
# Most of it is delegated to external tools, which can be configured
|
|
|
|
|
# below. They must be in the PATH environment or specify the full
|
|
|
|
|
# path below via the `program` key.
|
|
|
|
|
convert {
|
2020-02-20 20:10:00 +00:00
|
|
|
|
|
|
|
|
|
# The chunk size used when storing files. This should be the same
|
|
|
|
|
# as used with the rest server.
|
2021-09-21 22:28:47 +00:00
|
|
|
|
chunk-size = ${docspell.joex.files.chunk-size}
|
2020-02-19 22:27:00 +00:00
|
|
|
|
|
2020-11-08 00:23:07 +00:00
|
|
|
|
# A string used to change the filename of the converted pdf file.
|
|
|
|
|
# If empty, the original file name is used for the pdf file ( the
|
|
|
|
|
# extension is always replaced with `pdf`).
|
|
|
|
|
converted-filename-part = "converted"
|
|
|
|
|
|
2020-02-20 20:10:00 +00:00
|
|
|
|
# When reading images, this is the maximum size. Images that are
|
|
|
|
|
# larger are not processed.
|
|
|
|
|
max-image-size = ${docspell.joex.extraction.ocr.max-image-size}
|
2020-02-19 22:27:00 +00:00
|
|
|
|
|
2020-02-20 20:10:00 +00:00
|
|
|
|
# Settings when processing markdown files (and other text files)
|
|
|
|
|
# to HTML.
|
|
|
|
|
#
|
|
|
|
|
# In order to support text formats, text files are first converted
|
|
|
|
|
# to HTML using a markdown processor. The resulting HTML is then
|
|
|
|
|
# converted to a PDF file.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
markdown {
|
2020-02-20 20:10:00 +00:00
|
|
|
|
|
|
|
|
|
# The CSS that is used to style the resulting HTML.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
internal-css = """
|
|
|
|
|
body { padding: 2em 5em; }
|
|
|
|
|
"""
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
|
|
|
|
|
2022-11-04 23:48:32 +00:00
|
|
|
|
# Which HTML->PDF converter command to use. One of: wkhtmlpdf,
|
|
|
|
|
# weasyprint.
|
|
|
|
|
html-converter = "wkhtmlpdf"
|
|
|
|
|
|
2020-02-20 20:10:00 +00:00
|
|
|
|
# To convert HTML files into PDF files, the external tool
|
|
|
|
|
# wkhtmltopdf is used.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
wkhtmlpdf {
|
2020-02-22 11:40:56 +00:00
|
|
|
|
command = {
|
2020-02-19 22:27:00 +00:00
|
|
|
|
program = "wkhtmltopdf"
|
|
|
|
|
args = [
|
|
|
|
|
"-s",
|
|
|
|
|
"A4",
|
|
|
|
|
"--encoding",
|
2020-03-23 21:43:15 +00:00
|
|
|
|
"{{encoding}}",
|
|
|
|
|
"--load-error-handling", "ignore",
|
|
|
|
|
"--load-media-error-handling", "ignore",
|
2020-02-19 22:27:00 +00:00
|
|
|
|
"-",
|
|
|
|
|
"{{outfile}}"
|
|
|
|
|
]
|
2020-02-20 20:10:00 +00:00
|
|
|
|
timeout = "2 minutes"
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
2022-11-04 23:48:32 +00:00
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-wkhtmltopdf"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# An alternative to wkhtmltopdf is weasyprint.
|
|
|
|
|
weasyprint {
|
|
|
|
|
command = {
|
|
|
|
|
program = "weasyprint"
|
|
|
|
|
args = [
|
|
|
|
|
"--optimize-size", "all",
|
|
|
|
|
"--encoding", "{{encoding}}",
|
|
|
|
|
"-",
|
|
|
|
|
"{{outfile}}"
|
|
|
|
|
]
|
|
|
|
|
timeout = "2 minutes"
|
|
|
|
|
}
|
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-weasyprint"
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-02-20 20:10:00 +00:00
|
|
|
|
# To convert image files to PDF files, tesseract is used. This
|
|
|
|
|
# also extracts the text in one go.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
tesseract = {
|
2020-02-22 11:40:56 +00:00
|
|
|
|
command = {
|
2019-07-22 22:53:30 +00:00
|
|
|
|
program = "tesseract"
|
2020-02-19 22:27:00 +00:00
|
|
|
|
args = [
|
|
|
|
|
"{{infile}}",
|
|
|
|
|
"out",
|
|
|
|
|
"-l",
|
|
|
|
|
"{{lang}}",
|
|
|
|
|
"pdf",
|
|
|
|
|
"txt"
|
|
|
|
|
]
|
2020-02-20 20:10:00 +00:00
|
|
|
|
timeout = "5 minutes"
|
2020-02-19 22:27:00 +00:00
|
|
|
|
}
|
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-20 20:10:00 +00:00
|
|
|
|
# To convert "office" files to PDF files, the external tool
|
|
|
|
|
# unoconv is used. Unoconv uses libreoffice/openoffice for
|
|
|
|
|
# converting. So it supports all formats that are possible to read
|
|
|
|
|
    # with libreoffice/openoffice.
|
|
|
|
|
#
|
|
|
|
|
# Note: to greatly improve performance, it is recommended to start
|
|
|
|
|
# a libreoffice listener by running `unoconv -l` in a separate
|
|
|
|
|
# process.
|
2020-02-19 22:27:00 +00:00
|
|
|
|
unoconv = {
|
2020-02-22 11:40:56 +00:00
|
|
|
|
command = {
|
2020-02-19 22:27:00 +00:00
|
|
|
|
program = "unoconv"
|
|
|
|
|
args = [
|
|
|
|
|
"-f",
|
|
|
|
|
"pdf",
|
|
|
|
|
"-o",
|
|
|
|
|
"{{outfile}}",
|
|
|
|
|
"{{infile}}"
|
|
|
|
|
]
|
2020-02-20 20:10:00 +00:00
|
|
|
|
timeout = "2 minutes"
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
2020-02-19 22:27:00 +00:00
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
2020-07-18 10:48:41 +00:00
|
|
|
|
|
|
|
|
|
# The tool ocrmypdf can be used to convert pdf files to pdf files
|
|
|
|
|
# in order to add extracted text as a separate layer. This makes
|
|
|
|
|
# image-only pdfs searchable and you can select and copy/paste the
|
|
|
|
|
# text. It also converts pdfs into pdf/a type pdfs, which are best
|
|
|
|
|
# suited for archiving. So it makes sense to use this even for
|
|
|
|
|
# text-only pdfs.
|
|
|
|
|
#
|
|
|
|
|
    # It is recommended to install ocrmypdf, but it is also optional.
|
|
|
|
|
# If it is enabled but fails, the error is not fatal and the
|
|
|
|
|
# processing will continue using the original pdf for extracting
|
|
|
|
|
# text. You can also disable it to remove the errors from the
|
|
|
|
|
# processing logs.
|
|
|
|
|
#
|
|
|
|
|
# The `--skip-text` option is necessary to not fail on "text" pdfs
|
|
|
|
|
# (where ocr is not necessary). In this case, the pdf will be
|
|
|
|
|
# converted to PDF/A.
|
|
|
|
|
ocrmypdf = {
|
|
|
|
|
enabled = true
|
|
|
|
|
command = {
|
|
|
|
|
program = "ocrmypdf"
|
|
|
|
|
args = [
|
|
|
|
|
"-l", "{{lang}}",
|
|
|
|
|
"--skip-text",
|
|
|
|
|
"--deskew",
|
|
|
|
|
"-j", "1",
|
|
|
|
|
"{{infile}}",
|
|
|
|
|
"{{outfile}}"
|
|
|
|
|
]
|
|
|
|
|
timeout = "5 minutes"
|
|
|
|
|
}
|
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
|
|
|
|
}
|
2021-09-29 21:04:26 +00:00
|
|
|
|
|
|
|
|
|
# Allows to try to decrypt a PDF with encryption or protection. If
|
|
|
|
|
# enabled, a PDFs encryption or protection will be removed during
|
|
|
|
|
# conversion.
|
|
|
|
|
#
|
|
|
|
|
# For encrypted PDFs, this is necessary to be processed, because
|
|
|
|
|
# docspell needs to read it. It also requires to specify a
|
|
|
|
|
# password here. All passwords are tried when reading a PDF.
|
|
|
|
|
#
|
2021-09-30 09:11:08 +00:00
|
|
|
|
# This is enabled by default with an empty password list. This
|
2021-09-29 21:04:26 +00:00
|
|
|
|
# removes protection from PDFs, which is better for processing.
|
2021-09-30 09:11:08 +00:00
|
|
|
|
#
|
|
|
|
|
# Passwords can be given here and each collective can maintain
|
|
|
|
|
# their passwords as well. But if the `enabled` setting below is
|
|
|
|
|
# `false`, then no attempt at decrypting is done.
|
2021-09-29 21:04:26 +00:00
|
|
|
|
decrypt-pdf = {
|
|
|
|
|
enabled = true
|
|
|
|
|
passwords = []
|
|
|
|
|
}
|
2019-07-22 22:53:30 +00:00
|
|
|
|
}
|
2020-05-18 13:58:31 +00:00
|
|
|
|
|
|
|
|
|
# The same section is also present in the rest-server config. It is
|
|
|
|
|
# used when submitting files into the job queue for processing.
|
|
|
|
|
#
|
|
|
|
|
# Currently, these settings may affect memory usage of all nodes, so
|
|
|
|
|
# it should be the same on all nodes.
|
|
|
|
|
files {
|
|
|
|
|
# Defines the chunk size (in bytes) used to store the files.
|
|
|
|
|
# This will affect the memory footprint when uploading and
|
|
|
|
|
# downloading files. At most this amount is loaded into RAM for
|
|
|
|
|
# down- and uploading.
|
|
|
|
|
#
|
|
|
|
|
# It also defines the chunk size used for the blobs inside the
|
|
|
|
|
# database.
|
|
|
|
|
chunk-size = 524288
|
|
|
|
|
|
|
|
|
|
# The file content types that are considered valid. Docspell
|
|
|
|
|
# will only pass these files to processing. The processing code
|
|
|
|
|
# itself has also checks for which files are supported and which
|
|
|
|
|
# not. This affects the uploading part and can be used to
|
|
|
|
|
# restrict file types that should be handed over to processing.
|
|
|
|
|
# By default all files are allowed.
|
|
|
|
|
valid-mime-types = [ ]
|
2022-03-07 16:21:38 +00:00
|
|
|
|
|
|
|
|
|
# The id of an enabled store from the `stores` array that should
|
|
|
|
|
# be used.
|
|
|
|
|
#
|
|
|
|
|
# IMPORTANT NOTE: All nodes must have the exact same file store
|
|
|
|
|
# configuration!
|
|
|
|
|
default-store = "database"
|
|
|
|
|
|
|
|
|
|
# A list of possible file stores. Each entry must have a unique
|
|
|
|
|
# id. The `type` is one of: default-database, filesystem, s3.
|
|
|
|
|
#
|
|
|
|
|
# The enabled property serves currently to define target stores
|
|
|
|
|
    # for the "copy files" task. All stores with enabled=false are
|
|
|
|
|
# removed from the list. The `default-store` must be enabled.
|
|
|
|
|
stores = {
|
|
|
|
|
database =
|
|
|
|
|
{ enabled = true
|
|
|
|
|
type = "default-database"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
filesystem =
|
|
|
|
|
{ enabled = false
|
|
|
|
|
type = "file-system"
|
|
|
|
|
directory = "/some/directory"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
minio =
|
|
|
|
|
{ enabled = false
|
|
|
|
|
type = "s3"
|
|
|
|
|
endpoint = "http://localhost:9000"
|
|
|
|
|
access-key = "username"
|
|
|
|
|
secret-key = "password"
|
|
|
|
|
bucket = "docspell"
|
2023-11-17 19:53:27 +00:00
|
|
|
|
region = ""
|
2022-03-07 16:21:38 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-05-18 13:58:31 +00:00
|
|
|
|
}
|
2020-06-16 22:24:23 +00:00
|
|
|
|
|
2022-04-24 16:34:22 +00:00
|
|
|
|
# Configuration of the full-text search engine. (the same must be used for restserver)
|
2020-06-16 22:24:23 +00:00
|
|
|
|
full-text-search {
|
2020-06-23 23:16:13 +00:00
|
|
|
|
# The full-text search feature can be disabled. It requires an
|
2020-06-24 19:25:46 +00:00
|
|
|
|
# additional index server which needs additional memory and disk
|
|
|
|
|
# space. It can be enabled at any later time.
|
2020-06-23 23:16:13 +00:00
|
|
|
|
#
|
2022-04-24 16:34:22 +00:00
|
|
|
|
# Currently the SOLR search platform and PostgreSQL are supported.
|
2020-06-22 20:54:39 +00:00
|
|
|
|
enabled = false
|
2020-06-23 23:16:13 +00:00
|
|
|
|
|
2022-03-21 10:04:58 +00:00
|
|
|
|
# Which backend to use, either solr or postgresql
|
|
|
|
|
backend = "solr"
|
|
|
|
|
|
2020-06-23 23:16:13 +00:00
|
|
|
|
# Configuration for the SOLR backend.
|
2020-06-16 22:24:23 +00:00
|
|
|
|
solr = {
|
2020-06-24 19:25:46 +00:00
|
|
|
|
# The URL to solr
|
2020-06-22 20:54:39 +00:00
|
|
|
|
url = "http://localhost:8983/solr/docspell"
|
2020-06-24 19:25:46 +00:00
|
|
|
|
# Used to tell solr when to commit the data
|
2020-06-19 18:49:59 +00:00
|
|
|
|
commit-within = 1000
|
2020-06-24 19:25:46 +00:00
|
|
|
|
# If true, logs request and response bodies
|
|
|
|
|
log-verbose = false
|
2020-06-24 20:55:11 +00:00
|
|
|
|
# The defType parameter to lucene that defines the parser to
|
|
|
|
|
# use. You might want to try "edismax" or look here:
|
2021-05-31 11:25:57 +00:00
|
|
|
|
# https://solr.apache.org/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing
|
2020-06-24 20:55:11 +00:00
|
|
|
|
def-type = "lucene"
|
|
|
|
|
# The default combiner for tokens. One of {AND, OR}.
|
|
|
|
|
q-op = "OR"
|
2020-06-16 22:24:23 +00:00
|
|
|
|
}
|
2020-06-23 23:16:13 +00:00
|
|
|
|
|
2022-03-21 10:04:58 +00:00
|
|
|
|
# Configuration for PostgreSQL backend
|
|
|
|
|
postgresql = {
|
|
|
|
|
# Whether to use the default database, only works if it is
|
|
|
|
|
# postgresql
|
|
|
|
|
use-default-connection = false
|
|
|
|
|
|
|
|
|
|
# The database connection.
|
|
|
|
|
jdbc {
|
|
|
|
|
url = "jdbc:postgresql://server:5432/db"
|
|
|
|
|
user = "pguser"
|
|
|
|
|
password = ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# A mapping from a language to a postgres text search config. By
|
|
|
|
|
# default a language is mapped to a predefined config.
|
|
|
|
|
# PostgreSQL has predefined configs for some languages. This
|
|
|
|
|
# setting allows to create a custom text search config and
|
|
|
|
|
# define it here for some or all languages.
|
|
|
|
|
#
|
|
|
|
|
# Example:
|
|
|
|
|
# { german = "my-german" }
|
|
|
|
|
#
|
|
|
|
|
# See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
|
|
|
|
|
pg-config = {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Define which query parser to use.
|
|
|
|
|
#
|
|
|
|
|
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
|
|
|
|
|
pg-query-parser = "websearch_to_tsquery"
|
|
|
|
|
|
|
|
|
|
# Allows to define a normalization for the ranking.
|
|
|
|
|
#
|
|
|
|
|
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
|
|
|
|
|
pg-rank-normalization = [ 4 ]
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-23 23:16:13 +00:00
|
|
|
|
# Settings for running the index migration tasks
|
|
|
|
|
migration = {
|
|
|
|
|
# Chunk size to use when indexing data from the database. This
|
|
|
|
|
# many attachments are loaded into memory and pushed to the
|
|
|
|
|
# full-text index.
|
|
|
|
|
index-all-chunk = 10
|
|
|
|
|
}
|
2020-06-16 22:24:23 +00:00
|
|
|
|
}
|
2022-04-22 12:07:28 +00:00
|
|
|
|
|
|
|
|
|
addons {
|
|
|
|
|
# A directory to extract addons when running them. Everything in
|
|
|
|
|
# here will be cleared after each run.
|
|
|
|
|
working-dir = ${java.io.tmpdir}"/docspell-addons"
|
|
|
|
|
|
|
|
|
|
# A directory for addons to store data between runs. This is not
|
|
|
|
|
# cleared by Docspell and can get large depending on the addons
|
|
|
|
|
# executed.
|
|
|
|
|
#
|
|
|
|
|
# This directory is used as base. In it subdirectories are created
|
|
|
|
|
# per run configuration id.
|
|
|
|
|
cache-dir = ${java.io.tmpdir}"/docspell-addon-cache"
|
|
|
|
|
|
|
|
|
|
executor-config {
|
|
|
|
|
# Define a (comma or whitespace separated) list of runners that
|
|
|
|
|
# are responsible for executing an addon. This setting is
|
|
|
|
|
# compared to what is supported by addons. Possible values are:
|
|
|
|
|
#
|
|
|
|
|
# - nix-flake: use nix-flake runner if the addon supports it
|
|
|
|
|
# (this requires the nix package manager on the joex machine)
|
|
|
|
|
# - docker: use docker
|
|
|
|
|
# - trivial: use the trivial runner
|
|
|
|
|
#
|
|
|
|
|
# The first successful execution is used. This should list all
|
|
|
|
|
# runners the computer supports.
|
|
|
|
|
runner = "nix-flake, docker, trivial"
|
|
|
|
|
|
|
|
|
|
# systemd-nspawn can be used to run the program in a container.
|
|
|
|
|
# This is used by runners nix-flake and trivial.
|
|
|
|
|
nspawn = {
|
|
|
|
|
# If this is false, systemd-nspawn is not tried. When true, the
|
|
|
|
|
# addon is executed inside a lightweight container via
|
|
|
|
|
# systemd-nspawn.
|
|
|
|
|
enabled = false
|
|
|
|
|
|
|
|
|
|
# Path to sudo command. By default systemd-nspawn is executed
|
|
|
|
|
# via sudo - the user running joex must be allowed to do so NON
|
|
|
|
|
# INTERACTIVELY. If this is empty, then nspawn is tried to
|
|
|
|
|
# execute without sudo.
|
|
|
|
|
sudo-binary = "sudo"
|
|
|
|
|
|
|
|
|
|
# Path to the systemd-nspawn command.
|
|
|
|
|
nspawn-binary = "systemd-nspawn"
|
|
|
|
|
|
|
|
|
|
# Workaround, if multiple same named containers are run too fast
|
|
|
|
|
container-wait = "100 millis"
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-19 21:58:53 +00:00
|
|
|
|
# When multiple addons are executed sequentially, stop after the
|
|
|
|
|
# first failing result. If this is false, then subsequent addons
|
|
|
|
|
# will be run for their side effects only.
|
|
|
|
|
fail-fast = true
|
|
|
|
|
|
2022-04-22 12:07:28 +00:00
|
|
|
|
# The timeout for running an addon.
|
|
|
|
|
run-timeout = "15 minutes"
|
|
|
|
|
|
|
|
|
|
# Configure the nix flake runner.
|
|
|
|
|
nix-runner {
|
|
|
|
|
# Path to the nix command.
|
|
|
|
|
nix-binary = "nix"
|
|
|
|
|
|
|
|
|
|
# The timeout for building the package (running nix build).
|
|
|
|
|
build-timeout = "15 minutes"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Configure the docker runner
|
|
|
|
|
docker-runner {
|
|
|
|
|
# Path to the docker command.
|
|
|
|
|
docker-binary = "docker"
|
|
|
|
|
|
|
|
|
|
# The timeout for building the package (running docker build).
|
|
|
|
|
build-timeout = "15 minutes"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-07-17 20:03:10 +00:00
|
|
|
|
}
|