mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-28 17:55:06 +00:00
Update config file doc
This commit is contained in:
parent
fbe0c1aec5
commit
3f316ab4d0
@ -147,16 +147,31 @@ docspell.joex {
|
||||
# below. They must be in the PATH environment or specify the full
|
||||
# path below via the `program` key.
|
||||
convert {
|
||||
|
||||
# The chunk size used when storing files. This should be the same
|
||||
# as used with the rest server.
|
||||
chunk-size = 524288
|
||||
|
||||
max-image-size = 12000000
|
||||
# When reading images, this is the maximum size. Images that are
|
||||
# larger are not processed.
|
||||
max-image-size = ${docspell.joex.extraction.ocr.max-image-size}
|
||||
|
||||
# Settings when processing markdown files (and other text files)
|
||||
# to HTML.
|
||||
#
|
||||
# In order to support text formats, text files are first converted
|
||||
# to HTML using a markdown processor. The resulting HTML is then
|
||||
# converted to a PDF file.
|
||||
markdown {
|
||||
|
||||
# The CSS that is used to style the resulting HTML.
|
||||
internal-css = """
|
||||
body { padding: 2em 5em; }
|
||||
"""
|
||||
}
|
||||
|
||||
# To convert HTML files into PDF files, the external tool
|
||||
# wkhtmltopdf is used.
|
||||
wkhtmlpdf {
|
||||
cmd = {
|
||||
program = "wkhtmltopdf"
|
||||
@ -168,11 +183,13 @@ docspell.joex {
|
||||
"-",
|
||||
"{{outfile}}"
|
||||
]
|
||||
timeout = "20 seconds"
|
||||
timeout = "2 minutes"
|
||||
}
|
||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||
}
|
||||
|
||||
# To convert image files to PDF files, tesseract is used. This
|
||||
# also extracts the text in one go.
|
||||
tesseract = {
|
||||
cmd = {
|
||||
program = "tesseract"
|
||||
@ -184,11 +201,19 @@ docspell.joex {
|
||||
"pdf",
|
||||
"txt"
|
||||
]
|
||||
timeout = "120 seconds"
|
||||
timeout = "5 minutes"
|
||||
}
|
||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||
}
|
||||
|
||||
# To convert "office" files to PDF files, the external tool
|
||||
# unoconv is used. Unoconv uses libreoffice/openoffice for
|
||||
# converting. So it supports all formats that are possible to read
|
||||
# with libreoffice/openoffice.
|
||||
#
|
||||
# Note: to greatly improve performance, it is recommended to start
|
||||
# a libreoffice listener by running `unoconv -l` in a separate
|
||||
# process.
|
||||
unoconv = {
|
||||
cmd = {
|
||||
program = "unoconv"
|
||||
@ -199,7 +224,7 @@ docspell.joex {
|
||||
"{{outfile}}",
|
||||
"{{infile}}"
|
||||
]
|
||||
timeout = "20 seconds"
|
||||
timeout = "2 minutes"
|
||||
}
|
||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user