mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-04 06:05:59 +00:00
Update config file doc
This commit is contained in:
parent
fbe0c1aec5
commit
3f316ab4d0
@ -147,16 +147,31 @@ docspell.joex {
|
|||||||
# below. They must be in the PATH environment or specify the full
|
# below. They must be in the PATH environment or specify the full
|
||||||
# path below via the `program` key.
|
# path below via the `program` key.
|
||||||
convert {
|
convert {
|
||||||
|
|
||||||
|
# The chunk size used when storing files. This should be the same
|
||||||
|
# as used with the rest server.
|
||||||
chunk-size = 524288
|
chunk-size = 524288
|
||||||
|
|
||||||
max-image-size = 12000000
|
# When reading images, this is the maximum size. Images that are
|
||||||
|
# larger are not processed.
|
||||||
|
max-image-size = ${docspell.joex.extraction.ocr.max-image-size}
|
||||||
|
|
||||||
|
# Settings when processing markdown files (and other text files)
|
||||||
|
# to HTML.
|
||||||
|
#
|
||||||
|
# In order to support text formats, text files are first converted
|
||||||
|
# to HTML using a markdown processor. The resulting HTML is then
|
||||||
|
# converted to a PDF file.
|
||||||
markdown {
|
markdown {
|
||||||
|
|
||||||
|
# The CSS that is used to style the resulting HTML.
|
||||||
internal-css = """
|
internal-css = """
|
||||||
body { padding: 2em 5em; }
|
body { padding: 2em 5em; }
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# To convert HTML files into PDF files, the external tool
|
||||||
|
# wkhtmltopdf is used.
|
||||||
wkhtmlpdf {
|
wkhtmlpdf {
|
||||||
cmd = {
|
cmd = {
|
||||||
program = "wkhtmltopdf"
|
program = "wkhtmltopdf"
|
||||||
@ -168,11 +183,13 @@ docspell.joex {
|
|||||||
"-",
|
"-",
|
||||||
"{{outfile}}"
|
"{{outfile}}"
|
||||||
]
|
]
|
||||||
timeout = "20 seconds"
|
timeout = "2 minutes"
|
||||||
}
|
}
|
||||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# To convert image files to PDF files, tesseract is used. This
|
||||||
|
# also extracts the text in one go.
|
||||||
tesseract = {
|
tesseract = {
|
||||||
cmd = {
|
cmd = {
|
||||||
program = "tesseract"
|
program = "tesseract"
|
||||||
@ -184,11 +201,19 @@ docspell.joex {
|
|||||||
"pdf",
|
"pdf",
|
||||||
"txt"
|
"txt"
|
||||||
]
|
]
|
||||||
timeout = "120 seconds"
|
timeout = "5 minutes"
|
||||||
}
|
}
|
||||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# To convert "office" files to PDF files, the external tool
|
||||||
|
# unoconv is used. Unoconv uses libreoffice/openoffice for
|
||||||
|
# converting. So it supports all formats that are possible to read
|
||||||
|
# with libreoffice/openoffice.
|
||||||
|
#
|
||||||
|
# Note: to greatly improve performance, it is recommended to start
|
||||||
|
# a libreoffice listener by running `unoconv -l` in a separate
|
||||||
|
# process.
|
||||||
unoconv = {
|
unoconv = {
|
||||||
cmd = {
|
cmd = {
|
||||||
program = "unoconv"
|
program = "unoconv"
|
||||||
@ -199,7 +224,7 @@ docspell.joex {
|
|||||||
"{{outfile}}",
|
"{{outfile}}",
|
||||||
"{{infile}}"
|
"{{infile}}"
|
||||||
]
|
]
|
||||||
timeout = "20 seconds"
|
timeout = "2 minutes"
|
||||||
}
|
}
|
||||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user