Update config file doc

2025-07-04 16:48:26 +00:00 · 2020-02-20 21:10:00 +01:00
parent fbe0c1aec5
commit 3f316ab4d0
1 changed files with 29 additions and 4 deletions
--- a/modules/joex/src/main/resources/reference.conf
+++ b/modules/joex/src/main/resources/reference.conf
@ -147,16 +147,31 @@ docspell.joex {
  # below. They must be available on the PATH, or their full path
  # must be specified below via the `program` key.
  convert {
    # The chunk size used when storing files. This should be the same
    # as used with the rest server.
    chunk-size = 524288
-    max-image-size = 12000000
+    # When reading images, this is the maximum size. Images that are
    # larger are not processed.
    max-image-size = ${docspell.joex.extraction.ocr.max-image-size}
    # Settings for converting markdown files (and other text files)
    # to HTML.
    #
    # In order to support text formats, text files are first converted
    # to HTML using a markdown processor. The resulting HTML is then
    # converted to a PDF file.
    markdown {
      # The CSS that is used to style the resulting HTML.
      internal-css = """
        body { padding: 2em 5em; }
      """
    }
    # To convert HTML files into PDF files, the external tool
    # wkhtmltopdf is used.
    wkhtmlpdf {
      cmd = {
        program = "wkhtmltopdf"
@ -168,11 +183,13 @@ docspell.joex {
          "-",
          "{{outfile}}"
        ]
-        timeout = "20 seconds"
+        timeout = "2 minutes"
      }
      working-dir = ${java.io.tmpdir}"/docspell-convert"
    }
    # To convert image files to PDF files, tesseract is used. This
    # also extracts the text in one go.
    tesseract = {
      cmd = {
        program = "tesseract"
@ -184,11 +201,19 @@ docspell.joex {
          "pdf",
          "txt"
        ]
-        timeout = "120 seconds"
+        timeout = "5 minutes"
      }
      working-dir = ${java.io.tmpdir}"/docspell-convert"
    }
    # To convert "office" files to PDF files, the external tool
    # unoconv is used. Unoconv uses libreoffice/openoffice for
    # converting. So it supports all formats that are possible to read
    # with libreoffice/openoffice.
    #
    # Note: to greatly improve performance, it is recommended to start
    # a libreoffice listener by running `unoconv -l` in a separate
    # process.
    unoconv = {
      cmd = {
        program = "unoconv"
@ -199,7 +224,7 @@ docspell.joex {
          "{{outfile}}",
          "{{infile}}"
        ]
-        timeout = "20 seconds"
+        timeout = "2 minutes"
      }
      working-dir = ${java.io.tmpdir}"/docspell-convert"
    }