From ec419c7bfdcb9551701d32d0c21face652665bbf Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Sat, 22 Feb 2020 12:40:56 +0100
Subject: [PATCH] Adopt nix modules to new config

---
 .../docspell/convert/extern/Tesseract.scala   |   4 +-
 .../convert/extern/TesseractConfig.scala      |   2 +-
 .../docspell/convert/extern/Unoconv.scala     |   2 +-
 .../convert/extern/UnoconvConfig.scala        |   2 +-
 .../docspell/convert/extern/WkHtmlPdf.scala   |   2 +-
 .../convert/extern/WkHtmlPdfConfig.scala      |   2 +-
 .../docspell/convert/ConversionTest.scala     |   6 +-
 .../joex/src/main/resources/reference.conf    |   6 +-
 modules/microsite/docs/doc.md                 |   2 +-
 nix/module-joex.nix                           | 470 ++++++++++++++----
 nix/module-server.nix                         |   2 +-
 11 files changed, 378 insertions(+), 122 deletions(-)

diff --git a/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala b/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala
index 233cfa96..1a6b966d 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala
@@ -17,11 +17,11 @@ object Tesseract {
       blocker: Blocker,
       logger: Logger[F]
   )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
-    val outBase = cfg.cmd.args.tail.headOption.getOrElse("out")
+    val outBase = cfg.command.args.tail.headOption.getOrElse("out")
     val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
       ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
 
-    ExternConv.toPDF[F, A]("tesseract", cfg.cmd.replace(Map("{{lang}}" -> lang.iso3)), cfg.workingDir, false, blocker, logger, reader)(in, handler)
+    ExternConv.toPDF[F, A]("tesseract", cfg.command.replace(Map("{{lang}}" -> lang.iso3)), cfg.workingDir, false, blocker, logger, reader)(in, handler)
   }
 
 }
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala
index f5bef831..51f25c23 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala
@@ -4,4 +4,4 @@ import java.nio.file.Path
 
 import docspell.common.SystemCommand
 
-case class TesseractConfig (cmd: SystemCommand.Config, workingDir: Path)
+case class TesseractConfig (command: SystemCommand.Config, workingDir: Path)
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala b/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
index a6bb5b04..0f362428 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
@@ -19,7 +19,7 @@ object Unoconv {
     val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
       ExternConv.readResult[F](blocker, chunkSize, logger)
 
-    ExternConv.toPDF[F, A]("unoconv", cfg.cmd, cfg.workingDir, false, blocker, logger, reader)(in, handler)
+    ExternConv.toPDF[F, A]("unoconv", cfg.command, cfg.workingDir, false, blocker, logger, reader)(in, handler)
   }
 
 }
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
index da4af43c..70fd7975 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
@@ -4,4 +4,4 @@ import java.nio.file.Path
 
 import docspell.common.SystemCommand
 
-case class UnoconvConfig (cmd: SystemCommand.Config, workingDir: Path)
+case class UnoconvConfig (command: SystemCommand.Config, workingDir: Path)
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala
index 0c5657c1..7b70a78f 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala
@@ -19,7 +19,7 @@ object WkHtmlPdf {
     val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
       ExternConv.readResult[F](blocker, chunkSize, logger)
 
-    ExternConv.toPDF[F, A]("wkhtmltopdf", cfg.cmd, cfg.workingDir, true, blocker, logger, reader)(in, handler)
+    ExternConv.toPDF[F, A]("wkhtmltopdf", cfg.command, cfg.workingDir, true, blocker, logger, reader)(in, handler)
   }
 
 }
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala
index 11d8aa1c..3be06951 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala
@@ -4,4 +4,4 @@ import java.nio.file.Path
 
 import docspell.common.SystemCommand
 
-case class WkHtmlPdfConfig (cmd: SystemCommand.Config, workingDir: Path)
+case class WkHtmlPdfConfig (command: SystemCommand.Config, workingDir: Path)
diff --git a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala
index 294ce4db..3c6eebc5 100644
--- a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala
+++ b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala
@@ -154,7 +154,7 @@ object ConversionTest extends SimpleTestSuite with FileChecks {
       })
 
   def commandsExist: Boolean =
-    commandExists(convertConfig.unoconv.cmd.program) &&
-      commandExists(convertConfig.wkhtmlpdf.cmd.program) &&
-      commandExists(convertConfig.tesseract.cmd.program)
+    commandExists(convertConfig.unoconv.command.program) &&
+      commandExists(convertConfig.wkhtmlpdf.command.program) &&
+      commandExists(convertConfig.tesseract.command.program)
 }
diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf
index 12f55c9f..9712f54d 100644
--- a/modules/joex/src/main/resources/reference.conf
+++ b/modules/joex/src/main/resources/reference.conf
@@ -173,7 +173,7 @@ docspell.joex {
     # To convert HTML files into PDF files, the external tool
     # wkhtmltopdf is used.
     wkhtmlpdf {
-      cmd = {
+      command = {
         program = "wkhtmltopdf"
         args = [
           "-s",
@@ -191,7 +191,7 @@ docspell.joex {
     # To convert image files to PDF files, tesseract is used. This
     # also extracts the text in one go.
     tesseract = {
-      cmd = {
+      command = {
         program = "tesseract"
         args = [
           "{{infile}}",
@@ -215,7 +215,7 @@ docspell.joex {
     # a libreoffice listener by running `unoconv -l` in a separate
     # process.
     unoconv = {
-      cmd = {
+      command = {
         program = "unoconv"
         args = [
           "-f",
diff --git a/modules/microsite/docs/doc.md b/modules/microsite/docs/doc.md
index 34f59942..b7bfd088 100644
--- a/modules/microsite/docs/doc.md
+++ b/modules/microsite/docs/doc.md
@@ -72,7 +72,7 @@ multiple files, which are called **attachments**. And an item has
 - a **direction**: one of "incoming" or "outgoing"
 - a **name**: some item name, defaults to the file name of the
   attachments
-- some **notes**: arbitraty descriptive text. You can use markdown
+- some **notes**: arbitrary descriptive text. You can use markdown
   here, which is appropriately formatted in the web application.
 
 ### Collective
diff --git a/nix/module-joex.nix b/nix/module-joex.nix
index 516947fa..9c39651f 100644
--- a/nix/module-joex.nix
+++ b/nix/module-joex.nix
@@ -30,30 +30,74 @@ let
       wakeup-period = "30 minutes";
     };
     extraction = {
-      page-range = {
-        begin = 10;
+      pdf = {
+        min-text-len = 10;
       };
-      ghostscript =  {
-        working-dir = "/tmp/docspell-extraction";
-        command = {
-          program = "${pkgs.ghostscript}/bin/gs";
-          args = [ "-dNOPAUSE" "-dBATCH" "-dSAFER" "-sDEVICE=tiffscaled8" "-sOutputFile={{outfile}}" "{{infile}}" ];
-          timeout = "5 minutes";
+
+      ocr = {
+        max-image-size = 14000000;
+        page-range = {
+          begin = 10;
+        };
+        ghostscript =  {
+          working-dir = "/tmp/docspell-extraction";
+          command = {
+            program = "${pkgs.ghostscript}/bin/gs";
+            args = [ "-dNOPAUSE" "-dBATCH" "-dSAFER" "-sDEVICE=tiffscaled8" "-sOutputFile={{outfile}}" "{{infile}}" ];
+            timeout = "5 minutes";
+          };
+        };
+        unpaper = {
+          command = {
+            program = "${pkgs.unpaper}/bin/unpaper";
+            args = [ "{{infile}}" "{{outfile}}" ];
+            timeout = "5 minutes";
+          };
+        };
+        tesseract = {
+          command= {
+            program = "${pkgs.tesseract4}/bin/tesseract";
+            args = ["{{file}}" "stdout" "-l" "{{lang}}" ];
+            timeout = "5 minutes";
+          };
         };
       };
-      unpaper = {
-        command = {
-          program = "${pkgs.unpaper}/bin/unpaper";
-          args = [ "{{infile}}" "{{outfile}}" ];
-          timeout = "5 minutes";
-        };
+    };
+    convert = {
+      chunk-size = 524288;
+      max-image-size = 14000000;
+
+      markdown = {
+        internal-css = ''
+            body { padding: 2em 5em; }
+          '';
       };
+
+      wkhtmlpdf = {
+        command = {
+          program = "${pkgs.wkhtmltopdf}/bin/wkhtmltopdf";
+          args = ["-s" "A4" "--encoding" "UTF-8" "-" "{{outfile}}"];
+          timeout = "2 minutes";
+        };
+        working-dir = "/tmp/docspell-convert";
+      };
+
       tesseract = {
-        command= {
+        command = {
           program = "${pkgs.tesseract4}/bin/tesseract";
-          args = ["{{file}}" "stdout" "-l" "{{lang}}" ];
+          args = ["{{infile}}" "out" "-l" "{{lang}}" "pdf" "txt"];
           timeout = "5 minutes";
         };
+        working-dir = "/tmp/docspell-convert";
+      };
+
+      unoconv = {
+        command = {
+          program = "${pkgs.unoconv}/bin/unoconv";
+          args = ["-f" "pdf" "-o" "{{outfile}}" "{{infile}}"];
+          timeout = "2 minutes";
+        };
+        working-dir = "/tmp/docspell-convert";
       };
     };
   };
@@ -199,128 +243,164 @@ in {
       extraction = mkOption {
         type = types.submodule({
           options = {
-            page-range = mkOption {
+            pdf = mkOption {
               type = types.submodule({
                 options = {
-                  begin = mkOption {
+                  min-text-len = mkOption {
                     type = types.int;
-                    default = defaults.extraction.page-range.begin;
-                    description = "Specifies the first N pages of a file to process.";
+                    default = defaults.extraction.pdf.min-text-len;
+                    description = ''
+                      For PDF files it is first tried to read the text parts of the
+                      PDF. But PDFs can be complex documents and they may contain text
+                      and images. If the returned text is shorter than the value
+                      below, OCR is run afterwards. Then both extracted texts are
+                      compared and the longer will be used.
+                    '';
                   };
                 };
               });
-              default = defaults.extraction.page-range;
-              description = ''
-                Defines what pages to process. If a PDF with 600 pages is
-                submitted, it is probably not necessary to scan through all of
-                them. This would take a long time and occupy resources for no
-                value. The first few pages should suffice. The default is first
-                10 pages.
+              default = defaults.extraction.pdf;
+              description = "Settings for PDF extraction";
+            };
+            ocr = mkOption {
+              type = types.submodule({
+                options = {
+                  max-image-size = mkOption {
+                    type = types.int;
+                    default = defaults.extraction.ocr.max-image-size;
+                    description = ''
+                      Images greater than this size are skipped. Note that every
+                      image is loaded completely into memory for doing OCR.
+                    '';
+                  };
+                  page-range = mkOption {
+                    type = types.submodule({
+                      options = {
+                        begin = mkOption {
+                          type = types.int;
+                          default = defaults.extraction.page-range.begin;
+                          description = "Specifies the first N pages of a file to process.";
+                        };
+                      };
+                    });
+                    default = defaults.extraction.page-range;
+                    description = ''
+                      Defines what pages to process. If a PDF with 600 pages is
+                      submitted, it is probably not necessary to scan through all of
+                      them. This would take a long time and occupy resources for no
+                      value. The first few pages should suffice. The default is first
+                      10 pages.
 
-                If you want all pages being processed, set this number to -1.
+                      If you want all pages being processed, set this number to -1.
 
-                Note: if you change the ghostscript command below, be aware that
-                this setting (if not -1) will add another parameter to the
-                beginning of the command.
-              '';
-            };
-            ghostscript = mkOption {
-              type = types.submodule({
-                options = {
-                  working-dir = mkOption {
-                    type = types.str;
-                    default = defaults.extraction.ghostscript.working-dir;
-                    description = "Directory where the extraction processes can put their temp files";
+                      Note: if you change the ghostscript command below, be aware that
+                      this setting (if not -1) will add another parameter to the
+                      beginning of the command.
+                    '';
                   };
-                  command = mkOption {
+                  ghostscript = mkOption {
                     type = types.submodule({
                       options = {
-                        program = mkOption {
+                        working-dir = mkOption {
                           type = types.str;
-                          default = defaults.extraction.ghostscript.command.program;
-                          description = "The path to the executable.";
+                          default = defaults.extraction.ghostscript.working-dir;
+                          description = "Directory where the extraction processes can put their temp files";
                         };
-                        args = mkOption {
-                          type = types.listOf types.str;
-                          default = defaults.extraction.ghostscript.command.args;
-                          description = "The arguments to the program";
-                        };
-                        timeout = mkOption {
-                          type = types.str;
-                          default = defaults.extraction.ghostscript.command.timeout;
-                          description = "The timeout when executing the command";
+                        command = mkOption {
+                          type = types.submodule({
+                            options = {
+                              program = mkOption {
+                                type = types.str;
+                                default = defaults.extraction.ghostscript.command.program;
+                                description = "The path to the executable.";
+                              };
+                              args = mkOption {
+                                type = types.listOf types.str;
+                                default = defaults.extraction.ghostscript.command.args;
+                                description = "The arguments to the program";
+                              };
+                              timeout = mkOption {
+                                type = types.str;
+                                default = defaults.extraction.ghostscript.command.timeout;
+                                description = "The timeout when executing the command";
+                              };
+                            };
+                          });
+                          default = defaults.extraction.ghostscript.command;
+                          description = "The system command";
                         };
                       };
                     });
-                    default = defaults.extraction.ghostscript.command;
-                    description = "The system command";
+                    default = defaults.extraction.ghostscript;
+                    description = "The ghostscript command.";
                   };
-                };
-              });
-              default = defaults.extraction.ghostscript;
-              description = "The ghostscript command.";
-            };
-            unpaper = mkOption {
-              type = types.submodule({
-                options = {
-                  command = mkOption {
+                  unpaper = mkOption {
                     type = types.submodule({
                       options = {
-                        program = mkOption {
-                          type = types.str;
-                          default = defaults.extraction.unpaper.command.program;
-                          description = "The path to the executable.";
-                        };
-                        args = mkOption {
-                          type = types.listOf types.str;
-                          default = defaults.extraction.unpaper.command.args;
-                          description = "The arguments to the program";
-                        };
-                        timeout = mkOption {
-                          type = types.str;
-                          default = defaults.extraction.unpaper.command.timeout;
-                          description = "The timeout when executing the command";
+                        command = mkOption {
+                          type = types.submodule({
+                            options = {
+                              program = mkOption {
+                                type = types.str;
+                                default = defaults.extraction.unpaper.command.program;
+                                description = "The path to the executable.";
+                              };
+                              args = mkOption {
+                                type = types.listOf types.str;
+                                default = defaults.extraction.unpaper.command.args;
+                                description = "The arguments to the program";
+                              };
+                              timeout = mkOption {
+                                type = types.str;
+                                default = defaults.extraction.unpaper.command.timeout;
+                                description = "The timeout when executing the command";
+                              };
+                            };
+                          });
+                          default = defaults.extraction.unpaper.command;
+                          description = "The system command";
                         };
                       };
                     });
-                    default = defaults.extraction.unpaper.command;
-                    description = "The system command";
+                    default = defaults.extraction.unpaper;
+                    description = "The unpaper command.";
                   };
-                };
-              });
-              default = defaults.extraction.unpaper;
-              description = "The unpaper command.";
-            };
-            tesseract = mkOption {
-              type = types.submodule({
-                options = {
-                  command = mkOption {
+                  tesseract = mkOption {
                     type = types.submodule({
                       options = {
-                        program = mkOption {
-                          type = types.str;
-                          default = defaults.extraction.tesseract.command.program;
-                          description = "The path to the executable.";
-                        };
-                        args = mkOption {
-                          type = types.listOf types.str;
-                          default = defaults.extraction.tesseract.command.args;
-                          description = "The arguments to the program";
-                        };
-                        timeout = mkOption {
-                          type = types.str;
-                          default = defaults.extraction.tesseract.command.timeout;
-                          description = "The timeout when executing the command";
+                        command = mkOption {
+                          type = types.submodule({
+                            options = {
+                              program = mkOption {
+                                type = types.str;
+                                default = defaults.extraction.tesseract.command.program;
+                                description = "The path to the executable.";
+                              };
+                              args = mkOption {
+                                type = types.listOf types.str;
+                                default = defaults.extraction.tesseract.command.args;
+                                description = "The arguments to the program";
+                              };
+                              timeout = mkOption {
+                                type = types.str;
+                                default = defaults.extraction.tesseract.command.timeout;
+                                description = "The timeout when executing the command";
+                              };
+                            };
+                          });
+                          default = defaults.extraction.tesseract.command;
+                          description = "The system command";
                         };
                       };
                     });
-                    default = defaults.extraction.tesseract.command;
-                    description = "The system command";
+                    default = defaults.extraction.tesseract;
+                    description = "The tesseract command.";
                   };
+
                 };
               });
-              default = defaults.extraction.tesseract;
-              description = "The tesseract command.";
+              default = defaults.extraction.ocr;
+              description = "";
             };
           };
         });
@@ -336,6 +416,182 @@ in {
           below.
         '';
       };
+
+      convert = mkOption {
+        type = types.submodule({
+          options = {
+            chunk-size = mkOption {
+              type = types.int;
+              default = defaults.convert.chunk-size;
+              description = ''
+                The chunk size used when storing files. This should be the same
+                as used with the rest server.
+              '';
+            };
+            max-image-size = mkOption {
+              type = types.int;
+              default = defaults.convert.max-image-size;
+              description = ''
+                When reading images, this is the maximum size. Images that are
+                larger are not processed.
+              '';
+            };
+            markdown = mkOption {
+              type = types.submodule({
+                options = {
+                  internal-css = mkOption {
+                    type = types.str;
+                    default = defaults.convert.markdown.internal-css;
+                    description = ''
+                      The CSS that is used to style the resulting HTML.
+                    '';
+                  };
+                };
+              });
+              default = defaults.convert.markdown;
+              description = ''
+                Settings when processing markdown files (and other text files)
+                to HTML.
+
+                In order to support text formats, text files are first converted
+                to HTML using a markdown processor. The resulting HTML is then
+                converted to a PDF file.
+              '';
+            };
+            wkhtmlpdf = mkOption {
+              type = types.submodule({
+                options = {
+                  working-dir = mkOption {
+                    type = types.str;
+                    default = defaults.convert.wktmlpdf.working-dir;
+                    description = "Directory where the conversion processes can put their temp files";
+                  };
+                  command = mkOption {
+                    type = types.submodule({
+                      options = {
+                        program = mkOption {
+                          type = types.str;
+                          default = defaults.convert.wkhtmlpdf.command.program;
+                          description = "The path to the executable.";
+                        };
+                        args = mkOption {
+                          type = types.listOf types.str;
+                          default = defaults.convert.wkhtmlpdf.command.args;
+                          description = "The arguments to the program";
+                        };
+                        timeout = mkOption {
+                          type = types.str;
+                          default = defaults.convert.wkhtmlpdf.command.timeout;
+                          description = "The timeout when executing the command";
+                        };
+                      };
+                    });
+                    default = defaults.convert.wkhtmlpdf.command;
+                    description = "The system command";
+                  };
+                };
+              });
+              default = defaults.convert.wkhtmlpdf;
+              description = ''
+                To convert HTML files into PDF files, the external tool
+                wkhtmltopdf is used.
+              '';
+            };
+            tesseract = mkOption {
+              type = types.submodule({
+                options = {
+                  working-dir = mkOption {
+                    type = types.str;
+                    default = defaults.convert.tesseract.working-dir;
+                    description = "Directory where the conversion processes can put their temp files";
+                  };
+                  command = mkOption {
+                    type = types.submodule({
+                      options = {
+                        program = mkOption {
+                          type = types.str;
+                          default = defaults.convert.tesseract.command.program;
+                          description = "The path to the executable.";
+                        };
+                        args = mkOption {
+                          type = types.listOf types.str;
+                          default = defaults.convert.tesseract.command.args;
+                          description = "The arguments to the program";
+                        };
+                        timeout = mkOption {
+                          type = types.str;
+                          default = defaults.convert.tesseract.command.timeout;
+                          description = "The timeout when executing the command";
+                        };
+                      };
+                    });
+                    default = defaults.convert.tesseract.command;
+                    description = "The system command";
+                  };
+                };
+              });
+              default = defaults.convert.tesseract;
+              description = ''
+                To convert image files to PDF files, tesseract is used. This
+                also extracts the text in one go.
+              '';
+            };
+            unoconv = mkOption {
+              type = types.submodule({
+                options = {
+                  working-dir = mkOption {
+                    type = types.str;
+                    default = defaults.convert.unoconv.working-dir;
+                    description = "Directory where the conversion processes can put their temp files";
+                  };
+                  command = mkOption {
+                    type = types.submodule({
+                      options = {
+                        program = mkOption {
+                          type = types.str;
+                          default = defaults.convert.unoconv.command.program;
+                          description = "The path to the executable.";
+                        };
+                        args = mkOption {
+                          type = types.listOf types.str;
+                          default = defaults.convert.unoconv.command.args;
+                          description = "The arguments to the program";
+                        };
+                        timeout = mkOption {
+                          type = types.str;
+                          default = defaults.convert.unoconv.command.timeout;
+                          description = "The timeout when executing the command";
+                        };
+                      };
+                    });
+                    default = defaults.convert.unoconv.command;
+                    description = "The system command";
+                  };
+                };
+              });
+              default = defaults.convert.unoconv;
+              description = ''
+                To convert "office" files to PDF files, the external tool
+                unoconv is used. Unoconv uses libreoffice/openoffice for
+                converting. So it supports all formats that are possible to read
+                with libreoffice/openoffic.
+
+                Note: to greatly improve performance, it is recommended to start
+                a libreoffice listener by running `unoconv -l` in a separate
+                process.
+              '';
+            };
+          };
+        });
+        default = defaults.convert;
+        description = ''
+          Configuration for converting files into PDFs.
+
+          Most of it is delegated to external tools, which can be configured
+          below. They must be in the PATH environment or specify the full
+          path below via the `program` key.
+        '';
+      };
     };
   };
 
diff --git a/nix/module-server.nix b/nix/module-server.nix
index 7a160499..0ac4922b 100644
--- a/nix/module-server.nix
+++ b/nix/module-server.nix
@@ -34,7 +34,7 @@ let
       };
       files = {
         chunk-size = 524288;
-        valid-mime-types = ["application/pdf"];
+        valid-mime-types = [];
       };
     };
   };