mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-02 13:32:51 +00:00
Adopt nix modules to new config
This commit is contained in:
parent
74a037887d
commit
ec419c7bfd
@ -17,11 +17,11 @@ object Tesseract {
|
|||||||
blocker: Blocker,
|
blocker: Blocker,
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
||||||
val outBase = cfg.cmd.args.tail.headOption.getOrElse("out")
|
val outBase = cfg.command.args.tail.headOption.getOrElse("out")
|
||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
|
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
|
||||||
|
|
||||||
ExternConv.toPDF[F, A]("tesseract", cfg.cmd.replace(Map("{{lang}}" -> lang.iso3)), cfg.workingDir, false, blocker, logger, reader)(in, handler)
|
ExternConv.toPDF[F, A]("tesseract", cfg.command.replace(Map("{{lang}}" -> lang.iso3)), cfg.workingDir, false, blocker, logger, reader)(in, handler)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,4 +4,4 @@ import java.nio.file.Path
|
|||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
case class TesseractConfig (cmd: SystemCommand.Config, workingDir: Path)
|
case class TesseractConfig (command: SystemCommand.Config, workingDir: Path)
|
||||||
|
@ -19,7 +19,7 @@ object Unoconv {
|
|||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
ExternConv.readResult[F](blocker, chunkSize, logger)
|
||||||
|
|
||||||
ExternConv.toPDF[F, A]("unoconv", cfg.cmd, cfg.workingDir, false, blocker, logger, reader)(in, handler)
|
ExternConv.toPDF[F, A]("unoconv", cfg.command, cfg.workingDir, false, blocker, logger, reader)(in, handler)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,4 +4,4 @@ import java.nio.file.Path
|
|||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
case class UnoconvConfig (cmd: SystemCommand.Config, workingDir: Path)
|
case class UnoconvConfig (command: SystemCommand.Config, workingDir: Path)
|
||||||
|
@ -19,7 +19,7 @@ object WkHtmlPdf {
|
|||||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
ExternConv.readResult[F](blocker, chunkSize, logger)
|
||||||
|
|
||||||
ExternConv.toPDF[F, A]("wkhtmltopdf", cfg.cmd, cfg.workingDir, true, blocker, logger, reader)(in, handler)
|
ExternConv.toPDF[F, A]("wkhtmltopdf", cfg.command, cfg.workingDir, true, blocker, logger, reader)(in, handler)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,4 +4,4 @@ import java.nio.file.Path
|
|||||||
|
|
||||||
import docspell.common.SystemCommand
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
case class WkHtmlPdfConfig (cmd: SystemCommand.Config, workingDir: Path)
|
case class WkHtmlPdfConfig (command: SystemCommand.Config, workingDir: Path)
|
||||||
|
@ -154,7 +154,7 @@ object ConversionTest extends SimpleTestSuite with FileChecks {
|
|||||||
})
|
})
|
||||||
|
|
||||||
def commandsExist: Boolean =
|
def commandsExist: Boolean =
|
||||||
commandExists(convertConfig.unoconv.cmd.program) &&
|
commandExists(convertConfig.unoconv.command.program) &&
|
||||||
commandExists(convertConfig.wkhtmlpdf.cmd.program) &&
|
commandExists(convertConfig.wkhtmlpdf.command.program) &&
|
||||||
commandExists(convertConfig.tesseract.cmd.program)
|
commandExists(convertConfig.tesseract.command.program)
|
||||||
}
|
}
|
||||||
|
@ -173,7 +173,7 @@ docspell.joex {
|
|||||||
# To convert HTML files into PDF files, the external tool
|
# To convert HTML files into PDF files, the external tool
|
||||||
# wkhtmltopdf is used.
|
# wkhtmltopdf is used.
|
||||||
wkhtmlpdf {
|
wkhtmlpdf {
|
||||||
cmd = {
|
command = {
|
||||||
program = "wkhtmltopdf"
|
program = "wkhtmltopdf"
|
||||||
args = [
|
args = [
|
||||||
"-s",
|
"-s",
|
||||||
@ -191,7 +191,7 @@ docspell.joex {
|
|||||||
# To convert image files to PDF files, tesseract is used. This
|
# To convert image files to PDF files, tesseract is used. This
|
||||||
# also extracts the text in one go.
|
# also extracts the text in one go.
|
||||||
tesseract = {
|
tesseract = {
|
||||||
cmd = {
|
command = {
|
||||||
program = "tesseract"
|
program = "tesseract"
|
||||||
args = [
|
args = [
|
||||||
"{{infile}}",
|
"{{infile}}",
|
||||||
@ -215,7 +215,7 @@ docspell.joex {
|
|||||||
# a libreoffice listener by running `unoconv -l` in a separate
|
# a libreoffice listener by running `unoconv -l` in a separate
|
||||||
# process.
|
# process.
|
||||||
unoconv = {
|
unoconv = {
|
||||||
cmd = {
|
command = {
|
||||||
program = "unoconv"
|
program = "unoconv"
|
||||||
args = [
|
args = [
|
||||||
"-f",
|
"-f",
|
||||||
|
@ -72,7 +72,7 @@ multiple files, which are called **attachments**. And an item has
|
|||||||
- a **direction**: one of "incoming" or "outgoing"
|
- a **direction**: one of "incoming" or "outgoing"
|
||||||
- a **name**: some item name, defaults to the file name of the
|
- a **name**: some item name, defaults to the file name of the
|
||||||
attachments
|
attachments
|
||||||
- some **notes**: arbitraty descriptive text. You can use markdown
|
- some **notes**: arbitrary descriptive text. You can use markdown
|
||||||
here, which is appropriately formatted in the web application.
|
here, which is appropriately formatted in the web application.
|
||||||
|
|
||||||
### Collective
|
### Collective
|
||||||
|
@ -30,30 +30,74 @@ let
|
|||||||
wakeup-period = "30 minutes";
|
wakeup-period = "30 minutes";
|
||||||
};
|
};
|
||||||
extraction = {
|
extraction = {
|
||||||
page-range = {
|
pdf = {
|
||||||
begin = 10;
|
min-text-len = 10;
|
||||||
};
|
};
|
||||||
ghostscript = {
|
|
||||||
working-dir = "/tmp/docspell-extraction";
|
ocr = {
|
||||||
command = {
|
max-image-size = 14000000;
|
||||||
program = "${pkgs.ghostscript}/bin/gs";
|
page-range = {
|
||||||
args = [ "-dNOPAUSE" "-dBATCH" "-dSAFER" "-sDEVICE=tiffscaled8" "-sOutputFile={{outfile}}" "{{infile}}" ];
|
begin = 10;
|
||||||
timeout = "5 minutes";
|
};
|
||||||
|
ghostscript = {
|
||||||
|
working-dir = "/tmp/docspell-extraction";
|
||||||
|
command = {
|
||||||
|
program = "${pkgs.ghostscript}/bin/gs";
|
||||||
|
args = [ "-dNOPAUSE" "-dBATCH" "-dSAFER" "-sDEVICE=tiffscaled8" "-sOutputFile={{outfile}}" "{{infile}}" ];
|
||||||
|
timeout = "5 minutes";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
unpaper = {
|
||||||
|
command = {
|
||||||
|
program = "${pkgs.unpaper}/bin/unpaper";
|
||||||
|
args = [ "{{infile}}" "{{outfile}}" ];
|
||||||
|
timeout = "5 minutes";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
tesseract = {
|
||||||
|
command= {
|
||||||
|
program = "${pkgs.tesseract4}/bin/tesseract";
|
||||||
|
args = ["{{file}}" "stdout" "-l" "{{lang}}" ];
|
||||||
|
timeout = "5 minutes";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
unpaper = {
|
};
|
||||||
command = {
|
convert = {
|
||||||
program = "${pkgs.unpaper}/bin/unpaper";
|
chunk-size = 524288;
|
||||||
args = [ "{{infile}}" "{{outfile}}" ];
|
max-image-size = 14000000;
|
||||||
timeout = "5 minutes";
|
|
||||||
};
|
markdown = {
|
||||||
|
internal-css = ''
|
||||||
|
body { padding: 2em 5em; }
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
wkhtmlpdf = {
|
||||||
|
command = {
|
||||||
|
program = "${pkgs.wkhtmltopdf}/bin/wkhtmltopdf";
|
||||||
|
args = ["-s" "A4" "--encoding" "UTF-8" "-" "{{outfile}}"];
|
||||||
|
timeout = "2 minutes";
|
||||||
|
};
|
||||||
|
working-dir = "/tmp/docspell-convert";
|
||||||
|
};
|
||||||
|
|
||||||
tesseract = {
|
tesseract = {
|
||||||
command= {
|
command = {
|
||||||
program = "${pkgs.tesseract4}/bin/tesseract";
|
program = "${pkgs.tesseract4}/bin/tesseract";
|
||||||
args = ["{{file}}" "stdout" "-l" "{{lang}}" ];
|
args = ["{{infile}}" "out" "-l" "{{lang}}" "pdf" "txt"];
|
||||||
timeout = "5 minutes";
|
timeout = "5 minutes";
|
||||||
};
|
};
|
||||||
|
working-dir = "/tmp/docspell-convert";
|
||||||
|
};
|
||||||
|
|
||||||
|
unoconv = {
|
||||||
|
command = {
|
||||||
|
program = "${pkgs.unoconv}/bin/unoconv";
|
||||||
|
args = ["-f" "pdf" "-o" "{{outfile}}" "{{infile}}"];
|
||||||
|
timeout = "2 minutes";
|
||||||
|
};
|
||||||
|
working-dir = "/tmp/docspell-convert";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -199,128 +243,164 @@ in {
|
|||||||
extraction = mkOption {
|
extraction = mkOption {
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
page-range = mkOption {
|
pdf = mkOption {
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
begin = mkOption {
|
min-text-len = mkOption {
|
||||||
type = types.int;
|
type = types.int;
|
||||||
default = defaults.extraction.page-range.begin;
|
default = defaults.extraction.pdf.min-text-len;
|
||||||
description = "Specifies the first N pages of a file to process.";
|
description = ''
|
||||||
|
For PDF files it is first tried to read the text parts of the
|
||||||
|
PDF. But PDFs can be complex documents and they may contain text
|
||||||
|
and images. If the returned text is shorter than the value
|
||||||
|
below, OCR is run afterwards. Then both extracted texts are
|
||||||
|
compared and the longer will be used.
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = defaults.extraction.page-range;
|
default = defaults.extraction.pdf;
|
||||||
description = ''
|
description = "Settings for PDF extraction";
|
||||||
Defines what pages to process. If a PDF with 600 pages is
|
};
|
||||||
submitted, it is probably not necessary to scan through all of
|
ocr = mkOption {
|
||||||
them. This would take a long time and occupy resources for no
|
type = types.submodule({
|
||||||
value. The first few pages should suffice. The default is first
|
options = {
|
||||||
10 pages.
|
max-image-size = mkOption {
|
||||||
|
type = types.int;
|
||||||
|
default = defaults.extraction.ocr.max-image-size;
|
||||||
|
description = ''
|
||||||
|
Images greater than this size are skipped. Note that every
|
||||||
|
image is loaded completely into memory for doing OCR.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
page-range = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
begin = mkOption {
|
||||||
|
type = types.int;
|
||||||
|
# Defaults for OCR settings are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.page-range.begin;
|
||||||
|
description = "Specifies the first N pages of a file to process.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
# Defaults for OCR settings are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.page-range;
|
||||||
|
description = ''
|
||||||
|
Defines what pages to process. If a PDF with 600 pages is
|
||||||
|
submitted, it is probably not necessary to scan through all of
|
||||||
|
them. This would take a long time and occupy resources for no
|
||||||
|
value. The first few pages should suffice. The default is first
|
||||||
|
10 pages.
|
||||||
|
|
||||||
If you want all pages being processed, set this number to -1.
|
If you want all pages being processed, set this number to -1.
|
||||||
|
|
||||||
Note: if you change the ghostscript command below, be aware that
|
Note: if you change the ghostscript command below, be aware that
|
||||||
this setting (if not -1) will add another parameter to the
|
this setting (if not -1) will add another parameter to the
|
||||||
beginning of the command.
|
beginning of the command.
|
||||||
'';
|
'';
|
||||||
};
|
|
||||||
ghostscript = mkOption {
|
|
||||||
type = types.submodule({
|
|
||||||
options = {
|
|
||||||
working-dir = mkOption {
|
|
||||||
type = types.str;
|
|
||||||
default = defaults.extraction.ghostscript.working-dir;
|
|
||||||
description = "Directory where the extraction processes can put their temp files";
|
|
||||||
};
|
};
|
||||||
command = mkOption {
|
ghostscript = mkOption {
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
program = mkOption {
|
working-dir = mkOption {
|
||||||
type = types.str;
|
type = types.str;
|
||||||
default = defaults.extraction.ghostscript.command.program;
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.ghostscript.working-dir;
|
||||||
description = "The path to the executable.";
|
description = "Directory where the extraction processes can put their temp files";
|
||||||
};
|
};
|
||||||
args = mkOption {
|
command = mkOption {
|
||||||
type = types.listOf types.str;
|
type = types.submodule({
|
||||||
default = defaults.extraction.ghostscript.command.args;
|
options = {
|
||||||
description = "The arguments to the program";
|
program = mkOption {
|
||||||
};
|
type = types.str;
|
||||||
timeout = mkOption {
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.ghostscript.command.program;
|
||||||
type = types.str;
|
description = "The path to the executable.";
|
||||||
default = defaults.extraction.ghostscript.command.timeout;
|
};
|
||||||
description = "The timeout when executing the command";
|
args = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.ghostscript.command.args;
|
||||||
|
description = "The arguments to the program";
|
||||||
|
};
|
||||||
|
timeout = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.ghostscript.command.timeout;
|
||||||
|
description = "The timeout when executing the command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.ghostscript.command;
|
||||||
|
description = "The system command";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = defaults.extraction.ghostscript.command;
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.ghostscript;
|
||||||
description = "The system command";
|
description = "The ghostscript command.";
|
||||||
};
|
};
|
||||||
};
|
unpaper = mkOption {
|
||||||
});
|
|
||||||
default = defaults.extraction.ghostscript;
|
|
||||||
description = "The ghostscript command.";
|
|
||||||
};
|
|
||||||
unpaper = mkOption {
|
|
||||||
type = types.submodule({
|
|
||||||
options = {
|
|
||||||
command = mkOption {
|
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
program = mkOption {
|
command = mkOption {
|
||||||
type = types.str;
|
type = types.submodule({
|
||||||
default = defaults.extraction.unpaper.command.program;
|
options = {
|
||||||
description = "The path to the executable.";
|
program = mkOption {
|
||||||
};
|
type = types.str;
|
||||||
args = mkOption {
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.unpaper.command.program;
|
||||||
type = types.listOf types.str;
|
description = "The path to the executable.";
|
||||||
default = defaults.extraction.unpaper.command.args;
|
};
|
||||||
description = "The arguments to the program";
|
args = mkOption {
|
||||||
};
|
type = types.listOf types.str;
|
||||||
timeout = mkOption {
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.unpaper.command.args;
|
||||||
type = types.str;
|
description = "The arguments to the program";
|
||||||
default = defaults.extraction.unpaper.command.timeout;
|
};
|
||||||
description = "The timeout when executing the command";
|
timeout = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.unpaper.command.timeout;
|
||||||
|
description = "The timeout when executing the command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.unpaper.command;
|
||||||
|
description = "The system command";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = defaults.extraction.unpaper.command;
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.unpaper;
|
||||||
description = "The system command";
|
description = "The unpaper command.";
|
||||||
};
|
};
|
||||||
};
|
tesseract = mkOption {
|
||||||
});
|
|
||||||
default = defaults.extraction.unpaper;
|
|
||||||
description = "The unpaper command.";
|
|
||||||
};
|
|
||||||
tesseract = mkOption {
|
|
||||||
type = types.submodule({
|
|
||||||
options = {
|
|
||||||
command = mkOption {
|
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
program = mkOption {
|
command = mkOption {
|
||||||
type = types.str;
|
type = types.submodule({
|
||||||
default = defaults.extraction.tesseract.command.program;
|
options = {
|
||||||
description = "The path to the executable.";
|
program = mkOption {
|
||||||
};
|
type = types.str;
|
||||||
args = mkOption {
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.tesseract.command.program;
|
||||||
type = types.listOf types.str;
|
description = "The path to the executable.";
|
||||||
default = defaults.extraction.tesseract.command.args;
|
};
|
||||||
description = "The arguments to the program";
|
args = mkOption {
|
||||||
};
|
type = types.listOf types.str;
|
||||||
timeout = mkOption {
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.tesseract.command.args;
|
||||||
type = types.str;
|
description = "The arguments to the program";
|
||||||
default = defaults.extraction.tesseract.command.timeout;
|
};
|
||||||
description = "The timeout when executing the command";
|
timeout = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.tesseract.command.timeout;
|
||||||
|
description = "The timeout when executing the command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.tesseract.command;
|
||||||
|
description = "The system command";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = defaults.extraction.tesseract.command;
|
# Defaults for OCR tools are nested under defaults.extraction.ocr.
default = defaults.extraction.ocr.tesseract;
|
||||||
description = "The system command";
|
description = "The tesseract command.";
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = defaults.extraction.tesseract;
|
default = defaults.extraction.ocr;
|
||||||
description = "The tesseract command.";
|
description = "";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
@ -336,6 +416,182 @@ in {
|
|||||||
below.
|
below.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
convert = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
chunk-size = mkOption {
|
||||||
|
type = types.int;
|
||||||
|
default = defaults.convert.chunk-size;
|
||||||
|
description = ''
|
||||||
|
The chunk size used when storing files. This should be the same
|
||||||
|
as used with the rest server.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
max-image-size = mkOption {
|
||||||
|
type = types.int;
|
||||||
|
default = defaults.convert.max-image-size;
|
||||||
|
description = ''
|
||||||
|
When reading images, this is the maximum size. Images that are
|
||||||
|
larger are not processed.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
markdown = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
internal-css = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.markdown.internal-css;
|
||||||
|
description = ''
|
||||||
|
The CSS that is used to style the resulting HTML.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.markdown;
|
||||||
|
description = ''
|
||||||
|
Settings when processing markdown files (and other text files)
|
||||||
|
to HTML.
|
||||||
|
|
||||||
|
In order to support text formats, text files are first converted
|
||||||
|
to HTML using a markdown processor. The resulting HTML is then
|
||||||
|
converted to a PDF file.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
wkhtmlpdf = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
working-dir = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
# The defaults attribute set names this tool "wkhtmlpdf".
default = defaults.convert.wkhtmlpdf.working-dir;
|
||||||
|
description = "Directory where the conversion processes can put their temp files";
|
||||||
|
};
|
||||||
|
command = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
program = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.wkhtmlpdf.command.program;
|
||||||
|
description = "The path to the executable.";
|
||||||
|
};
|
||||||
|
args = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = defaults.convert.wkhtmlpdf.command.args;
|
||||||
|
description = "The arguments to the program";
|
||||||
|
};
|
||||||
|
timeout = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.wkhtmlpdf.command.timeout;
|
||||||
|
description = "The timeout when executing the command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.wkhtmlpdf.command;
|
||||||
|
description = "The system command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.wkhtmlpdf;
|
||||||
|
description = ''
|
||||||
|
To convert HTML files into PDF files, the external tool
|
||||||
|
wkhtmltopdf is used.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
tesseract = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
working-dir = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.tesseract.working-dir;
|
||||||
|
description = "Directory where the conversion processes can put their temp files";
|
||||||
|
};
|
||||||
|
command = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
program = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.tesseract.command.program;
|
||||||
|
description = "The path to the executable.";
|
||||||
|
};
|
||||||
|
args = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = defaults.convert.tesseract.command.args;
|
||||||
|
description = "The arguments to the program";
|
||||||
|
};
|
||||||
|
timeout = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.tesseract.command.timeout;
|
||||||
|
description = "The timeout when executing the command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.tesseract.command;
|
||||||
|
description = "The system command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.tesseract;
|
||||||
|
description = ''
|
||||||
|
To convert image files to PDF files, tesseract is used. This
|
||||||
|
also extracts the text in one go.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
unoconv = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
working-dir = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.unoconv.working-dir;
|
||||||
|
description = "Directory where the conversion processes can put their temp files";
|
||||||
|
};
|
||||||
|
command = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
program = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.unoconv.command.program;
|
||||||
|
description = "The path to the executable.";
|
||||||
|
};
|
||||||
|
args = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = defaults.convert.unoconv.command.args;
|
||||||
|
description = "The arguments to the program";
|
||||||
|
};
|
||||||
|
timeout = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.convert.unoconv.command.timeout;
|
||||||
|
description = "The timeout when executing the command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.unoconv.command;
|
||||||
|
description = "The system command";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert.unoconv;
|
||||||
|
description = ''
|
||||||
|
To convert "office" files to PDF files, the external tool
|
||||||
|
unoconv is used. Unoconv uses libreoffice/openoffice for
|
||||||
|
converting. So it supports all formats that are possible to read
|
||||||
|
with libreoffice/openoffice.
|
||||||
|
|
||||||
|
Note: to greatly improve performance, it is recommended to start
|
||||||
|
a libreoffice listener by running `unoconv -l` in a separate
|
||||||
|
process.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.convert;
|
||||||
|
description = ''
|
||||||
|
Configuration for converting files into PDFs.
|
||||||
|
|
||||||
|
Most of it is delegated to external tools, which can be configured
|
||||||
|
below. They must be in the PATH environment or specify the full
|
||||||
|
path below via the `program` key.
|
||||||
|
'';
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ let
|
|||||||
};
|
};
|
||||||
files = {
|
files = {
|
||||||
chunk-size = 524288;
|
chunk-size = 524288;
|
||||||
valid-mime-types = ["application/pdf"];
|
valid-mime-types = [];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user