Make idle interval when clearing nlp cache configurable

This commit is contained in:
Eike Kettner
2021-01-06 00:35:58 +01:00
parent 73a9572835
commit a670bbb6c2
6 changed files with 109 additions and 64 deletions

View File

@ -276,6 +276,13 @@ docspell.joex {
# files.
working-dir = ${java.io.tmpdir}"/docspell-analysis"
# The StanfordCoreNLP library caches language models, which
# requires a considerable amount of memory. If this interval is set
# to a positive duration, the cache is cleared after that amount of
# idle time. Set it to 0 to disable clearing; if you have enough
# memory, processing will be faster.
clear-stanford-nlp-interval = "15 minutes"
regex-ner {
# Whether to enable custom NER annotation. This uses the address
# book of a collective as input for NER tagging (to automatically

View File

@ -60,6 +60,7 @@ object Config {
case class TextAnalysis(
maxLength: Int,
workingDir: Path,
clearStanfordNlpInterval: Duration,
regexNer: RegexNer,
classification: Classification
) {
@ -67,6 +68,7 @@ object Config {
def textAnalysisConfig: TextAnalysisConfig =
TextAnalysisConfig(
maxLength,
clearStanfordNlpInterval,
TextClassifierConfig(
workingDir,
NonEmptyList