mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 10:28:27 +00:00
Choose nlp mode in config file
This commit is contained in:
@ -277,12 +277,27 @@ docspell.joex {
|
||||
# files.
|
||||
working-dir = ${java.io.tmpdir}"/docspell-analysis"
|
||||
|
||||
# The StanfordCoreNLP library caches language models which
|
||||
# requires quite some amount of memory. Setting this interval to a
|
||||
# positive duration, the cache is cleared after this amount of
|
||||
# idle time. Set it to 0 to disable it if you have enough memory,
|
||||
# processing will be faster.
|
||||
clear-stanford-nlp-interval = "15 minutes"
|
||||
nlp-config {
|
||||
# The StanfordCoreNLP library caches language models, which
|
||||
# requires a considerable amount of memory. If this interval is set
|
||||
# to a positive duration, the cache is cleared after that amount of
|
||||
|
||||
# idle time. Set it to 0 to disable clearing if you have enough
|
||||
# memory; processing will then be faster.
|
||||
#
|
||||
# This only has an effect if mode != disabled.
|
||||
clear-interval = "15 minutes"
|
||||
|
||||
# The mode for configuring NLP models. Currently 3 are available:
|
||||
#
|
||||
# 1. full - builds the complete pipeline, run with -Xmx1500M or more
|
||||
# 2. basic - builds only the ner annotator, run with -Xmx600M or more
|
||||
# 3. disabled - doesn't use any stanford-nlp feature
|
||||
#
|
||||
# The basic variant does a quite good job for German and
|
||||
# English. It might be worse for French, always depending on the
|
||||
# type of text that is analysed.
|
||||
mode = full
|
||||
}
|
||||
|
||||
regex-ner {
|
||||
# Whether to enable custom NER annotation. This uses the address
|
||||
@ -295,6 +310,8 @@ docspell.joex {
|
||||
#
|
||||
# This setting might be moved to the collective settings in the
|
||||
# future.
|
||||
#
|
||||
# Note, this is only relevant if nlp-config.mode = full.
|
||||
enabled = true
|
||||
|
||||
# The NER annotation uses a file of patterns that is derived from
|
||||
|
@ -4,7 +4,8 @@ import java.nio.file.Path
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
|
||||
import docspell.analysis.{TextAnalysisConfig, classifier}
|
||||
import docspell.analysis.TextAnalysisConfig
|
||||
import docspell.analysis.classifier.TextClassifierConfig
|
||||
import docspell.backend.Config.Files
|
||||
import docspell.common._
|
||||
import docspell.convert.ConvertConfig
|
||||
@ -59,7 +60,7 @@ object Config {
|
||||
case class TextAnalysis(
|
||||
maxLength: Int,
|
||||
workingDir: Path,
|
||||
clearStanfordNlpInterval: Duration,
|
||||
nlpConfig: TextAnalysisConfig.NlpConfig,
|
||||
regexNer: RegexNer,
|
||||
classification: Classification
|
||||
) {
|
||||
@ -67,8 +68,8 @@ object Config {
|
||||
def textAnalysisConfig: TextAnalysisConfig =
|
||||
TextAnalysisConfig(
|
||||
maxLength,
|
||||
clearStanfordNlpInterval,
|
||||
classifier.TextClassifierConfig(
|
||||
nlpConfig,
|
||||
TextClassifierConfig(
|
||||
workingDir,
|
||||
NonEmptyList
|
||||
.fromList(classification.classifiers)
|
||||
|
Reference in New Issue
Block a user