Use collective data with NER annotation

This commit is contained in:
Eike Kettner
2020-08-24 23:25:57 +02:00
parent de5b33c40d
commit 3473cbb773
12 changed files with 413 additions and 76 deletions

View File

@ -248,6 +248,29 @@ docspell.joex {
# should suffice. Default is 10000, which are about 2-3 pages
# (just a rough guess, of course).
max-length = 10000
# A working directory for the analyser to store temporary/working
# files.
working-dir = ${java.io.tmpdir}"/docspell-analysis"
regex-ner {
# Whether to enable custom NER annotation. This uses the address
# book of a collective as input for NER tagging (to automatically
# find correspondent and concerned entities). If the address book
# is large, this can be quite memory intensive and also makes text
# analysis slower. But it greatly improves accuracy. If this is
# false, NER tagging uses only statistical models (that also work
# quite well).
#
# This setting might be moved to the collective settings in the
# future.
enabled = true
# The NER annotation uses a file of patterns that is derived from
# a collective's address book. This is is the time how long this
# file will be kept until a check for a state change is done.
file-cache-time = "1 minute"
}
}
# Configuration for converting files into PDFs.