mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-24 19:38:24 +00:00
Use collective data with NER annotation
This commit is contained in:
@ -248,6 +248,29 @@ docspell.joex {
|
||||
# should suffice. Default is 10000, which is about 2-3 pages
|
||||
# (just a rough guess, of course).
|
||||
max-length = 10000
|
||||
|
||||
# A working directory for the analyser to store temporary/working
|
||||
# files.
|
||||
working-dir = ${java.io.tmpdir}"/docspell-analysis"
|
||||
|
||||
regex-ner {
|
||||
# Whether to enable custom NER annotation. This uses the address
|
||||
# book of a collective as input for NER tagging (to automatically
|
||||
# find correspondent and concerned entities). If the address book
|
||||
# is large, this can be quite memory intensive and also makes text
|
||||
# analysis slower. But it greatly improves accuracy. If this is
|
||||
# false, NER tagging uses only statistical models (that also work
|
||||
# quite well).
|
||||
#
|
||||
# This setting might be moved to the collective settings in the
|
||||
# future.
|
||||
enabled = true
|
||||
|
||||
# The NER annotation uses a file of patterns that is derived from
|
||||
# a collective's address book. This is how long this
|
||||
# file will be kept until a check for a state change is done.
|
||||
file-cache-time = "1 minute"
|
||||
}
|
||||
}
|
||||
|
||||
# Configuration for converting files into PDFs.
|
||||
|
Reference in New Issue
Block a user