mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Make the text length limit optional
This commit is contained in:
@ -269,9 +269,13 @@ docspell.joex {
|
||||
# All text to analyse must fit into RAM. A large document may take
|
||||
# too much heap. Also, most important information is at the
|
||||
# beginning of a document, so in most cases the first two pages
|
||||
# should suffice. Default is 8000, which is about 2-3 pages (just
|
||||
# a rough guess, of course).
|
||||
max-length = 8000
|
||||
# should suffice. Default is 5000, which is about 2 pages (just a
|
||||
# rough guess, of course). For my data, more than 80% of the
|
||||
# documents are less than 5000 characters.
|
||||
#
|
||||
# This value applies to nlp and the classifier. If this value is
|
||||
# <= 0, the limit is disabled.
|
||||
max-length = 5000
|
||||
|
||||
# A working directory for the analyser to store temporary/working
|
||||
# files.
|
||||
@ -363,6 +367,10 @@ docspell.joex {
|
||||
# If concerned with memory consumption, this restricts the
|
||||
# number of items to consider. More are better for training. A
|
||||
# negative value or zero means to train on all items.
|
||||
#
|
||||
# This limit and `text-analysis.max-length` define how much
|
||||
# memory is required. On weaker hardware, it is advised to play
|
||||
# with these values.
|
||||
item-count = 600
|
||||
|
||||
# These settings are used to configure the classifier. If
|
||||
|
Reference in New Issue
Block a user