From 1fc57fc2b24faaf0a9e8a3e8ff15385449172614 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sat, 1 Aug 2020 15:44:46 +0200 Subject: [PATCH] Set default value for min-text-len to 500 This value is used to decide whether to try OCR or not. If the extracted text is shorter than this value, OCR is run and both results are compared. It was set to 10, which is just one or two words. Since docspell processes documents, this value is too low. --- modules/joex/src/main/resources/reference.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index 059e6d05..bd0de234 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -169,7 +169,7 @@ docspell.joex { # below, OCR is run afterwards. Then both extracted texts are # compared and the longer will be used. pdf { - min-text-len = 10 + min-text-len = 500 } # Extracting text using OCR works for image and pdf files. It will