Merge pull request #1005 from eikek/fix/973-jpn-ocr

Use different Japanese traineddata files for Tesseract
This commit is contained in:
mergify[bot] 2021-08-13 14:59:04 +00:00 committed by GitHub
commit 1d90095707
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -63,6 +63,12 @@ RUN wget ${joex_url:-https://github.com/eikek/docspell/releases/download/v$versi
rm docspell-joex-*.zip && \ rm docspell-joex-*.zip && \
ln -snf docspell-joex-* docspell-joex ln -snf docspell-joex-* docspell-joex
# Use the tessdata_fast data files for Japanese, because they work better.
# See #973.
# Each file is written straight to its final path with -O, so the result does
# not depend on the contents of the build's working directory, and a file that
# already exists under the same name in /usr/share/tessdata is overwritten.
# NOTE(review): the URLs track the master branch, so the downloaded files can
# change between builds; consider pinning to a commit and verifying a checksum.
RUN \
  wget -O /usr/share/tessdata/jpn_vert.traineddata \
    https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn_vert.traineddata && \
  wget -O /usr/share/tessdata/jpn.traineddata \
    https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn.traineddata
# Copy the startup script into the image and make it the container's entry
# point (exec form); "-J-XX:+UseG1GC" is always passed to it as its first
# argument.
COPY joex-entrypoint.sh /opt/joex-entrypoint.sh
ENTRYPOINT ["/opt/joex-entrypoint.sh", "-J-XX:+UseG1GC"]