mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 10:59:33 +00:00
Use different Japanese trained-data files for Tesseract
They seem to work better, as suggested here: https://github.com/tesseract-ocr/tessdata/issues/119 (Refs: #973)
This commit is contained in:
parent
f79aa447b5
commit
326cf1c087
@ -63,6 +63,12 @@ RUN wget ${joex_url:-https://github.com/eikek/docspell/releases/download/v$versi
|
|||||||
rm docspell-joex-*.zip && \
|
rm docspell-joex-*.zip && \
|
||||||
ln -snf docspell-joex-* docspell-joex
|
ln -snf docspell-joex-* docspell-joex
|
||||||
|
|
||||||
|
# Use the tessdata_fast trained-data files for Japanese, because they work better. See #973
|
||||||
|
RUN \
|
||||||
|
wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn_vert.traineddata && \
|
||||||
|
wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn.traineddata && \
|
||||||
|
mv jpn*.traineddata /usr/share/tessdata
|
||||||
|
|
||||||
COPY joex-entrypoint.sh /opt/joex-entrypoint.sh
|
COPY joex-entrypoint.sh /opt/joex-entrypoint.sh
|
||||||
|
|
||||||
ENTRYPOINT ["/opt/joex-entrypoint.sh", "-J-XX:+UseG1GC"]
|
ENTRYPOINT ["/opt/joex-entrypoint.sh", "-J-XX:+UseG1GC"]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user