From 40642dea1016406fad2e5ae883227c20a77ecfff Mon Sep 17 00:00:00 2001
From: xshadowlegendx
Date: Fri, 17 Mar 2023 17:50:48 +0700
Subject: [PATCH] temporary download khmer traineddata before the package being added to registry

---
Review notes (placed below the "---" separator, so they are not part of
the commit message):

* Purpose: stop installing Khmer OCR data through apk and fetch the
  traineddata file directly instead.
* Hunk 1 drops tesseract-ocr-data-khm from the package list of the
  "RUN apk update" step.
* Hunk 2 adds a separate RUN step that downloads khm.traineddata with
  wget from the main branch of tesseract-ocr/tessdata and moves it into
  /usr/share/tessdata.
* Per the subject line this is a stop-gap; presumably the package line
  should be restored and the download step removed once
  tesseract-ocr-data-khm is available in the registry -- TODO confirm.
* NOTE(review): the existing Japanese step pulls from tessdata_fast via
  raw.githubusercontent.com, while the new step pulls from tessdata via
  github.com/.../raw/main. Confirm the different repository is intended.
* NOTE(review): the URL tracks a branch and the file is not checksummed,
  so the downloaded data may differ between image builds.
* NOTE(review): mv relies on /usr/share/tessdata already existing as a
  directory; presumably the tesseract packages create it -- verify.
* NOTE(review): continuation lines are assumed to be indented with two
  spaces; confirm against the target file before applying.

 docker/dockerfiles/joex.dockerfile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/docker/dockerfiles/joex.dockerfile b/docker/dockerfiles/joex.dockerfile
index 89033c4f..6214b4b1 100644
--- a/docker/dockerfiles/joex.dockerfile
+++ b/docker/dockerfiles/joex.dockerfile
@@ -33,7 +33,6 @@ RUN apk update && \
   tesseract-ocr-data-pol \
   tesseract-ocr-data-est \
   tesseract-ocr-data-ukr \
-  tesseract-ocr-data-khm \
   unpaper \
   weasyprint \
   libreoffice \
@@ -70,6 +69,12 @@ RUN wget ${joex_url:-https://github.com/eikek/docspell/releases/download/v$versi
   ln -snf docspell-joex-* docspell-joex && \
   rm docspell-joex/conf/docspell-joex.conf
 
+# temporary download traineddata directly for khmer lang
+# before tesseract-ocr-data-khm being added to the registry
+RUN \
+  wget https://github.com/tesseract-ocr/tessdata/raw/main/khm.traineddata && \
+  mv khm.traineddata /usr/share/tessdata
+
 # Using these data files for japanese, because they work better. See #973
 RUN \
   wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn_vert.traineddata && \