mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-10-31 17:50:11 +00:00 
			
		
		
		
	Since this file cannot be changed inside the image, and people need to specify a new file or env variables anyway, it doesn't make sense to add it. Also, if it is present, it takes precedence over the env variables.
		
			
				
	
	
		
			81 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
# Tag of the Alpine base image. The default, "latest", is the tag this file
# has always built from; pass `--build-arg alpine_tag=...` to pin a specific
# release and get reproducible builds.
ARG alpine_tag=latest
FROM alpine:${alpine_tag}

# Docspell release number; used to build the default joex download URL.
ARG version=
# Optional full URL of the joex zip. When non-empty it is downloaded instead
# of the release URL derived from `version`.
ARG joex_url=
# Location of the unoconv script that gets installed to /usr/local/bin/unoconv.
ARG UNO_URL=https://raw.githubusercontent.com/unoconv/unoconv/0.9.0/unoconv
# Automatic platform argument (e.g. "linux/arm/v7"); selects the JDK package.
ARG TARGETPLATFORM

# Default JVM options baked into the runtime environment (1536 MiB max heap).
# NOTE(review): presumably read by the joex launcher script - confirm.
ENV JAVA_OPTS="-Xmx1536M"

# Install the JDK and the document-processing toolchain in one layer.
#  - openjdk11 is used everywhere except linux/arm/v7, which gets openjdk8.
#  - ocrmypdf is installed through pip; the -dev packages, g++ and py3-pip are
#    only present for that step and are removed again in this same layer so
#    they do not end up in the image.
#  - pip runs with --no-cache-dir so its download cache is not stored in the layer.
#  - curl runs with -f so an HTTP error aborts the build instead of saving the
#    error page as the unoconv script.
#  - ln -sf tolerates a base image in which /usr/bin/python already exists.
RUN JDKPKG="openjdk11"; \
    if [ "$TARGETPLATFORM" = "linux/arm/v7" ]; then JDKPKG="openjdk8"; fi; \
    apk add --no-cache $JDKPKG \
    tzdata \
    bash \
    curl \
    ghostscript \
    tesseract-ocr \
    tesseract-ocr-data-deu \
    tesseract-ocr-data-fra \
    tesseract-ocr-data-ita \
    tesseract-ocr-data-spa \
    tesseract-ocr-data-por \
    tesseract-ocr-data-ces \
    tesseract-ocr-data-nld \
    tesseract-ocr-data-dan \
    tesseract-ocr-data-fin \
    tesseract-ocr-data-nor \
    tesseract-ocr-data-swe \
    tesseract-ocr-data-rus \
    tesseract-ocr-data-ron \
    tesseract-ocr-data-lav \
    tesseract-ocr-data-jpn \
    tesseract-ocr-data-heb \
    unpaper \
    wkhtmltopdf \
    libreoffice \
    ttf-droid-nonlatin \
    ttf-droid \
    ttf-dejavu \
    ttf-freefont \
    ttf-liberation \
    libxml2-dev \
    libxslt-dev \
    pngquant \
    zlib-dev \
    g++ \
    qpdf \
    py3-pip \
    python3-dev \
    libffi-dev \
    qpdf-dev \
    openssl-dev \
    ocrmypdf \
  && pip3 install --no-cache-dir --upgrade pip \
  && pip3 install --no-cache-dir ocrmypdf \
  && curl -fsSL "$UNO_URL" -o /usr/local/bin/unoconv \
  && chmod +x /usr/local/bin/unoconv \
  && apk del curl libxml2-dev libxslt-dev zlib-dev g++ python3-dev py3-pip libffi-dev qpdf-dev openssl-dev \
  && ln -sf /usr/bin/python3 /usr/bin/python

WORKDIR /opt
# Download and unpack the joex distribution, then expose it under the stable
# path /opt/docspell-joex via a symlink to the versioned directory.
#  - joex_url, when set, overrides the GitHub release URL built from `version`.
#  - Fail early with a clear message when neither build arg is given; otherwise
#    the download would target ".../v/docspell-joex-.zip" and die with a 404.
#  - The bundled docspell-joex.conf is deleted: it cannot be edited inside the
#    image and, when present, would take precedence over environment variables.
RUN if [ -z "$joex_url" ] && [ -z "$version" ]; then \
      echo "error: pass --build-arg version=X.Y.Z or --build-arg joex_url=URL" >&2; \
      exit 1; \
    fi; \
    wget "${joex_url:-https://github.com/eikek/docspell/releases/download/v$version/docspell-joex-$version.zip}" && \
    unzip docspell-joex-*.zip && \
    rm docspell-joex-*.zip && \
    ln -snf docspell-joex-* docspell-joex && \
    rm docspell-joex/conf/docspell-joex.conf

# Replace the packaged Japanese tesseract data with the tessdata_fast files,
# because they work better. See #973
# The files are fetched into the working directory first and then moved into
# /usr/share/tessdata; `set -e` aborts the step as soon as a command fails.
RUN set -e; \
    wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn_vert.traineddata; \
    wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/master/jpn.traineddata; \
    mv jpn*.traineddata /usr/share/tessdata

# Launcher script taken from the build context.
COPY joex-entrypoint.sh /opt/joex-entrypoint.sh

# Port documented for this image; the health check below probes the same port.
EXPOSE 7878

# Exec-form entrypoint; "-J-XX:+UseG1GC" is always passed to the script as its
# first argument.
ENTRYPOINT ["/opt/joex-entrypoint.sh", "-J-XX:+UseG1GC"]

# Probe the version endpoint once a minute after a 30s start period; each
# probe may take up to 10s, with 2 retries.
HEALTHCHECK --start-period=30s --interval=1m --timeout=10s --retries=2 \
  CMD wget --spider http://localhost:7878/api/info/version