Created
January 4, 2024 18:43
-
-
Save bademux/473c79cd7bf3a4d67c31cb3074c03bf5 to your computer and use it in GitHub Desktop.
Dockerfile.tesseract-ocr-mrz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build stage: fetches tesstrain and the tesseractMRZ sources into /build and
# runs the tesstrain make targets with MODEL_NAME=mrz. The runtime stage picks
# up the result from /build/data/mrz.traineddata.
FROM alpine:3.19 AS mrz-builder

# Make a RUN fail when any command in a pipeline fails. Without this, a failed
# `wget` is masked by the exit status of the `tar` it is piped into.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]

# Build-time tooling plus the tesseract packages used during training.
RUN apk add --no-cache \
        bash \
        bc \
        findutils \
        leptonica-dev \
        make \
        py3-pip \
        python3 \
        tesseract-ocr \
        tesseract-ocr-data-eng \
        tesseract-ocr-data-pol \
        unzip \
        wget

WORKDIR /build

# Git ref (branch, tag or commit) of tesstrain to download. Override with
# --build-arg TESSTRAIN_REF=<ref> to get a reproducible build.
ARG TESSTRAIN_REF=master
RUN wget -qO- "https://github.com/tesseract-ocr/tesstrain/archive/${TESSTRAIN_REF}.tar.gz" \
    | tar -zxv --strip-components=1

# --break-system-packages lets pip install into Alpine's externally managed
# system Python; this stage is discarded, so no virtualenv is used.
RUN pip install --no-cache-dir --break-system-packages -r requirements.txt

RUN make tesseract-langdata -e -j

# Git ref of tesseractMRZ to download; it is unpacked over the same /build
# directory as tesstrain. Declared here so that changing it does not
# invalidate the cached tesstrain layers above.
ARG TESSERACT_MRZ_REF=master
RUN wget -qO- "https://github.com/DoubangoTelecom/tesseractMRZ/archive/${TESSERACT_MRZ_REF}.tar.gz" \
    | tar -zxv --strip-components=1

# Read by the tesstrain Makefile; `make -e` gives environment variables
# precedence over assignments inside the Makefile.
ENV GROUND_TRUTH_DIR=dataset \
    MODEL_NAME=mrz

RUN make training -e -j && make traineddata -e -j

# To skip training and use a prepared model instead, comment out the RUN above
# and enable the RUN below. COPY cannot be used for this: it reads from the
# build context, not from this stage's filesystem.
# NOTE(review): assumes the tesseractMRZ archive ships
# tessdata_best/mrz.traineddata - verify before relying on it.
#RUN mkdir -p data && cp tessdata_best/mrz.traineddata data/mrz.traineddata

# Usage (set -f to this file's name; the trailing "." is the build context):
#   docker build -f tesseract-ocr-mrz.Dockerfile -t tesseract-ocr-mrz .
#   docker run -i tesseract-ocr-mrz < image.png
# Runtime stage: tesseract plus the trained MRZ model, run as an unprivileged
# user. Reads an image from stdin and writes the recognized text to stdout.
FROM alpine:3.19 AS runtime

RUN apk add --no-cache \
        tesseract-ocr \
        tesseract-ocr-data-eng \
        tesseract-ocr-data-pol

# Place the model produced by the builder stage next to the packaged language
# data so it can be selected with `-l mrz`.
COPY --from=mrz-builder /build/data/mrz.traineddata /usr/share/tessdata/mrz.traineddata

# -D: no password, -H: no home directory.
RUN adduser -D -H user
USER user

# ENTRYPOINT is the binary and CMD the default arguments, so the arguments can
# be replaced at `docker run` time, e.g.:
#   docker run -i tesseract-ocr-mrz stdin stdout -l eng
ENTRYPOINT ["tesseract"]
CMD ["stdin", "stdout", "-l", "mrz", "--psm", "6"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment