Created
May 5, 2019 01:03
-
-
Save ludat/ec6d30a2d8e92c7d0cc6e23dbaf91620 to your computer and use it in GitHub Desktop.
Python 10pines recibo OCR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import os | |
import sys | |
import numpy as np | |
def findTemplateInImage(img, templatePath):
    """Locate the best match of a template image inside ``img``.

    The template is matched with ``cv2.TM_SQDIFF_NORMED``, a squared-difference
    score, so the best match is the *minimum* of the result map.

    Args:
        img: Image to search; passed unchanged to ``cv2.matchTemplate``.
        templatePath: Path of the template image file. It is loaded with
            imread flag 0 (grayscale in OpenCV).

    Returns:
        The ``min_loc`` value reported by ``cv2.minMaxLoc`` for the match
        result, i.e. the location of the lowest (best) score.

    Raises:
        OSError: If the template file could not be read.
    """
    template = cv2.imread(templatePath, 0)
    if template is None:
        # cv2.imread signals an unreadable file by returning None instead of
        # raising; fail here with a clear message rather than deep inside
        # matchTemplate.
        raise OSError('could not read template image: ' + str(templatePath))

    res = cv2.matchTemplate(img, template, cv2.TM_SQDIFF_NORMED)
    # For TM_SQDIFF / TM_SQDIFF_NORMED the minimum is the best match, so only
    # the minimum location is needed.
    _, _, min_loc, _ = cv2.minMaxLoc(res)
    return min_loc
# Script body: crop the CUIL and subtotal fields out of a receipt image.
#
# Usage: python detect.py <receipt-image>
# Reads the GAN_RECOG_WORKDIR environment variable to find the
# subtotal.template.png and cuil.template.png files, and writes cuil.png and
# subtotal.png into the current working directory.
if len(sys.argv) < 2:
    sys.exit('usage: ' + sys.argv[0] + ' <receipt-image>')

# Flag 0 loads the image as grayscale, the same way the templates are loaded.
img = cv2.imread(sys.argv[1], 0)
if img is None:
    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded.
    sys.exit('could not read image: ' + sys.argv[1])

subtotal = findTemplateInImage(img, os.environ['GAN_RECOG_WORKDIR'] + '/subtotal.template.png')
cuil = findTemplateInImage(img, os.environ['GAN_RECOG_WORKDIR'] + '/cuil.template.png')

# CUIL
##############################################################################
# Crop a fixed window relative to the matched location: rows +15..+40 and
# columns +150..+270. Index [1] of the location is used for the row axis and
# index [0] for the column axis.
# NOTE(review): the pixel offsets are presumably tuned to the receipt layout
# at the resolution the pages are rendered at - confirm before changing either.
cuilImage = img[cuil[1]+15:cuil[1]+40, cuil[0]+150:cuil[0]+270]
cuilImageName = 'cuil.png'
if not cv2.imwrite(cuilImageName, cuilImage):
    sys.exit('could not write ' + cuilImageName)
print('written cuil')

# SUBTOTAL
##############################################################################
# Same idea as above with a narrower window: rows +15..+40, columns +160..+240.
subtotalImage = img[subtotal[1]+15:subtotal[1]+40, subtotal[0]+160:subtotal[0]+240]
subtotalImageName = 'subtotal.png'
if not cv2.imwrite(subtotalImageName, subtotalImage):
    sys.exit('could not write ' + subtotalImageName)
print('written subtotal')

# Debugging hint: cv2.imshow(name, crop) followed by cv2.waitKey(0) and
# cv2.destroyAllWindows() can be used to inspect the crops interactively.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Driver script: splits a PDF of receipts into pages and OCRs two fields
# (CUIL and subtotal) from each page.
#
# -x trace commands, -e exit on error, -u error on unset variables,
# pipefail: a pipeline fails if any command in it fails.
set -xeuo pipefail
# Absolute path of the directory containing this script. Exported because
# detect.py reads it from the environment to locate its template images, and
# detect_single_recipe uses it to locate detect.py.
export GAN_RECOG_WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
function detect_single_recipe() {
    # OCR the CUIL and subtotal fields of one receipt page image.
    #
    # Arguments:
    #   $1  path to the page image (PNG).
    # Environment:
    #   GAN_RECOG_WORKDIR  directory containing detect.py.
    # Output:
    #   Writes the recognised subtotal into "<CUIL>.txt" in the current
    #   directory, where <CUIL> is the recognised CUIL text.
    # Returns:
    #   0 on success, 1 if any step failed or no CUIL text was recognised.
    #
    # Failures are checked explicitly instead of relying on the caller's
    # `set -e`: shell options are not carried over into the separate shells
    # that GNU parallel starts for an exported function.
    local input="$1"
    local name tempdir status=0
    local cuil="" subtotal=""

    echo "Translating $input"
    # detect.py is run from inside the temp dir, so it must be given the
    # bare file name of the copy, not the caller-relative path.
    name="$(basename "$input")"
    tempdir="$(mktemp -d --suffix=ganancias)" || return 1

    cp "$input" "$tempdir" || status=1

    if [[ $status -eq 0 ]]; then
        # Subshell: the directory change stays local, so the caller's working
        # directory is untouched and no `cd -` (which prints to stdout) is needed.
        (
            cd "$tempdir" &&
            python "$GAN_RECOG_WORKDIR/detect.py" "$name" &&
            # Re-sample the crops to 300 PPI before handing them to tesseract.
            convert -units PixelsPerInch "cuil.png" -resample 300 "cuil.png" &&
            convert -units PixelsPerInch "subtotal.png" -resample 300 "subtotal.png"
        ) || status=1
    fi

    if [[ $status -eq 0 ]]; then
        cuil="$(tesseract "$tempdir/cuil.png" stdout)" || status=1
        subtotal="$(tesseract "$tempdir/subtotal.png" stdout)" || status=1
    fi

    # Always clean up, including on failure.
    rm -rf "$tempdir"

    # Strip every whitespace character (spaces, tabs, newlines, form feeds)
    # from the OCR output so it is safe to use as a file name / single value.
    cuil="${cuil//[[:space:]]/}"
    subtotal="${subtotal//[[:space:]]/}"

    if [[ $status -ne 0 || -z "$cuil" ]]; then
        echo "detect_single_recipe: could not extract data from $input" >&2
        return 1
    fi

    echo "$subtotal" > "$cuil.txt"
}
# Entry point. Usage: <this script> <receipts.pdf>
# Splits the PDF into single pages, renders each page to PNG and runs
# detect_single_recipe on every page.
if [[ $# -lt 1 ]]; then
    # Without this guard `set -u` aborts with a bare "unbound variable" error.
    echo "usage: $0 <receipts.pdf>" >&2
    exit 1
fi
# Resolve to an absolute path now, because the script changes directory below.
PDF_NAME=$(realpath "${1}")
# Working directory is the argument with its last ".suffix" removed.
WORKING_DIRECTORY=${1%.*}
mkdir -p "$WORKING_DIRECTORY"
cd "$WORKING_DIRECTORY"
# Split the PDF into one file per page; the pg*.pdf glob below picks them up.
pdftk "$PDF_NAME" burst
# Render every page PDF to a PNG with the same base name ({.} drops the extension).
parallel --eta -j 4 'convert -density 150 {} -quality 90 {.}.png' ::: pg*.pdf
# Export the function so the shells started by parallel can call it.
export -f detect_single_recipe
parallel --eta -j 4 'detect_single_recipe {}' ::: pg*.png
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment