Skip to content

Instantly share code, notes, and snippets.

@ludat
Created May 5, 2019 01:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ludat/ec6d30a2d8e92c7d0cc6e23dbaf91620 to your computer and use it in GitHub Desktop.
Save ludat/ec6d30a2d8e92c7d0cc6e23dbaf91620 to your computer and use it in GitHub Desktop.
Python 10pines recibo OCR
import cv2
import os
import sys
import numpy as np
def findTemplateInImage(img, templatePath):
    """Locate the best match of a template image inside ``img``.

    The template is read from ``templatePath`` with imread flag ``0`` and
    compared against ``img`` using normalised squared-difference template
    matching (``cv2.TM_SQDIFF_NORMED``).

    Args:
        img: Image to search, in a form accepted by ``cv2.matchTemplate``.
        templatePath: Path of the template image file.

    Returns:
        The location of the minimum of the match result as reported by
        ``cv2.minMaxLoc``. Callers in this file index it as
        ``(column, row)``.

    Raises:
        FileNotFoundError: If the template cannot be read (missing file or
            not a decodable image).
    """
    template = cv2.imread(templatePath, 0)
    if template is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside matchTemplate.
        raise FileNotFoundError(
            'could not read template image: ' + str(templatePath)
        )

    res = cv2.matchTemplate(img, template, cv2.TM_SQDIFF_NORMED)

    # For TM_SQDIFF / TM_SQDIFF_NORMED the best match is the minimum, so only
    # the minimum location is of interest.
    _, _, min_loc, _ = cv2.minMaxLoc(res)
    return min_loc
# Script body: find the "subtotal" and "cuil" label templates on the page image
# given as the first command-line argument, then save the region next to each
# label as its own PNG in the current working directory.
if len(sys.argv) < 2:
    sys.exit('usage: ' + sys.argv[0] + ' IMAGE')

# Flag 0 matches the flag used when loading the templates.
img = cv2.imread(sys.argv[1], 0)
if img is None:
    # cv2.imread returns None on failure; without this check the slicing
    # below would fail with an unrelated-looking TypeError.
    sys.exit('could not read input image: ' + sys.argv[1])

# The template images live in the directory named by GAN_RECOG_WORKDIR.
subtotal = findTemplateInImage(img, os.environ['GAN_RECOG_WORKDIR'] + '/subtotal.template.png')
cuil = findTemplateInImage(img, os.environ['GAN_RECOG_WORKDIR'] + '/cuil.template.png')

# CUIL
##############################################################################
# Crop rows +15..+40 and columns +150..+270 relative to the matched label.
# NOTE(review): offsets are presumably tuned to the page render resolution --
# confirm before changing the rendering density.
cuilImage = img[cuil[1]+15:cuil[1]+40, cuil[0]+150:cuil[0]+270]
cuilImageName = 'cuil.png'
cv2.imwrite(cuilImageName, cuilImage)
print('written cuil')

# SUBTOTAL
##############################################################################
# Crop rows +15..+40 and columns +160..+240 relative to the matched label.
subtotalImage = img[subtotal[1]+15:subtotal[1]+40, subtotal[0]+160:subtotal[0]+240]
subtotalImageName = 'subtotal.png'
cv2.imwrite(subtotalImageName, subtotalImage)
print('written subtotal')
#!/bin/bash
# Trace commands (-x), abort on the first failing command (-e), treat unset
# variables as errors (-u) and make a pipeline fail if any stage fails.
set -xeuo pipefail
# Absolute path of the directory containing this script. Exported because
# detect.py reads GAN_RECOG_WORKDIR to locate its template images.
export GAN_RECOG_WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# OCR a single page image.
#
# Copies the page ($1) into a fresh temporary directory, runs detect.py there
# to produce cuil.png and subtotal.png, resamples both crops to 300 PPI, runs
# tesseract on each, and finally writes the recognised subtotal into
# "<recognised cuil>.txt" in the directory the function was called from.
#
# Arguments:
#   $1 - path of the page image to process
function detect_single_recipe() {
    local page_name
    local TEMPDIR CUIL SUBTOTAL

    echo "Translating $1"
    TEMPDIR="$(mktemp -d --suffix=ganancias)"
    cp "$1" "$TEMPDIR"
    # After the cd below only the copy is guaranteed to be reachable, so refer
    # to it by basename; a relative "$1" with a directory part would not
    # resolve from inside the temporary directory.
    page_name="$(basename "$1")"
    cd "$TEMPDIR"
    python "$GAN_RECOG_WORKDIR/detect.py" "$page_name"
    convert -units PixelsPerInch "cuil.png" -resample 300 "cuil.png"
    convert -units PixelsPerInch "subtotal.png" -resample 300 "subtotal.png"
    # Strip ALL whitespace from the OCR text. sed works line by line, so the
    # previous 's/[ \n]//g' never removed embedded newlines (or the form feed
    # tesseract may emit as a page separator), which then leaked into the
    # output filename.
    CUIL=$(tesseract "cuil.png" stdout | tr -d '[:space:]')
    SUBTOTAL=$(tesseract "subtotal.png" stdout | tr -d '[:space:]')
    cd -
    rm -rf "$TEMPDIR"
    if [[ -z "$CUIL" ]]; then
        # An empty CUIL would write a hidden ".txt" that every failed page
        # overwrites; report the problem instead.
        echo "No CUIL recognised in $1; skipping" >&2
        return 0
    fi
    echo "$SUBTOTAL" > "$CUIL.txt"
}
# Main flow: split the PDF given as $1 into pages, render each page to PNG and
# run detect_single_recipe on every rendered page, four jobs at a time.
if [[ $# -lt 1 ]]; then
    # Under `set -u` a missing argument would otherwise abort with a bare
    # "unbound variable" message.
    echo "usage: $0 FILE.pdf" >&2
    exit 1
fi
# Resolve the PDF path before changing directory below.
PDF_NAME=$(realpath "${1}")
# Work inside a directory named after the PDF without its extension.
WORKING_DIRECTORY=${1%.*}
mkdir -p "$WORKING_DIRECTORY"
cd "$WORKING_DIRECTORY"
pdftk "$PDF_NAME" burst
# {.} is the input name with its extension removed.
parallel --eta -j 4 'convert -density 150 {} -quality 90 {.}.png' ::: pg*.pdf
# Export the function so the shells spawned by parallel can call it.
export -f detect_single_recipe
parallel --eta -j 4 'detect_single_recipe {}' ::: pg*.png
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment