# Source: GitHub gist ahmedtijaninet/1038476b0d9708d0c0c51d537c49f77a
# Note from the gist page: this file contains bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To
# review, open the file in an editor that reveals hidden Unicode characters.
import csv
import os
import re

import cv2
import numpy as np
import pytesseract

# Percentage of the best (lowest-distance) feature matches kept for alignment.
per = 25
pixelThreshold = 500

# Regions of interest on the aligned form. Run ImgKeyPointSelector.py with the
# query image to obtain the corner points.
# Each entry is [top-left (x, y), bottom-right (x, y), field type, field name].
# The field type must be exactly 'text' for the region to be passed to OCR.
roi = [
    [(1206, 304), (1800, 400), 'text', 'Nic No'],
    [(964, 888), (1884, 984), 'text', 'Name'],
]

# Path of the tesseract executable. Set the TESSERACT_CMD environment variable
# to override it; the default is the original author's install location.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD",
    "C:/Users/PunsisiK.LOITL-SE03/AppData/Local/Tesseract-OCR/tesseract.exe",
)

# --- Format data (OPTIONAL) ---
def idnoformat(id):  # noqa: A002 - parameter name kept for existing callers
    """Extract an ID number from raw OCR text.

    Args:
        id: The OCR output for the ID field, as a string.

    Returns:
        The first run of digits that stands alone as a word, if there is one.
        Otherwise up to 12 characters starting at the first digit found, which
        keeps IDs that have a letter attached to the digits. ``'N/A'`` when
        the text is empty or contains no digit at all.
    """
    if id == '':
        # Previously fell through and returned None; use the same sentinel as
        # the other "nothing found" path.
        return 'N/A'

    standalone = re.findall(r'\b\d+\b', id)
    if standalone:
        return standalone[0]

    # No digit run bounded by word breaks: digits are glued to letters.
    match = re.search(r'\d+', id)
    if match is None:
        return 'N/A'
    start = match.start()
    return id[start:start + 12]
def nameformat(name):
    """Clean a name read by OCR so that only letters remain.

    The text is split on whitespace. Tokens that are already purely alphabetic
    are kept as they are; any other token has every character outside
    ``a-zA-Z`` removed. Tokens left empty by that step are dropped.

    Args:
        name: The OCR output for the name field.

    Returns:
        The cleaned tokens joined by single spaces, with no leading or
        trailing space. ``'N/A'`` when the input is empty or no letters
        survive the clean-up.
    """
    if name == '':
        return 'N/A'

    cleaned = []
    for token in str(name).split():
        if not token.isalpha():
            token = re.sub('[^a-zA-Z]+', '', token)
        if token:
            cleaned.append(token)

    # Joining avoids the stray leading space the concatenation loop produced
    # (the original called lstrip() but discarded its result).
    return " ".join(cleaned) if cleaned else 'N/A'


# --- Read the query image ---
imgQ = cv2.imread('QueryImg/New NIC.png')
if imgQ is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise SystemExit("Could not read query image 'QueryImg/New NIC.png'")
h, w, c = imgQ.shape

# Key points and descriptors of the query image.
orb = cv2.ORB_create(7900)
kp1, des1 = orb.detectAndCompute(imgQ, None)

# Read all the images in the NIC folder.
path = 'NIC'  # name of the folder
myPicList = os.listdir(path)
print(myPicList)

# One matcher is enough for every sample image, so build it outside the loop.
bf = cv2.BFMatcher(cv2.NORM_HAMMING)

NICdata = []
for j, y in enumerate(myPicList):
    img = cv2.imread(os.path.join(path, y))
    if img is None:
        # Not an image (or unreadable): skip it rather than crash the batch.
        print(f'Skipping {y}: could not be read as an image')
        continue

    kp2, des2 = orb.detectAndCompute(img, None)
    if des2 is None:
        print(f'Skipping {y}: no features detected')
        NICdata.append([])  # written to the CSV as an "N/A" row
        continue

    # Match the sample's key points against the query's and keep the best
    # `per` percent. sorted() is used because match() may return a tuple,
    # which has no in-place sort().
    matches = sorted(bf.match(des2, des1), key=lambda m: m.distance)
    good = matches[:int(len(matches) * (per / 100))]
    if len(good) < 4:
        # A homography needs at least four point correspondences.
        print(f'Skipping {y}: not enough good matches to align')
        NICdata.append([])
        continue

    # Locations of the matched key points in both images, used to warp the
    # sample so that it lines up with the query image.
    srcPoints = np.float32([kp2[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dstPoints = np.float32([kp1[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M, _ = cv2.findHomography(srcPoints, dstPoints, cv2.RANSAC, 5.0)
    if M is None:
        print(f'Skipping {y}: homography could not be estimated')
        NICdata.append([])
        continue

    imgScan = cv2.warpPerspective(img, M, (w, h))
    imgShow = imgScan.copy()
    imgMask = np.zeros_like(imgShow)
    myData = []
    print(f'################## Extracting Data from Form {j} ##################')
    for r in roi:
        # Highlight the region on the preview image and crop it for reading.
        cv2.rectangle(imgMask, (r[0][0], r[0][1]), (r[1][0], r[1][1]), (0, 255, 0), cv2.FILLED)
        imgShow = cv2.addWeighted(imgShow, 0.99, imgMask, 0.1, 0)
        imgCrop = imgScan[r[0][1]:r[1][1], r[0][0]:r[1][0]]
        # strip() tolerates stray whitespace in the ROI type label.
        if r[2].strip() == 'text':
            # Run OCR once and reuse the result for both the log and the data.
            text = pytesseract.image_to_string(imgCrop)
            print('{} :{}'.format(r[3], text))
            myData.append(text)
    NICdata.append(myData)
    cv2.imshow(y + "2", imgShow)

print(NICdata)

# Write the data to a CSV file (OPTIONAL).
with open('NicData.csv', 'w', newline='', encoding='utf-16') as f:
    theWriter = csv.writer(f)
    theWriter.writerow(['ID', 'Name'])
    for data in NICdata:
        if len(data) == 0:
            theWriter.writerow(["N/A", "N/A"])
        elif len(data) == 1:
            theWriter.writerow([idnoformat(data[0]), "N/A"])
        else:
            theWriter.writerow([idnoformat(data[0]), nameformat(data[1])])

cv2.imshow("Output", imgQ)
cv2.waitKey(0)
# (GitHub page footer: sign up for free to join this conversation on GitHub;
# already have an account? Sign in to comment.)