-
-
Save SatyakiDe2019/ec543b5892db4c05de10b80266358d71 to your computer and use it in GitHub Desktop.
This is the main class of python script that will invoke the extraction of texts from a WebCAM.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################## | |
#### Written By: SATYAKI DE #### | |
#### Written On: 22-Jul-2022 #### | |
#### Modified On 25-Jul-2022 #### | |
#### #### | |
#### Objective: This is the main class of #### | |
#### python script that will invoke the #### | |
#### extraction of texts from a WebCAM. #### | |
#### #### | |
################################################## | |
# Importing necessary packages | |
from clsConfig import clsConfig as cf | |
from imutils.object_detection import non_max_suppression | |
import numpy as np | |
import pytesseract | |
import imutils | |
import time | |
import cv2 | |
import time | |
############################################### | |
### Global Section ### | |
############################################### | |
# Two output layer names for the text detector model | |
lNames = cf.conf['LAYER_DET'] | |
# Tesseract OCR text param values | |
strVal = "-l " + str(cf.conf['LANG']) + " --oem " + str(cf.conf['OEM_VAL']) + " --psm " + str(cf.conf['PSM_VAL']) + "" | |
config = (strVal) | |
############################################### | |
### End of Global Section ### | |
############################################### | |
class clsReadingTextFromStream: | |
def __init__(self): | |
self.sep = str(cf.conf['SEP']) | |
self.Curr_Path = str(cf.conf['INIT_PATH']) | |
self.CacheL = int(cf.conf['CACHE_LIM']) | |
self.modelPath = str(cf.conf['MODEL_PATH']) + str(cf.conf['MODEL_FILE_NAME']) | |
self.minConf = float(cf.conf['MIN_CONFIDENCE']) | |
self.wt = int(cf.conf['WIDTH']) | |
self.ht = int(cf.conf['HEIGHT']) | |
self.pad = float(cf.conf['PADDING']) | |
self.title = str(cf.conf['TITLE']) | |
self.Otitle = str(cf.conf['ORIG_TITLE']) | |
self.drawTag = cf.conf['DRAW_TAG'] | |
self.aRange = int(cf.conf['ASCII_RANGE']) | |
self.sParam = cf.conf['SUBTRACT_PARAM'] | |
def findBoundBox(self, boxes, res, rW, rH, orig, origW, origH, pad): | |
try: | |
# Loop over the bounding boxes | |
for (spX, spY, epX, epY) in boxes: | |
# Scale the bounding box coordinates based on the respective | |
# ratios | |
spX = int(spX * rW) | |
spY = int(spY * rH) | |
epX = int(epX * rW) | |
epY = int(epY * rH) | |
# To obtain a better OCR of the text we can potentially | |
# apply a bit of padding surrounding the bounding box. | |
# And, computing the deltas in both the x and y directions | |
dX = int((epX - spX) * pad) | |
dY = int((epY - spY) * pad) | |
# Apply padding to each side of the bounding box, respectively | |
spX = max(0, spX - dX) | |
spY = max(0, spY - dY) | |
epX = min(origW, epX + (dX * 2)) | |
epY = min(origH, epY + (dY * 2)) | |
# Extract the actual padded ROI | |
roi = orig[spY:epY, spX:epX] | |
# Choose the proper OCR Config | |
text = pytesseract.image_to_string(roi, config=config) | |
# Add the bounding box coordinates and OCR'd text to the list | |
# of results | |
res.append(((spX, spY, epX, epY), text)) | |
# Sort the results bounding box coordinates from top to bottom | |
res = sorted(res, key=lambda r:r[0][1]) | |
return res | |
except Exception as e: | |
x = str(e) | |
print(x) | |
return res | |
def predictText(self, imgScore, imgGeo): | |
try: | |
minConf = self.minConf | |
# Initializing the bounding box rectangles & confidence score by | |
# extracting the rows & columns from the imgScore volume. | |
(numRows, numCols) = imgScore.shape[2:4] | |
rects = [] | |
confScore = [] | |
for y in range(0, numRows): | |
# Extract the imgScore probabilities to derive potential | |
# bounding box coordinates that surround text | |
imgScoreData = imgScore[0, 0, y] | |
xVal0 = imgGeo[0, 0, y] | |
xVal1 = imgGeo[0, 1, y] | |
xVal2 = imgGeo[0, 2, y] | |
xVal3 = imgGeo[0, 3, y] | |
anglesData = imgGeo[0, 4, y] | |
for x in range(0, numCols): | |
# If our score does not have sufficient probability, | |
# ignore it | |
if imgScoreData[x] < minConf: | |
continue | |
# Compute the offset factor as our resulting feature | |
# maps will be 4x smaller than the input frame | |
(offX, offY) = (x * 4.0, y * 4.0) | |
# Extract the rotation angle for the prediction and | |
# then compute the sin and cosine | |
angle = anglesData[x] | |
cos = np.cos(angle) | |
sin = np.sin(angle) | |
# Derive the width and height of the bounding box from | |
# imgGeo | |
h = xVal0[x] + xVal2[x] | |
w = xVal1[x] + xVal3[x] | |
# Compute both the starting and ending (x, y)-coordinates | |
# for the text prediction bounding box | |
epX = int(offX + (cos * xVal1[x]) + (sin * xVal2[x])) | |
epY = int(offY - (sin * xVal1[x]) + (cos * xVal2[x])) | |
spX = int(epX - w) | |
spY = int(epY - h) | |
# Adding bounding box coordinates and probability score | |
# to the respective lists | |
rects.append((spX, spY, epX, epY)) | |
confScore.append(imgScoreData[x]) | |
# return a tuple of the bounding boxes and associated confScore | |
return (rects, confScore) | |
except Exception as e: | |
x = str(e) | |
print(x) | |
rects = [] | |
confScore = [] | |
return (rects, confScore) | |
def processStream(self, debugInd, var): | |
try: | |
sep = self.sep | |
Curr_Path = self.Curr_Path | |
CacheL = self.CacheL | |
modelPath = self.modelPath | |
minConf = self.minConf | |
wt = self.wt | |
ht = self.ht | |
pad = self.pad | |
title = self.title | |
Otitle = self.Otitle | |
drawTag = self.drawTag | |
aRange = self.aRange | |
sParam = self.sParam | |
val = 0 | |
# Initialize the video stream and allow the camera sensor to warm up | |
print("[INFO] Starting video stream...") | |
cap = cv2.VideoCapture(0) | |
# Loading the pre-trained text detector | |
print("[INFO] Loading Text Detector...") | |
net = cv2.dnn.readNet(modelPath) | |
# Loop over the frames from the video stream | |
while True: | |
try: | |
# Grab the frame from our video stream and resize it | |
success, frame = cap.read() | |
orig = frame.copy() | |
(origH, origW) = frame.shape[:2] | |
# Setting new width and height and then determine the ratio in change | |
# for both the width and height | |
(newW, newH) = (wt, ht) | |
rW = origW / float(newW) | |
rH = origH / float(newH) | |
# Resize the frame and grab the new frame dimensions | |
frame = cv2.resize(frame, (newW, newH)) | |
(H, W) = frame.shape[:2] | |
# Construct a blob from the frame and then perform a forward pass of | |
# the model to obtain the two output layer sets | |
blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), sParam, swapRB=True, crop=False) | |
net.setInput(blob) | |
(confScore, imgGeo) = net.forward(lNames) | |
# Decode the predictions, then apply non-maxima suppression to | |
# suppress weak, overlapping bounding boxes | |
(rects, confidences) = self.predictText(confScore, imgGeo) | |
boxes = non_max_suppression(np.array(rects), probs=confidences) | |
# Initialize the list of results | |
res = [] | |
# Getting BoundingBox boundaries | |
res = self.findBoundBox(boxes, res, rW, rH, orig, origW, origH, pad) | |
for ((spX, spY, epX, epY), text) in res: | |
# Display the text OCR by using Tesseract APIs | |
print("Reading Text::") | |
print("=" *60) | |
print(text) | |
print("=" *60) | |
# Removing the non-ASCII text so it can draw the text on the frame | |
# using OpenCV, then draw the text and a bounding box surrounding | |
# the text region of the input frame | |
text = "".join([c if ord(c) < aRange else "" for c in text]).strip() | |
output = orig.copy() | |
cv2.rectangle(output, (spX, spY), (epX, epY), drawTag, 2) | |
cv2.putText(output, text, (spX, spY - 20), cv2.FONT_HERSHEY_SIMPLEX, 1.2, drawTag, 3) | |
# Show the output frame | |
cv2.imshow(title, output) | |
#cv2.imshow(Otitle, frame) | |
# If the `q` key was pressed, break from the loop | |
if cv2.waitKey(1) == ord('q'): | |
break | |
val = 0 | |
except Exception as e: | |
x = str(e) | |
print(x) | |
val = 1 | |
# Performing cleanup at the end | |
cap.release() | |
cv2.destroyAllWindows() | |
return val | |
except Exception as e: | |
x = str(e) | |
print('Error:', x) | |
return 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment