Skip to content

Instantly share code, notes, and snippets.

@Soumi7
Created March 2, 2021 17:34
Show Gist options
  • Save Soumi7/f0d06804db2f3725387289f640b16598 to your computer and use it in GitHub Desktop.
Save Soumi7/f0d06804db2f3725387289f640b16598 to your computer and use it in GitHub Desktop.
from flask import Flask, request, jsonify, render_template, url_for, make_response
from werkzeug.utils import secure_filename
import os
import json
import numpy as np
# Flask application object; the @app.route decorators in this file register their views on it.
app = Flask(__name__)
# Presumably the directory where uploads would be saved (relative path "./imgdir").
# NOTE(review): no active code visible in this file reads this key — confirm before relying on it.
app.config['UPLOAD_FOLDER'] = "./imgdir"
import numpy as np
import cv2
import pandas as pd
import pytesseract
import io
from PIL import Image
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
def _decode_grayscale(file):
    """Decode an uploaded image into a single-channel OpenCV array.

    Args:
        file: The upload object taken from ``request.files``; only its
            ``read()`` method is used.

    Returns:
        The image decoded with flag ``0`` (grayscale), or ``None`` when the
        upload is empty or OpenCV cannot decode it.
    """
    # Read the bytes ourselves so decoding does not depend on the upload
    # stream being usable by np.fromfile.
    raw = np.frombuffer(file.read(), np.uint8)
    if raw.size == 0:
        return None
    return cv2.imdecode(raw, 0)


def _find_cell_boxes(img):
    """Locate candidate table cells in a grayscale image.

    Args:
        img: Grayscale image as returned by ``_decode_grayscale``.

    Returns:
        A tuple ``(boxes, mean_height, text_img)`` where ``boxes`` is a list of
        ``[x, y, w, h]`` rectangles sorted by ``y`` (only those with
        ``w < 1000`` and ``h < 500``), ``mean_height`` is the mean height of
        every detected bounding rectangle, and ``text_img`` is the image that
        cell regions are later cropped from for OCR.
    """
    # Two inverted binarisations: a fixed threshold at 128 and an Otsu one.
    _, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
    img_bin = 255 - img_bin
    _, img_bin_otsu = cv2.threshold(
        255 - img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Kernel length scales with image width; clamp to 1 so images narrower
    # than 100 px do not produce a zero-sized structuring element.
    line_len = max(1, img.shape[1] // 100)
    square_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_len))
    hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (line_len, 1))

    # Erode-then-dilate with a thin kernel keeps only long strokes in that
    # direction, i.e. the table's ruling lines.
    vertical_lines = cv2.erode(img_bin_otsu, vertical_kernel, iterations=3)
    vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=3)
    horizontal_lines = cv2.erode(img_bin, hor_kernel, iterations=5)
    horizontal_lines = cv2.dilate(horizontal_lines, hor_kernel, iterations=5)

    # Merge both line masks into one grid image and re-binarise it.
    grid = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
    grid = cv2.erode(~grid, square_kernel, iterations=3)
    _, grid = cv2.threshold(grid, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # XOR with the grid followed by NOT yields the image used for OCR crops.
    text_img = cv2.bitwise_not(cv2.bitwise_xor(img, grid))

    # [-2] selects the contour list whether findContours returns two values
    # (OpenCV 4) or three (OpenCV 3).
    contours = cv2.findContours(
        grid, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    bounding = sorted(
        (cv2.boundingRect(contour) for contour in contours),
        key=lambda rect: rect[1])
    if not bounding:
        return [], 0.0, text_img

    mean_height = np.mean([rect[3] for rect in bounding])
    boxes = [list(rect) for rect in bounding if rect[2] < 1000 and rect[3] < 500]
    return boxes, mean_height, text_img


def _group_rows(boxes, mean_height):
    """Split y-sorted boxes into table rows.

    A box joins the current row while its ``y`` is within ``mean_height / 2``
    of the previous box's ``y``; otherwise it starts a new row.

    Args:
        boxes: Non-empty list of ``[x, y, w, h]`` rectangles sorted by ``y``.
        mean_height: Reference height used for the row tolerance.

    Returns:
        List of rows, each a list of boxes in their incoming order.
    """
    rows = []
    current = [boxes[0]]
    previous = boxes[0]
    for box in boxes[1:]:
        if box[1] <= previous[1] + mean_height / 2:
            current.append(box)
        else:
            rows.append(current)
            current = [box]
        previous = box
    # Always flush the row being built; otherwise a final row that starts
    # with the last box (or a single-box input) would be lost.
    rows.append(current)
    return rows


def _ocr_cell(text_img, cell_boxes):
    """Run Tesseract on every box assigned to one table cell.

    Args:
        text_img: Image to crop from (see ``_find_cell_boxes``).
        cell_boxes: List of ``[x, y, w, h]`` rectangles for this cell.

    Returns:
        ``' '`` for a cell with no boxes, otherwise the OCR output of each box
        concatenated, each piece prefixed with a single space.
    """
    if not cell_boxes:
        return ' '
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1))
    text = ''
    for x, y, w, h in cell_boxes:
        roi = text_img[y:y + h, x:x + w]
        # Pad, upscale 2x and lightly clean the crop before OCR.
        border = cv2.copyMakeBorder(
            roi, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=[255, 255])
        resizing = cv2.resize(
            border, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
        dilation = cv2.dilate(resizing, kernel, iterations=1)
        erosion = cv2.erode(dilation, kernel, iterations=2)
        text = text + " " + pytesseract.image_to_string(erosion)
    return text


def _extract_table(img):
    """Turn a grayscale table image into a DataFrame of OCR'd cell text.

    Args:
        img: Grayscale image as returned by ``_decode_grayscale``.

    Returns:
        A ``pandas.DataFrame`` with one row per detected table row and one
        column per detected column, or ``None`` when no cell boxes are found.
    """
    boxes, mean_height, text_img = _find_cell_boxes(img)
    if not boxes:
        return None

    rows = _group_rows(boxes, mean_height)

    # The widest row defines the column count and the column centres; every
    # other row's boxes are snapped to the nearest of those centres.
    widest_row = max(rows, key=len)
    total_cells = len(widest_row)
    center = np.sort(
        np.array([int(box[0] + box[2] / 2) for box in widest_row]))

    table = []
    for row in rows:
        cells = [[] for _ in range(total_cells)]
        for box in row:
            diff = np.abs(center - (box[0] + box[2] / 4))
            cells[int(np.argmin(diff))].append(box)
        table.append([_ocr_cell(text_img, cell) for cell in cells])

    return pd.DataFrame(table)


@app.route('/predict_api', methods=["GET","POST"])
def list_post():
    """JSON API: extract a table from the uploaded image.

    Reads the upload from ``request.files['file']``.

    Returns:
        On success, a JSON object whose ``result`` key holds a JSON string
        (``orient="split"``, indented by 4) describing the extracted table.
        A JSON ``error`` message with status 400 when the upload cannot be
        decoded as an image, or 422 when no table cells are detected.
    """
    file = request.files['file']
    img = _decode_grayscale(file)
    if img is None:
        return jsonify(error="The uploaded file could not be decoded as an image."), 400

    dataframe = _extract_table(img)
    if dataframe is None:
        return jsonify(error="No table cells were detected in the uploaded image."), 422

    # Round-trip through CSV in memory so the payload keeps the shape produced
    # by pandas' CSV parser, without sharing an on-disk "output.csv" between
    # concurrent requests.
    dataframe = pd.read_csv(io.StringIO(dataframe.to_csv()))
    parsed = json.loads(dataframe.to_json(orient="split"))
    return jsonify(result=json.dumps(parsed, indent=4))


@app.route('/',methods=['POST','GET'])
def predict():
    """HTML form handler: extract a table and return it as a CSV download.

    Reads the upload from ``request.files['file']``. Only files whose
    extension is ``png`` (compared case-insensitively) are processed.

    Returns:
        The rendered ``Wrong_file_type.html`` template when the extension is
        not ``png`` or the content cannot be decoded as an image; a plain-text
        422 response when no table cells are detected; otherwise a ``text/csv``
        attachment named ``table.csv``.
    """
    file = request.files['file']
    extension = file.filename.rsplit(".", 1)[-1].lower()
    if extension != "png":
        return render_template('Wrong_file_type.html')

    img = _decode_grayscale(file)
    if img is None:
        return render_template('Wrong_file_type.html')

    dataframe = _extract_table(img)
    if dataframe is None:
        return make_response(
            "No table cells were detected in the uploaded image.", 422)

    resp = make_response(dataframe.to_csv())
    resp.headers["Content-Disposition"] = "attachment; filename=table.csv"
    resp.headers["Content-Type"] = "text/csv"
    return resp
# Launch Flask's built-in server only when this file is run as a script.
if __name__ == "__main__":
    app.run(port=5000, host='0.0.0.0')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment