Created
February 3, 2018 14:28
-
-
Save Deepayan137/e91cd22f309e84846524627eefb5a5eb to your computer and use it in GitHub Desktop.
This code returns the OCR text output for one or more document images.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import subprocess | |
import glob | |
import numpy as np | |
import cv2 | |
import sys | |
import shutil | |
import pdb | |
def segmentation(path):
    """Run layout analysis on every file below *path*, then extract features.

    For each sub-directory reported by ``os.walk(path)`` the external
    ``./j-layout`` tool is invoked once per file. The ``<file>.blocks.txt``
    and ``<file>.words.txt`` side files (presumably written by ``j-layout``)
    are deleted afterwards, and ``get_features`` is then called for that
    directory.

    NOTE(review): the original indentation of this file was lost; calling
    ``get_features`` once per directory, after all of its files have been
    processed, is the assumed nesting -- confirm against the upstream gist.

    Args:
        path: Root directory to walk.

    Raises:
        OSError: If ``./j-layout`` cannot be executed.
    """
    for root, dirs, files in os.walk(path):
        print(root, dirs, files)
        for d in dirs:
            page_dir = os.path.join(root, d)
            # os.listdir snapshots the directory, so files created by the
            # tool while we iterate are not fed back into it.
            for each_file in os.listdir(page_dir):
                file_path = os.path.join(page_dir, each_file)
                print(file_path)
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed through intact.
                subprocess.call(['./j-layout', file_path])
                # Only remove side files that actually exist, so one input
                # the tool could not handle does not abort the whole walk.
                for suffix in ('.blocks.txt', '.words.txt'):
                    leftover = file_path + suffix
                    if os.path.exists(leftover):
                        os.remove(leftover)
            get_features(root, d)
#multiocr('/data5/deepayan/webocr/task/') | |
def _ensure_dir(path):
    """Create directory *path* (and missing parents) if needed; return it."""
    if not os.path.isdir(path):
        os.makedirs(path)
    return path


def _write_page_features(image_path, text_file, tag_prefix, feature_file,
                         annotated_path, new_height=32):
    """Annotate one page image and write one feature record per text line.

    Each row of *text_file* is read as ``x y width height`` (columns 2 and 3
    are added to the origin to get the opposite corner). Every box is drawn
    on the page, cropped from the Otsu-binarised image, resized to
    *new_height* rows and written to *feature_file* as a 0/1 string where
    ``1`` marks pixels that are exactly 255 after resizing.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None.
        print('Could not read image: ' + image_path)
        return
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # ndmin=2 keeps a file with a single line as one row instead of a flat
    # vector, which would break the per-row indexing below.
    boxes = np.loadtxt(text_file, ndmin=2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    drawn = False
    try:
        for i, box in enumerate(boxes):
            x1 = int(box[0])
            y1 = int(box[1])
            x2 = int(box[2] + x1)
            y2 = int(box[3] + y1)
            cv2.putText(image, str(i), (x1 - 50, y1 + 25), font, 1,
                        (0, 255, 0), 2)
            cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
            drawn = True

            box_w = x2 - x1
            box_h = y2 - y1
            if box_w <= 0 or box_h <= 0:
                # A zero-height box used to raise ZeroDivisionError and
                # abort the rest of the page; skip just this line instead.
                print('Skipping degenerate line box %d: %dx%d'
                      % (i, box_w, box_h))
                continue
            new_width = int(np.ceil(float(box_w) / float(box_h) * new_height))
            line_crop = binary[y1:y2, x1:x2]
            try:
                resized = cv2.resize(line_crop, (new_width, new_height),
                                     interpolation=cv2.INTER_AREA)
            except Exception:
                print(new_width, new_height)
                print(str(box_w) + '\n' + str(box_h))
                # Without a resized crop there is nothing valid to write;
                # previously stale or undefined dimensions were used here.
                continue

            # Row-major flattening matches the original row/column loops.
            bits = np.where(resized == 255, '1', '0').ravel().tolist()
            feature_file.write("===Begin==" + "\n")
            feature_file.write("TAG:" + tag_prefix + '_' + str(i) + '\n')
            feature_file.write("TRUTH:" + ' ' + '\n')
            feature_file.write("FEATURE:" + str(i) + ' ' + str(new_height)
                               + ' ' + str(new_width) + ' '
                               + ' '.join(bits) + '\n')
            feature_file.write("==END===" + '\n')
    finally:
        # Save the annotated page once (not once per line), even when a
        # malformed row interrupts the loop part-way through.
        if drawn:
            cv2.imwrite(annotated_path, image)


def get_features(root, di):
    """Extract line features for the PNG pages in ``root/di`` and run OCR.

    For every ``*.png`` in ``os.path.join(root, di)`` the matching
    ``<image>.lines.txt`` is read, an annotated copy is written to
    ``root/annotated_images`` and feature records are appended to
    ``root/features/<image>_Features.txt``. Afterwards ``./runBLSTM_new.sh``
    is invoked for every file in ``root/features`` with the language taken
    from ``sys.argv[1]``, and each ``<name>.output.txt`` is moved into
    ``root/OCR_text``.

    NOTE(review): the script is now executed directly (no shell), so it
    needs its executable bit and a shebang line. It presumably writes the
    output file into the current working directory -- confirm.
    NOTE(review): indentation of the original was lost; the nesting used
    here is reconstructed from the code's logic.

    Args:
        root: Directory that contains *di* and receives the output folders.
        di: Sub-directory of *root* holding the page images.

    Raises:
        IndexError: If no language argument was given on the command line.
    """
    image_files = glob.glob(os.path.join(root, di) + "/*.png")
    print(root)
    # Build folder paths with os.path.join so the directory that is created
    # is the same one that is written to, with or without a trailing slash
    # on ``root``.
    ann_dir = _ensure_dir(os.path.join(root, 'annotated_images'))
    feature_dir = _ensure_dir(os.path.join(root, 'features'))
    print("Processing...")
    for image_path in image_files:
        image_name = os.path.basename(image_path)
        # Page number is the text between the last '-' and the first
        # following '.', taken from the file name rather than the full path
        # so hyphens or dots in directory names cannot leak into it.
        page_no = image_name.split('-')[-1].split('.')[0]
        print(image_name)
        feature_path = os.path.join(feature_dir, image_name) + '_Features.txt'
        with open(feature_path, 'a') as feature_file:
            try:
                _write_page_features(
                    image_path,
                    image_path + '.lines.txt',
                    # Was str(dir), i.e. the repr of the builtin function.
                    str(di) + '/' + str(page_no),
                    feature_file,
                    os.path.join(ann_dir, image_name),
                )
            except Exception as e:
                # Best effort per page: report and carry on with the next.
                print(e)

    feature_files = os.listdir(feature_dir)
    language = sys.argv[1]
    output_dir = _ensure_dir(os.path.join(root, 'OCR_text'))
    for each_feature_file in feature_files:
        output_name = each_feature_file.split('.png_')[0] + '.output.txt'
        subprocess.call(['./runBLSTM_new.sh', language,
                         os.path.join(feature_dir, each_feature_file),
                         output_name])
        # Move to an explicit file path inside the folder created above, so
        # a rerun replaces older output instead of raising shutil.Error.
        shutil.move(output_name, os.path.join(output_dir, output_name))
#get_features('task/','sanskrit/') | |
def ocr_output():
    """Run the recogniser on every feature file named on the command line.

    Command-line arguments:
        ``sys.argv[1]``: language passed through to ``./runBLSTM_new.sh``.
        ``sys.argv[2]``: folder containing the feature files.
        ``sys.argv[3]``: folder that receives the ``*.output.txt`` files
        (created, including missing parents, if it does not exist).

    For each entry in the feature folder the script is called with the
    language, the feature file path and ``<name>.output.txt``; that file is
    then moved into the output folder. The function returns nothing.

    NOTE(review): the script is now executed directly (no shell), so it
    needs its executable bit and a shebang line. It presumably writes the
    output file into the current working directory -- confirm.

    Raises:
        IndexError: If fewer than three command-line arguments were given.
    """
    language = sys.argv[1]
    feature_folder_path = sys.argv[2]
    output_text_folder = sys.argv[3]

    feature_files = os.listdir(feature_folder_path)
    if not os.path.isdir(output_text_folder):
        # makedirs also handles an output folder whose parent is missing.
        os.makedirs(output_text_folder)

    for each_feature_file in feature_files:
        output_name = each_feature_file.split('.png_')[0] + '.output.txt'
        # Argument list instead of a shell string so paths with spaces or
        # shell metacharacters reach the script unchanged.
        subprocess.call(['./runBLSTM_new.sh', language,
                         os.path.join(feature_folder_path, each_feature_file),
                         output_name])
        # Explicit destination file: a rerun overwrites earlier output
        # instead of failing because the file already exists in the folder.
        shutil.move(output_name, os.path.join(output_text_folder, output_name))
if __name__ == "__main__":
    # Run the OCR step only when this file is executed as a script, so that
    # importing the module (for example to call segmentation()) does not
    # read sys.argv or launch any external process.
    ocr_output()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ocr_output does not return a value; for each feature file it moves the generated `<name>.output.txt` file containing the recognized text into the output folder.