Skip to content

Instantly share code, notes, and snippets.

@Deepayan137
Created February 3, 2018 14:28
Show Gist options
  • Save Deepayan137/e91cd22f309e84846524627eefb5a5eb to your computer and use it in GitHub Desktop.
Save Deepayan137/e91cd22f309e84846524627eefb5a5eb to your computer and use it in GitHub Desktop.
This code runs OCR on one or more document images and returns the recognized text output.
import os
import subprocess
import glob
import numpy as np
import cv2
import sys
import shutil
import pdb
def segmentation(path):
    """Run the layout tool on every file below *path*, then extract features.

    For each directory ``root`` visited by ``os.walk(path)`` and each of its
    immediate sub-directories ``d``:

    1. every file in ``root/d`` is passed to the external ``./j-layout``
       executable (resolved relative to the current working directory);
    2. the ``<file>.blocks.txt`` and ``<file>.words.txt`` files are deleted
       (presumably by-products of ``j-layout`` that are not needed later --
       TODO confirm);
    3. ``get_features(root, d)`` is called for the sub-directory.

    Args:
        path: Top-level directory to walk.
    """
    for root, dirs, files in os.walk(path):
        print(root, dirs, files)
        for d in dirs:
            page_dir = os.path.join(root, d)
            # Snapshot the listing first: files written while processing are
            # not themselves fed back into the layout tool in this pass.
            for each_file in os.listdir(page_dir):
                file_path = os.path.join(page_dir, each_file)
                print(file_path)
                # Argument list instead of a shell string, so paths with
                # spaces or shell metacharacters are passed through intact.
                subprocess.call(['./j-layout', file_path])
                for suffix in ('.blocks.txt', '.words.txt'):
                    by_product = file_path + suffix
                    # The tool may have failed on this input; a missing
                    # by-product must not abort the whole walk.
                    if os.path.exists(by_product):
                        os.remove(by_product)
            get_features(root, d)
#multiocr('/data5/deepayan/webocr/task/')
def _ensure_dir(path):
    """Create the directory *path* unless something already exists there."""
    if not os.path.exists(path):
        os.mkdir(path)


def _line_boxes(lines_file):
    """Load line boxes from *lines_file* as integer ``(x1, y1, x2, y2)`` tuples.

    Each row is read as four numbers; the third and fourth are added to the
    first and second to obtain the opposite corner (i.e. they are treated as
    width and height).
    """
    # ndmin=2 keeps a file with a single row two-dimensional, so indexing a
    # row by column works for one line as well as for many.
    rows = np.loadtxt(lines_file, ndmin=2)
    if rows.size == 0:
        return []
    boxes = []
    for row in rows:
        x1, y1 = int(row[0]), int(row[1])
        boxes.append((x1, y1, int(row[2] + x1), int(row[3] + y1)))
    return boxes


def _write_page_features(image_path, annotated_path, feature_file, di):
    """Annotate one page image and append its per-line features.

    Reads ``image_path`` and ``image_path + '.lines.txt'``, draws every line
    box and its index onto the page, saves that to ``annotated_path`` and
    writes one ``===Begin== ... ==END===`` record per usable line to the
    already-open ``feature_file``.
    """
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    boxes = _line_boxes(image_path + '.lines.txt')
    if not boxes:
        return

    # Draw all boxes first and save the annotated page once, instead of
    # rewriting the same image file after every single line.
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i, (x1, y1, x2, y2) in enumerate(boxes):
        cv2.putText(image, str(i), (x1 - 50, y1 + 25), font, 1, (0, 255, 0), 2)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
    cv2.imwrite(annotated_path, image)

    page_no = image_path.split('-')[-1].split('.')[0]
    new_height = 32
    for i, (x1, y1, x2, y2) in enumerate(boxes):
        if x2 <= x1 or y2 <= y1:
            # Degenerate box: it has no aspect ratio and nothing to crop.
            print('Skipping empty line box ' + str(i))
            continue
        new_width = int(np.ceil(float(x2 - x1) / float(y2 - y1) * new_height))
        try:
            resized = cv2.resize(binary[y1:y2, x1:x2],
                                 (new_width, new_height),
                                 interpolation=cv2.INTER_AREA)
        except Exception:
            print(new_width, new_height)
            print(str(x2 - x1) + '\n' + str(y2 - y1))
            # Skip this line; continuing would emit stale or undefined data.
            continue
        # Row-major pixels: '1' where the resized value is exactly 255,
        # '0' everywhere else.
        bits = ' '.join(np.where(resized == 255, '1', '0').ravel().tolist())
        feature_file.write("===Begin==" + "\n")
        # Tag with the sub-directory argument (not the ``dir`` builtin).
        feature_file.write("TAG:" + str(di) + '/' + str(page_no) + '_' + str(i) + '\n')
        feature_file.write("TRUTH:" + ' ' + '\n')
        feature_file.write("FEATURE:" + str(i) + ' ' + str(new_height) + ' '
                           + str(new_width) + ' ' + bits + '\n')
        feature_file.write("==END===" + '\n')


def _run_ocr(language, feature_dir, output_dir):
    """Run ``./runBLSTM_new.sh`` on every file in *feature_dir*.

    The script is called with the language, the feature file and an output
    file name; the output file is then looked for in the current working
    directory and moved into *output_dir*.
    """
    for feature_name in os.listdir(feature_dir):
        output_name = feature_name.split('.png_')[0] + '.output.txt'
        # Argument list instead of a shell string, so paths with spaces or
        # shell metacharacters are passed through intact.
        subprocess.call(['./runBLSTM_new.sh', language,
                         os.path.join(feature_dir, feature_name), output_name])
        if not os.path.exists(output_name):
            print('No OCR output produced for ' + feature_name)
            continue
        # Explicit destination file name, so a re-run replaces an earlier
        # result instead of failing because the name is already taken.
        shutil.move(output_name, os.path.join(output_dir, output_name))


def get_features(root, di):
    """Build line features for the PNG pages in ``root/di`` and OCR them.

    For every ``*.png`` in ``root/di`` with a matching ``<png>.lines.txt``:
    the line boxes are drawn onto the page and saved under
    ``root/annotated_images``; each line is cropped from the Otsu-binarized
    page, resized to height 32 and appended as a 0/1 pixel record to
    ``root/features/<png name>_Features.txt``. Afterwards every file in
    ``root/features`` is sent through ``./runBLSTM_new.sh`` and the results
    are collected in ``root/OCR_text``.

    A page that fails (unreadable image, missing or malformed lines file)
    is reported on stdout and skipped.

    Args:
        root: Directory that receives the ``annotated_images``, ``features``
            and ``OCR_text`` folders.
        di: Name of the sub-directory of ``root`` holding the page images.

    Raises:
        IndexError: if no language was given as the first command-line
            argument (``sys.argv[1]``).
    """
    image_files = glob.glob(os.path.join(root, di) + "/*.png")
    print(root)
    # Build each folder path once with os.path.join so the folder that is
    # created is the same one that is written to, with or without a
    # trailing slash on ``root``.
    annotated_dir = os.path.join(root, 'annotated_images')
    feature_dir = os.path.join(root, 'features')
    output_dir = os.path.join(root, 'OCR_text')
    _ensure_dir(annotated_dir)
    _ensure_dir(feature_dir)
    print("Processing...")
    for image_path in image_files:
        image_name = os.path.basename(image_path)
        print(image_name)
        feature_path = os.path.join(feature_dir, image_name) + '_Features.txt'
        # Append mode is kept from the original: re-running adds records to
        # an existing feature file rather than replacing it.
        with open(feature_path, 'a') as feature_file:
            try:
                _write_page_features(image_path,
                                     os.path.join(annotated_dir, image_name),
                                     feature_file, di)
            except Exception as e:
                # Best effort per page: report and move on to the next one.
                print(e)

    language = sys.argv[1]
    _ensure_dir(output_dir)
    _run_ocr(language, feature_dir, output_dir)
#get_features('task/','sanskrit/')
def ocr_output():
    """Run the OCR script on every feature file named on the command line.

    Command-line arguments (``sys.argv``):
        1. language passed through to ``./runBLSTM_new.sh``;
        2. folder containing the feature files;
        3. folder that receives the ``*.output.txt`` results (created,
           including missing parents, if it does not exist).

    For each feature file the script is called with the language, the
    feature file path and an output file name derived from the feature file
    name (everything before ``.png_`` plus ``.output.txt``). The output file
    is then looked for in the current working directory and moved into the
    output folder.

    Raises:
        SystemExit: with a usage message when fewer than three arguments
            are supplied.
    """
    if len(sys.argv) < 4:
        sys.exit('usage: ' + sys.argv[0]
                 + ' LANGUAGE FEATURE_FOLDER OUTPUT_TEXT_FOLDER')
    language, feature_folder_path, output_text_folder = sys.argv[1:4]

    feature_files = os.listdir(feature_folder_path)
    if not os.path.isdir(output_text_folder):
        os.makedirs(output_text_folder)

    for feature_name in feature_files:
        output_name = feature_name.split('.png_')[0] + '.output.txt'
        # Argument list instead of a shell string, so paths with spaces or
        # shell metacharacters are passed through intact.
        subprocess.call(['./runBLSTM_new.sh', language,
                         os.path.join(feature_folder_path, feature_name),
                         output_name])
        if not os.path.exists(output_name):
            # The script produced nothing for this file; keep going.
            print('No OCR output produced for ' + feature_name)
            continue
        # Explicit destination file name, so a re-run replaces an earlier
        # result instead of failing because the name is already taken.
        shutil.move(output_name, os.path.join(output_text_folder, output_name))
# Script entry point: run the OCR stage only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    ocr_output()
@Deepayan137
Copy link
Author

ocr_output writes, for each feature file, a text file containing the recognized text.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment