Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save elnazsn1988/f3c184bd55ec452cb7b9da4590c41bfe to your computer and use it in GitHub Desktop.
Save elnazsn1988/f3c184bd55ec452cb7b9da4590c41bfe to your computer and use it in GitHub Desktop.
Code to use Google Vision API to create a text object map from Structured Documents
import os
import cv2
from itertools import chain
import base64
import pandas as pd
import requests
import json
def ocr_using_google_api(image_path, request_url):
'''
This function uses Google Vision API for Text Detection
Args :
image_path : Input image path
Returns:
pd.DataFrame having coordinates of each text box along with detected
text inside it.
'''
lstr_filename, str_extension = os.path.splitext(str(image_path))
image_arr = cv2.imread(image_path)
_, image_buffer = cv2.imencode("."+str_extension,
image_arr)
int_respose_code = 0
json_request_header = {
'content-type': 'application/json',
'Accept-Charset': 'UTF-8'
}
str_encode_image = base64.b64encode(image_buffer).decode()
json_request_payload = {'requests':
[
{
"image":
{
'content':str_encode_image
},
'features':
[
{
'type': 'DOCUMENT_TEXT_DETECTION'
}
],
}
]
}
list_block_coordinates = []
list_block_word_coordinates = []
list_each_word_coordinate = []
str_http_response = \
requests.post(
request_url,
data=json.dumps(json_request_payload),
headers=json_request_header,
verify=False
)
int_respose_code = str_http_response.status_code
if int_respose_code != 200:
return list_block_coordinates
else:
json_response_data = json.loads(str_http_response.text)
if json_response_data['responses'][0]:
list_bounding_boxes = \
json_response_data['responses'][0]['fullTextAnnotation']\
['pages'][0]['blocks']
list_vertices = \
[boundingBox['boundingBox'] for boundingBox in \
list_bounding_boxes if 'boundingBox' in boundingBox]
list_block_coordinates = \
[list(chain(*[[x['x'], x['y']] for x in i['vertices']])) \
for i in list_vertices]
list_block_words = []
for bounding_box in list_bounding_boxes:
list_paragraphs = bounding_box["paragraphs"]
str_word = ""
list_bounding_box = []
for paragraphs in list_paragraphs:
list_words = paragraphs['words']
for words in list_words:
list_vertices = []
str_text = ""
llst_symbols = words['symbols']
list_bounding_box.append(words['boundingBox'])
for symbols in llst_symbols:
str_text = (str_text + symbols['text']).strip()
list_vertices.append(symbols['boundingBox'])
str_word = (str_word + " " + str_text).strip()
list_word_coords = \
list(chain(*[[x['x'], x['y']] for x in \
words['boundingBox']['vertices']]))
list_word_coords.insert(0, str_text)
list_each_word_coordinate.append(list_word_coords)
list_word_coordinates = \
[list(chain(*[[x['x'], x['y']] for x in \
i['vertices']])) for i in list_bounding_box]
list_block_words.append(str_word)
list_block_word_coordinates.append(list_word_coordinates)
for int_index, llst_block_coordinate in enumerate(list_block_coordinates):
llst_block_coordinate.insert(0, list_block_words[int_index])
list_word_objects = \
[[min(item[1],item[5]), min(item[2],item[6]),
max(item[1],item[5]), max(item[2],item[6]),
item[0]] for i, item in enumerate(list_each_word_coordinate)]
return df = pd.DataFrame(list_word_objects,
columns=['xmin', 'ymin', 'xmax', 'ymax', 'Object'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment