vinovator/azure_ocr.py

## azure_ocr.py
# azure_ocr.py
# Python 3.5

"""
Python script to analyze and read text from image URLs using the Microsoft Azure Cognitive Services OCR API
"""

from IGNORE import azure_secrets  # To fetch API endpoint and key
import requests
import json

# HTTP headers shared by every request this module sends (both API helper
# functions below pass this dict as ``headers=``).
headers = {
    # The request body is posted as JSON (the helpers use ``json=payload``).
    "Content-Type": "application/json",
    # Subscription key read from the local ``azure_secrets`` module.
    "Ocp-Apim-Subscription-Key": azure_secrets.api["key1"],
}


def analyze_image_from_image_url(payload, timeout=30):
    """
    Given an image url, analyze the image and return its first caption.

    Parameters:
        payload: JSON-serialisable request body. Callers in this file pass a
            dict of the form {"url": "<image url>"}.
        timeout: Seconds handed to ``requests.post`` as its timeout so a
            stalled connection cannot block the caller forever.

    Returns:
        The "text" field of the first caption in the response's
        "description" section, or an empty string when the service returns
        an empty caption list.

    Raises:
        requests.HTTPError: when the service answers with an error status
            (raised by ``resp.raise_for_status()``).
        KeyError: when the response lacks the "description"/"captions" keys.
    """

    # Query parameters to analyze image. All of them are optional.
    params = {
        # Comma-separated list; no spaces between the feature names.
        "visualFeatures": "Categories,Description,Color,Faces",
        "details": "Celebrities,Landmarks",
        "language": "en",
    }

    # Resulting URL looks like
    # https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/analyze
    resp = requests.post(
        azure_secrets.api["endpoint"] + "/analyze",
        json=payload,
        params=params,
        headers=headers,
        timeout=timeout,
    )
    resp.raise_for_status()

    parsed = resp.json()

    # Text description of the image. An empty caption list is a valid
    # response, so return "" instead of failing with IndexError.
    captions = parsed["description"]["captions"]
    if not captions:
        return ""
    return captions[0]["text"]


def detect_text_from_image_url(payload, timeout=30):
    """
    Given an image url, detect the text it contains.

    Parameters:
        payload: JSON-serialisable request body. Callers in this file pass a
            dict of the form {"url": "<image url>"}.
        timeout: Seconds handed to ``requests.post`` as its timeout so a
            stalled connection cannot block the caller forever.

    Returns:
        Every recognised word, in response order (regions -> lines ->
        words), joined by single spaces. Empty string when no words are
        returned.

    Raises:
        requests.HTTPError: when the service answers with an error status
            (raised by ``resp.raise_for_status()``).
        KeyError: when the response lacks the "regions"/"lines"/"words"/
            "text" keys.
    """

    # Query parameters for the OCR call.
    params = {
        # The language setting "unk" means automatically detect the language.
        "language": "unk",
        # The key must not carry trailing whitespace, otherwise the
        # parameter is sent under a different name.
        "detectOrientation": "true",
    }

    # Resulting URL looks like
    # https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/ocr
    resp = requests.post(
        azure_secrets.api["endpoint"] + "/ocr",
        json=payload,
        params=params,
        headers=headers,
        timeout=timeout,
    )
    resp.raise_for_status()

    parsed = resp.json()

    # Flatten the regions -> lines -> words hierarchy into one word list.
    result = []
    for region in parsed["regions"]:
        for line in region["lines"]:
            for word in line["words"]:
                result.append(word["text"])

    return " ".join(result)


if __name__ == "__main__":
    # Script entry point: run one image analysis and one text detection
    # against sample URLs and print the results.

    # Sample JPEG URLs for image analysis (only payload4 is used below).
    image_analysis_payload = {
        "url": "https://upload.wikimedia.org/wikipedia/commons/1/12/Broadway_and_Times_Square_by_night.jpg"
    }
    image_analysis_payload2 = {
        "url": "http://www.chatelaine.com/wp-content/uploads/2012/07/9f51bbc649ceaa8a58187c454269-660x440.jpg"
    }
    image_analysis_payload3 = {
        "url": "http://www.dccomics.com/sites/default/files/GalleryChar_1900x900_MOS_52e05e3fe24a61.04593858.jpg"
    }
    # sachin
    image_analysis_payload4 = {
        "url": "http://www.oneindia.com/img/2015/04/24-1429855668-sachincupindiagate-600-jpg.jpg"
    }

    # Sample URLs for text detection (only payload2 is used below).
    detect_text_payload = {
        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/a/af/Atomist_quote_from_Democritus.png/338px-Atomist_quote_from_Democritus.png"
    }
    # UK motorway board
    detect_text_payload1 = {
        "url": "http://www.drivingtesttips.biz/images/motorway-sign.jpg"
    }
    # Shakespeare
    detect_text_payload2 = {
        "url": "http://images.macworld.com/images/reviews/graphics/147246-nofear_original.jpg"
    }

    try:
        # Image analysis from url
        caption = analyze_image_from_image_url(image_analysis_payload4)
        print(caption)

        # Detect text from url
        detected_text = detect_text_from_image_url(detect_text_payload2)
        print(detected_text)
    except Exception as err:
        # Best-effort demo: report the failure instead of a traceback.
        print(err)
	# azure_ocr.py
	# Python 3.5

	"""
	Python script to analyze and read text from image URLs using the Microsoft Azure Cognitive Services OCR API
	"""

	from IGNORE import azure_secrets # To fetch API endpoint and key
	import requests
	import json

	# HTTP headers shared by every request this module sends (both API helper
	# functions pass this dict as ``headers=``).
	headers = {
	# The request body is posted as JSON (the helpers use ``json=payload``).
	"Content-Type": "application/json",
	# Subscription key read from the local ``azure_secrets`` module.
	"Ocp-Apim-Subscription-Key": azure_secrets.api["key1"],
	}


	def analyze_image_from_image_url(payload, timeout=30):
		"""
		Given an image url, analyze the image and return its first caption.

		Parameters:
			payload: JSON-serialisable request body. Callers in this file
				pass a dict of the form {"url": "<image url>"}.
			timeout: Seconds handed to ``requests.post`` as its timeout so a
				stalled connection cannot block the caller forever.

		Returns:
			The "text" field of the first caption in the response's
			"description" section, or an empty string when the service
			returns an empty caption list.

		Raises:
			requests.HTTPError: when the service answers with an error
				status (raised by ``resp.raise_for_status()``).
			KeyError: when the response lacks the "description"/"captions"
				keys.
		"""

		# Query parameters to analyze image. All of them are optional.
		params = {
			# Comma-separated list; no spaces between the feature names.
			"visualFeatures": "Categories,Description,Color,Faces",
			"details": "Celebrities,Landmarks",
			"language": "en",
		}

		# Resulting URL looks like
		# https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/analyze
		resp = requests.post(
			azure_secrets.api["endpoint"] + "/analyze",
			json=payload,
			params=params,
			headers=headers,
			timeout=timeout,
		)
		resp.raise_for_status()

		parsed = resp.json()

		# Text description of the image. An empty caption list is a valid
		# response, so return "" instead of failing with IndexError.
		captions = parsed["description"]["captions"]
		if not captions:
			return ""
		return captions[0]["text"]


	def detect_text_from_image_url(payload, timeout=30):
		"""
		Given an image url, detect the text it contains.

		Parameters:
			payload: JSON-serialisable request body. Callers in this file
				pass a dict of the form {"url": "<image url>"}.
			timeout: Seconds handed to ``requests.post`` as its timeout so a
				stalled connection cannot block the caller forever.

		Returns:
			Every recognised word, in response order (regions -> lines ->
			words), joined by single spaces. Empty string when no words are
			returned.

		Raises:
			requests.HTTPError: when the service answers with an error
				status (raised by ``resp.raise_for_status()``).
			KeyError: when the response lacks the "regions"/"lines"/"words"/
				"text" keys.
		"""

		# Query parameters for the OCR call.
		params = {
			# The language setting "unk" means automatically detect the language.
			"language": "unk",
			# The key must not carry trailing whitespace, otherwise the
			# parameter is sent under a different name.
			"detectOrientation": "true",
		}

		# Resulting URL looks like
		# https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/ocr
		resp = requests.post(
			azure_secrets.api["endpoint"] + "/ocr",
			json=payload,
			params=params,
			headers=headers,
			timeout=timeout,
		)
		resp.raise_for_status()

		parsed = resp.json()

		# Flatten the regions -> lines -> words hierarchy into one word list.
		result = []
		for region in parsed["regions"]:
			for line in region["lines"]:
				for word in line["words"]:
					result.append(word["text"])

		return " ".join(result)


	if __name__ == "__main__":
		# Script entry point: run one image analysis and one text detection
		# against sample URLs and print the results.

		# The URL of a JPEG image to analyze (only payload4 is used below).
		image_analysis_payload = {'url':'https://upload.wikimedia.org/wikipedia/commons/1/12/Broadway_and_Times_Square_by_night.jpg'}
		image_analysis_payload2 = {"url": "http://www.chatelaine.com/wp-content/uploads/2012/07/9f51bbc649ceaa8a58187c454269-660x440.jpg"}
		image_analysis_payload3 = {"url": "http://www.dccomics.com/sites/default/files/GalleryChar_1900x900_MOS_52e05e3fe24a61.04593858.jpg"}
		image_analysis_payload4 = {"url": "http://www.oneindia.com/img/2015/04/24-1429855668-sachincupindiagate-600-jpg.jpg"} # sachin

		# URL to detect text (only payload2 is used below).
		detect_text_payload = {'url':'https://upload.wikimedia.org/wikipedia/commons/thumb/a/af/Atomist_quote_from_Democritus.png/338px-Atomist_quote_from_Democritus.png'}
		detect_text_payload1 = {'url':'http://www.drivingtesttips.biz/images/motorway-sign.jpg'} # UK motorway board
		detect_text_payload2 = {'url':'http://images.macworld.com/images/reviews/graphics/147246-nofear_original.jpg'} # Shakespeare

		try:
			# Image analysis from url
			print(analyze_image_from_image_url(image_analysis_payload4))

			# Detect text from url
			print(detect_text_from_image_url(detect_text_payload2))
		except Exception as e:
			# Best-effort demo: report the failure instead of a traceback.
			print(e)