atuyosi/do_ocr_ms.py

## do_ocr_ms.py
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import http.client, urllib.request, urllib.parse, urllib.error, base64

# https://dev.projectoxford.ai/docs/services/56f91f2d778daf23d8ec6739/operations/56f91f2e778daf14a499e1fc

import http.client, urllib.request, urllib.parse, urllib.error, base64
import os, json, argparse


# Endpoint of the OCR operation used by this script.
OCR_HOST = 'westus.api.cognitive.microsoft.com'
OCR_PATH = '/vision/v1.0/ocr'


def build_request_path(language='unk', detect_orientation=True):
    """Return the request path, including the query string, for the OCR call.

    Args:
        language: value sent as the ``language`` query parameter.
        detect_orientation: sent as ``detectOrientation=true`` or ``false``.

    Returns:
        A string such as ``/vision/v1.0/ocr?language=unk&detectOrientation=true``.
    """
    params = urllib.parse.urlencode({
        'language': language,
        # The key must be exactly 'detectOrientation'. With a trailing space
        # urlencode emits 'detectOrientation+=true', which is a different
        # parameter name, so the flag would never reach the service.
        'detectOrientation': 'true' if detect_orientation else 'false',
    })
    return "{0}?{1}".format(OCR_PATH, params)


def format_ocr_result(result):
    """Render a decoded OCR response as the text this script prints.

    For every region a ``region: <boundingBox>`` line is produced; for every
    line inside it a ``line: <boundingBox>`` line, then the texts of its words
    joined without a separator, then an empty line.

    Args:
        result: dict decoded from the JSON response body.

    Returns:
        The complete report as one string; empty when there are no regions.
    """
    chunks = []
    for region in result.get('regions', []):
        chunks.append("region: {}\n".format(region['boundingBox']))
        for line in region['lines']:
            chunks.append("line: {}\n".format(line['boundingBox']))
            chunks.append("".join(word['text'] for word in line['words']))
            # Newline ending the text plus one blank separator line.
            chunks.append("\n\n")
    return "".join(chunks)


def request_ocr(api_key, filename):
    """POST the image file to the OCR endpoint.

    Args:
        api_key: value for the ``Ocp-Apim-Subscription-Key`` header.
        filename: path of the image file whose bytes are uploaded.

    Returns:
        A ``(status, body)`` tuple: the HTTP status code and the response
        body decoded as UTF-8.

    Raises:
        OSError: the file cannot be read or the connection fails.
        http.client.HTTPException: the HTTP exchange itself fails.
    """
    headers = {
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': api_key,
    }

    # Read the image before connecting so a bad path fails fast, and close
    # the file handle deterministically.
    with open(filename, 'rb') as image_file:
        image_bytes = image_file.read()

    conn = http.client.HTTPSConnection(OCR_HOST)
    try:
        conn.request("POST", build_request_path(), image_bytes, headers)
        response = conn.getresponse()
        status = response.status
        body = response.read().decode("utf-8")
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()
    return status, body


def main():
    """Command-line entry point: OCR one image and print the recognised text.

    The subscription key is read from the ``MSCV_API_KEY`` environment
    variable. Failures are reported on stderr with a non-zero exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("imagefile", help="image file for ocr(png, jpg)")
    args = parser.parse_args()

    api_key = os.environ.get('MSCV_API_KEY')
    if not api_key:
        parser.error("environment variable MSCV_API_KEY is not set")

    try:
        status, body = request_ocr(api_key, args.imagefile)
    except OSError as e:
        # Only OSError carries errno/strerror; fall back to str(e) when the
        # platform did not fill in strerror.
        raise SystemExit("[Errno {0}] {1}".format(e.errno, e.strerror or e))
    except http.client.HTTPException as e:
        raise SystemExit("HTTP error: {0!r}".format(e))

    # A non-200 reply is not an OCR result; show it instead of trying to
    # walk it as one.
    if status != 200:
        raise SystemExit(
            "OCR request failed (HTTP {0}): {1}".format(status, body))

    try:
        result = json.loads(body)
    except ValueError as e:
        raise SystemExit("could not decode OCR response: {0}".format(e))

    print(format_ocr_result(result), end="")


if __name__ == "__main__":
    main()