azyobuzin/AzureOCR.py

## AzureOCR.py
from dataclasses import dataclass
from pathlib import Path
import typing
from PIL import Image, ImageDraw
import requests

# Subscription key that call_ocr_api sends in the
# 'Ocp-Apim-Subscription-Key' request header.
# NOTE(review): 'xxx' looks like a placeholder -- supply a real key before use.
API_KEY = 'xxx'
# Base URL of the Cognitive Services resource; call_ocr_api appends the
# OCR API path to it.
ENDPOINT = 'https://japaneast.api.cognitive.microsoft.com/'

def get_ocr_result_path(imagefilepath):
    """Return the path of the OCR result file that belongs to an image.

    The result file sits in the same directory as the image and is named
    after the image's stem with ``_ocr.json`` appended
    (``scan.png`` -> ``scan_ocr.json``).
    """
    result_name = imagefilepath.stem + '_ocr.json'
    return imagefilepath.with_name(result_name)

def call_ocr_api(imagefilepath: Path, *, language: str = 'en',
                 timeout: typing.Optional[float] = 60.0) -> Path:
    """Send an image to the OCR endpoint and store the raw JSON response.

    Args:
        imagefilepath: Image to upload; its bytes are posted as
            ``application/octet-stream``.
        language: Value of the ``language`` query parameter. Defaults to
            ``'en'``, the value this function previously hard-coded.
        timeout: Seconds handed to ``requests.post`` as ``timeout``.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        Path of the ``*_ocr.json`` file the response body was written to.

    Raises:
        requests.HTTPError: The service answered with an error status.
        requests.Timeout: No response arrived within ``timeout``.
    """
    # Tolerate an ENDPOINT configured with or without a trailing slash.
    request_url = ENDPOINT.rstrip('/') + '/vision/v2.0/ocr'
    headers = {
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': API_KEY,
    }
    with imagefilepath.open('rb') as imagefile:
        res = requests.post(
            request_url,
            params={'language': language},
            data=imagefile,
            headers=headers,
            timeout=timeout)
    res.raise_for_status()
    result_path = get_ocr_result_path(imagefilepath)
    # Written only after raise_for_status() so an error body is never stored.
    result_path.write_bytes(res.content)
    return result_path

@dataclass
class BoundingBox:
    """Rectangle given by its left/top position and its width/height."""
    left: int
    top: int
    width: int
    height: int

    @classmethod
    def from_string(cls, s: str) -> 'BoundingBox':
        """Build a box from a comma-separated ``"x,y,w,h"`` string."""
        return cls(*map(int, s.split(',')))

@dataclass
class Word:
    """A single word of OCR output together with its bounding box."""
    bounding_box: BoundingBox
    text: str

    @classmethod
    def from_json_dict(cls, d: dict) -> 'Word':
        """Create a word from a dict holding ``boundingBox`` and ``text``."""
        box = BoundingBox.from_string(d['boundingBox'])
        return cls(bounding_box=box, text=d['text'])

@dataclass
class Line:
    """One line of OCR output: its bounding box and the words it contains."""
    bounding_box: BoundingBox
    words: typing.Sequence[Word]

    @classmethod
    def from_json_dict(cls, d: dict) -> 'Line':
        """Create a line from a dict holding ``boundingBox`` and ``words``."""
        box = BoundingBox.from_string(d['boundingBox'])
        parsed_words = list(map(Word.from_json_dict, d['words']))
        return cls(bounding_box=box, words=parsed_words)

@dataclass
class Region:
    """One region of OCR output: its bounding box and the lines inside it."""
    bounding_box: BoundingBox
    lines: typing.Sequence[Line]

    @classmethod
    def from_json_dict(cls, d: dict) -> 'Region':
        """Create a region from a dict holding ``boundingBox`` and ``lines``."""
        box = BoundingBox.from_string(d['boundingBox'])
        parsed_lines = list(map(Line.from_json_dict, d['lines']))
        return cls(bounding_box=box, lines=parsed_lines)

@dataclass
class OcrResult:
    """Top-level OCR result: language, text angle, orientation and regions."""
    language: str
    text_angle: float
    orientation: str
    regions: typing.Sequence[Region]

    @classmethod
    def from_json_dict(cls, d: dict) -> 'OcrResult':
        """Create a result from the decoded JSON response.

        Reads the keys ``language``, ``textAngle``, ``orientation`` and
        ``regions`` from ``d``.
        """
        language = d['language']
        angle = d['textAngle']
        orientation = d['orientation']
        parsed_regions = [Region.from_json_dict(entry) for entry in d['regions']]
        return cls(
            language=language,
            text_angle=angle,
            orientation=orientation,
            regions=parsed_regions)

def get_ocr_result(imagefilepath: Path) -> 'OcrResult':
    """Return the parsed OCR result for an image, calling the API if needed.

    The raw response lives in the file named by ``get_ocr_result_path``;
    the API is only called when that file does not exist yet.

    Args:
        imagefilepath: Image whose OCR result is wanted.

    Returns:
        The stored (or freshly fetched) response parsed into an ``OcrResult``.
    """
    import json  # kept function-local, as in the original

    result_path = get_ocr_result_path(imagefilepath)
    if not result_path.exists():
        call_ocr_api(imagefilepath)
    with result_path.open('rb') as result_file:
        d = json.load(result_file)
    return OcrResult.from_json_dict(d)

def draw_boxes(imagefilepath: Path, *, include_lines: bool = False,
               include_words: bool = False) -> Image.Image:
    """Return a copy of the image with OCR bounding boxes drawn on top.

    Args:
        imagefilepath: Image to annotate; its OCR result is obtained through
            ``get_ocr_result``.
        include_lines: Also outline every line (green). Off by default.
        include_words: Also outline every word (blue). Off by default.

    Returns:
        A new RGBA image: the original composited with the outline layer.
        Regions are always outlined in red.
    """
    ocr_result = get_ocr_result(imagefilepath)
    # Close the source file as soon as the pixels have been converted instead
    # of leaving the handle open until garbage collection.
    with Image.open(imagefilepath) as source:
        im = source.convert('RGBA')
    # Boxes are drawn on a fully transparent layer and composited afterwards.
    boxesim = Image.new('RGBA', im.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(boxesim)

    def draw_box(bb: BoundingBox, color):
        draw.rectangle(
            (bb.left, bb.top, bb.left + bb.width, bb.top + bb.height),
            outline=color,
            width=2)

    for region in ocr_result.regions:
        draw_box(region.bounding_box, (255, 0, 0, 128))
        if not (include_lines or include_words):
            continue
        for line in region.lines:
            if include_lines:
                draw_box(line.bounding_box, (0, 255, 0, 128))
            if include_words:
                for word in line.words:
                    draw_box(word.bounding_box, (0, 0, 255, 128))

    return Image.alpha_composite(im, boxesim)

## Pipfile
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]
jupyter = "*"

[packages]
requests = "~=2.21.0"
pillow = "~=6.0.0"

[requires]
python_version = "3.7"
	from dataclasses import dataclass
	from pathlib import Path
	import typing
	from PIL import Image, ImageDraw
	import requests

	# Subscription key that call_ocr_api sends in the
	# 'Ocp-Apim-Subscription-Key' request header.
	# NOTE(review): 'xxx' looks like a placeholder -- supply a real key before use.
	API_KEY = 'xxx'
	# Base URL of the Cognitive Services resource; call_ocr_api appends the
	# OCR API path to it.
	ENDPOINT = 'https://japaneast.api.cognitive.microsoft.com/'

	def get_ocr_result_path(imagefilepath):
		"""Return the path of the OCR result file that belongs to an image.

		The result file sits in the same directory as the image and is named
		after the image's stem with ``_ocr.json`` appended
		(``scan.png`` -> ``scan_ocr.json``).
		"""
		# The function body had lost its indentation; restored here.
		return imagefilepath.with_name(imagefilepath.stem + '_ocr.json')

	def call_ocr_api(imagefilepath: Path, *, language: str = 'en',
			timeout: typing.Optional[float] = 60.0) -> Path:
		"""Send an image to the OCR endpoint and store the raw JSON response.

		Args:
			imagefilepath: Image to upload; its bytes are posted as
				``application/octet-stream``.
			language: Value of the ``language`` query parameter. Defaults to
				``'en'``, the value this function previously hard-coded.
			timeout: Seconds handed to ``requests.post`` as ``timeout``.
				Pass ``None`` to wait indefinitely (the previous behaviour).

		Returns:
			Path of the ``*_ocr.json`` file the response body was written to.

		Raises:
			requests.HTTPError: The service answered with an error status.
			requests.Timeout: No response arrived within ``timeout``.
		"""
		# Tolerate an ENDPOINT configured with or without a trailing slash.
		request_url = ENDPOINT.rstrip('/') + '/vision/v2.0/ocr'
		headers = {
			'Content-Type': 'application/octet-stream',
			'Ocp-Apim-Subscription-Key': API_KEY,
		}
		with imagefilepath.open('rb') as imagefile:
			res = requests.post(
				request_url,
				params={'language': language},
				data=imagefile,
				headers=headers,
				timeout=timeout)
		res.raise_for_status()
		result_path = get_ocr_result_path(imagefilepath)
		# Written only after raise_for_status() so an error body is never stored.
		result_path.write_bytes(res.content)
		return result_path

	@dataclass
	class BoundingBox:
		"""Rectangle given by its left/top position and its width/height."""
		left: int
		top: int
		width: int
		height: int

		@classmethod
		def from_string(cls, s: str) -> 'BoundingBox':
			"""Build a box from a comma-separated ``"x,y,w,h"`` string."""
			return cls(*[int(x) for x in s.split(',')])

	@dataclass
	class Word:
		"""A single word of OCR output together with its bounding box."""
		bounding_box: BoundingBox
		text: str

		@classmethod
		def from_json_dict(cls, d: dict) -> 'Word':
			"""Create a word from a dict holding ``boundingBox`` and ``text``."""
			return cls(
				bounding_box=BoundingBox.from_string(d['boundingBox']),
				text=d['text'])

	@dataclass
	class Line:
		"""One line of OCR output: its bounding box and the words it contains."""
		bounding_box: BoundingBox
		words: typing.Sequence[Word]

		@classmethod
		def from_json_dict(cls, d: dict) -> 'Line':
			"""Create a line from a dict holding ``boundingBox`` and ``words``."""
			return cls(
				bounding_box=BoundingBox.from_string(d['boundingBox']),
				words=[Word.from_json_dict(x) for x in d['words']])

	@dataclass
	class Region:
		"""One region of OCR output: its bounding box and the lines inside it."""
		bounding_box: BoundingBox
		lines: typing.Sequence[Line]

		@classmethod
		def from_json_dict(cls, d: dict) -> 'Region':
			"""Create a region from a dict holding ``boundingBox`` and ``lines``."""
			return cls(
				bounding_box=BoundingBox.from_string(d['boundingBox']),
				lines=[Line.from_json_dict(x) for x in d['lines']])

	@dataclass
	class OcrResult:
		"""Top-level OCR result: language, text angle, orientation and regions."""
		language: str
		text_angle: float
		orientation: str
		regions: typing.Sequence[Region]

		@classmethod
		def from_json_dict(cls, d: dict) -> 'OcrResult':
			"""Create a result from the decoded JSON response.

			Reads the keys ``language``, ``textAngle``, ``orientation`` and
			``regions`` from ``d``.
			"""
			return cls(
				language=d['language'],
				text_angle=d['textAngle'],
				orientation=d['orientation'],
				regions=[Region.from_json_dict(x) for x in d['regions']])

	def get_ocr_result(imagefilepath: Path) -> 'OcrResult':
		"""Return the parsed OCR result for an image, calling the API if needed.

		The raw response lives in the file named by ``get_ocr_result_path``;
		the API is only called when that file does not exist yet.

		Args:
			imagefilepath: Image whose OCR result is wanted.

		Returns:
			The stored (or freshly fetched) response parsed into an ``OcrResult``.
		"""
		import json  # kept function-local, as in the original

		result_path = get_ocr_result_path(imagefilepath)
		if not result_path.exists():
			call_ocr_api(imagefilepath)
		with result_path.open('rb') as result_file:
			d = json.load(result_file)
		return OcrResult.from_json_dict(d)

	def draw_boxes(imagefilepath: Path, *, include_lines: bool = False,
			include_words: bool = False) -> Image.Image:
		"""Return a copy of the image with OCR bounding boxes drawn on top.

		Args:
			imagefilepath: Image to annotate; its OCR result is obtained through
				``get_ocr_result``.
			include_lines: Also outline every line (green). Off by default.
			include_words: Also outline every word (blue). Off by default.

		Returns:
			A new RGBA image: the original composited with the outline layer.
			Regions are always outlined in red.
		"""
		ocr_result = get_ocr_result(imagefilepath)
		# Close the source file as soon as the pixels have been converted instead
		# of leaving the handle open until garbage collection.
		with Image.open(imagefilepath) as source:
			im = source.convert('RGBA')
		# Boxes are drawn on a fully transparent layer and composited afterwards.
		boxesim = Image.new('RGBA', im.size, (255, 255, 255, 0))
		draw = ImageDraw.Draw(boxesim)

		def draw_box(bb: BoundingBox, color):
			draw.rectangle(
				(bb.left, bb.top, bb.left + bb.width, bb.top + bb.height),
				outline=color,
				width=2)

		for region in ocr_result.regions:
			draw_box(region.bounding_box, (255, 0, 0, 128))
			if not (include_lines or include_words):
				continue
			for line in region.lines:
				if include_lines:
					draw_box(line.bounding_box, (0, 255, 0, 128))
				if include_words:
					for word in line.words:
						draw_box(word.bounding_box, (0, 0, 255, 128))

		return Image.alpha_composite(im, boxesim)
	[[source]]
	name = "pypi"
	url = "https://pypi.org/simple"
	verify_ssl = true

	[dev-packages]
	jupyter = "*"

	[packages]
	requests = "~=2.21.0"
	pillow = "~=6.0.0"

	[requires]
	python_version = "3.7"