Skip to content

Instantly share code, notes, and snippets.

@azyobuzin
Last active May 5, 2019 19:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save azyobuzin/f0c4aee1b1a4b306250a7395548759ca to your computer and use it in GitHub Desktop.
Save azyobuzin/f0c4aee1b1a4b306250a7395548759ca to your computer and use it in GitHub Desktop.
from dataclasses import dataclass
from pathlib import Path
import typing
from PIL import Image, ImageDraw
import requests
# Subscription key sent in the 'Ocp-Apim-Subscription-Key' request header.
# NOTE(review): 'xxx' looks like a placeholder - supply a real key before use.
API_KEY = 'xxx'
# Base URL of the API. The OCR request path is appended to it directly, so it
# must end with '/'.
ENDPOINT = 'https://japaneast.api.cognitive.microsoft.com/'
def get_ocr_result_path(imagefilepath: Path) -> Path:
    """Return the path of the cached OCR result for an image.

    The result file sits next to the image and is named after the image's
    stem with ``_ocr.json`` appended (``scan.png`` -> ``scan_ocr.json``).

    Args:
        imagefilepath: Path of the image file.

    Returns:
        Path of the sibling JSON file that holds the OCR response.
    """
    return imagefilepath.with_name(imagefilepath.stem + '_ocr.json')
def call_ocr_api(imagefilepath: Path, language: str = 'en',
                 timeout: float = 60.0) -> Path:
    """Send an image to the OCR endpoint and cache the raw response on disk.

    The image bytes are POSTed to ``vision/v2.0/ocr`` under ``ENDPOINT`` and
    the response body is written unchanged to the path given by
    ``get_ocr_result_path``.

    Args:
        imagefilepath: Path of the image file to upload.
        language: Value of the ``language`` query parameter (default ``'en'``).
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` may block indefinitely.

    Returns:
        Path of the JSON file the response was written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    request_url = ENDPOINT + 'vision/v2.0/ocr'
    headers = {
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': API_KEY,
    }
    # Passing the open file object lets requests stream the upload.
    with imagefilepath.open('rb') as imagefile:
        res = requests.post(
            request_url,
            params={'language': language},
            data=imagefile,
            headers=headers,
            timeout=timeout)
    # Fail before touching the cache so an error body is never stored.
    res.raise_for_status()
    result_path = get_ocr_result_path(imagefilepath)
    result_path.write_bytes(res.content)
    return result_path
@dataclass
class BoundingBox:
    """Rectangle described by its top-left corner and its size."""

    left: int
    top: int
    width: int
    height: int

    @classmethod
    def from_string(cls, s: str) -> 'BoundingBox':
        """Parse a bounding box from a string in ``"x,y,w,h"`` format.

        Args:
            s: Four comma-separated integers: left, top, width, height.

        Returns:
            The parsed ``BoundingBox``.

        Raises:
            ValueError: If ``s`` does not contain exactly four components or
                a component is not an integer.
        """
        parts = s.split(',')
        # Check the count up front so malformed input yields a clear
        # ValueError rather than an argument-count TypeError from __init__.
        if len(parts) != 4:
            raise ValueError(
                f'expected "x,y,w,h" with four components, got {s!r}')
        return cls(*(int(part) for part in parts))
@dataclass
class Word:
    """A single word: its bounding box and its text."""

    bounding_box: BoundingBox
    text: str

    @classmethod
    def from_json_dict(cls, d: dict) -> 'Word':
        """Build a ``Word`` from a dict with ``boundingBox`` and ``text`` keys."""
        box = BoundingBox.from_string(d['boundingBox'])
        return cls(bounding_box=box, text=d['text'])
@dataclass
class Line:
    """A line of text: its bounding box and the words it contains."""

    bounding_box: BoundingBox
    words: typing.Sequence[Word]

    @classmethod
    def from_json_dict(cls, d: dict) -> 'Line':
        """Build a ``Line`` from a dict with ``boundingBox`` and ``words`` keys."""
        box = BoundingBox.from_string(d['boundingBox'])
        parsed_words = list(map(Word.from_json_dict, d['words']))
        return cls(bounding_box=box, words=parsed_words)
@dataclass
class Region:
    """A region of text: its bounding box and the lines it contains."""

    bounding_box: BoundingBox
    lines: typing.Sequence[Line]

    @classmethod
    def from_json_dict(cls, d: dict) -> 'Region':
        """Build a ``Region`` from a dict with ``boundingBox`` and ``lines`` keys."""
        box = BoundingBox.from_string(d['boundingBox'])
        parsed_lines = list(map(Line.from_json_dict, d['lines']))
        return cls(bounding_box=box, lines=parsed_lines)
@dataclass
class OcrResult:
    """Top-level OCR response: language, angle, orientation and regions."""

    language: str
    text_angle: float
    orientation: str
    regions: typing.Sequence[Region]

    @classmethod
    def from_json_dict(cls, d: dict) -> 'OcrResult':
        """Build an ``OcrResult`` from the decoded JSON response.

        Reads the ``language``, ``textAngle``, ``orientation`` and
        ``regions`` keys of ``d``.
        """
        language = d['language']
        angle = d['textAngle']
        orientation = d['orientation']
        parsed_regions = list(map(Region.from_json_dict, d['regions']))
        return cls(
            language=language,
            text_angle=angle,
            orientation=orientation,
            regions=parsed_regions)
def get_ocr_result(imagefilepath: Path) -> 'OcrResult':
    """Return the parsed OCR result for an image, calling the API if needed.

    If the cached result file from ``get_ocr_result_path`` does not exist
    yet, ``call_ocr_api`` is invoked to create it; the file is then loaded
    and converted to an ``OcrResult``.

    Args:
        imagefilepath: Path of the image file.

    Returns:
        The OCR result parsed from the cached JSON file.
    """
    # Kept function-local, as in the original, so the block needs no change
    # to the file's import header.
    import json

    result_path = get_ocr_result_path(imagefilepath)
    if not result_path.exists():
        call_ocr_api(imagefilepath)
    with result_path.open('rb') as result_file:
        d = json.load(result_file)
    return OcrResult.from_json_dict(d)
def draw_boxes(imagefilepath: Path, *, show_lines: bool = False,
               show_words: bool = False) -> Image.Image:
    """Return a copy of the image with OCR bounding boxes drawn on top.

    Region boxes are always drawn (semi-transparent red). Line boxes (green)
    and word boxes (blue) are drawn only when requested.

    Args:
        imagefilepath: Path of the image file; its OCR result is obtained
            through ``get_ocr_result``.
        show_lines: Also outline every line inside each region.
        show_words: Also outline every word inside each line.

    Returns:
        A new RGBA image: the original composited with the box overlay.
    """
    ocr_result = get_ocr_result(imagefilepath)
    # convert() yields an independent in-memory image, so the file opened by
    # Image.open can be closed right away instead of leaking the handle.
    with Image.open(imagefilepath) as source:
        im = source.convert('RGBA')
    # Fully transparent overlay; boxes are drawn here so that their alpha
    # blends with the picture in alpha_composite below.
    boxesim = Image.new('RGBA', im.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(boxesim)

    def draw_box(bb: BoundingBox, color):
        draw.rectangle(
            (bb.left, bb.top, bb.left + bb.width, bb.top + bb.height),
            outline=color,
            width=2)

    for region in ocr_result.regions:
        draw_box(region.bounding_box, (255, 0, 0, 128))
        if not (show_lines or show_words):
            continue
        for line in region.lines:
            if show_lines:
                draw_box(line.bounding_box, (0, 255, 0, 128))
            if show_words:
                for word in line.words:
                    draw_box(word.bounding_box, (0, 0, 255, 128))
    return Image.alpha_composite(im, boxesim)
# NOTE(review): presumably a Pipenv Pipfile shipped alongside the script.
# Package index that dependencies are resolved from.
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
# Development-only dependencies.
[dev-packages]
jupyter = "*"
# Runtime dependencies imported by the script (requests, PIL).
[packages]
requests = "~=2.21.0"
pillow = "~=6.0.0"
# Python version this environment targets.
[requires]
python_version = "3.7"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment