Skip to content

Instantly share code, notes, and snippets.

@dantmnf

dantmnf/winocr.py

Last active Jun 4, 2021
Embed
What would you like to do?
WinRT OcrEngine from Python
import sys
import asyncio
import base64
import copy
import pprint
# pip3 install winrt
from winrt.windows.media.ocr import OcrEngine
from winrt.windows.globalization import Language
from winrt.windows.graphics.imaging import *
from winrt.windows.security.cryptography import CryptographicBuffer
from PIL import Image
class rect:
def __init__(self, x, y, w, h):
self.x = x
self.y = y
self.width = w
self.height = h
def __repr__(self):
return 'rect(%d, %d, %d, %d)' % (self.x, self.y, self.width, self.height)
def right(self):
return self.x + self.width
def bottom(self):
return self.y + self.height
def set_right(self, value):
self.width = value - self.x
def set_bottom(self, value):
self.height = value - self.y
def dump_rect(rtrect: winrt.windows.foundation.Rect):
return rect(rtrect.x, rtrect.y, rtrect.width, rtrect.height)
def dump_ocrword(word):
return {
'bounding_rect': dump_rect(word.bounding_rect),
'text': word.text
}
def merge_words(words):
if len(words) == 0:
return words
new_words = [copy.deepcopy(words[0])]
words = words[1:]
for word in words:
lastnewword = new_words[-1]
lastnewwordrect = new_words[-1]['bounding_rect']
wordrect = word['bounding_rect']
if len(word['text']) == 1 and wordrect.x - lastnewwordrect.right() <= wordrect.width * 0.2:
lastnewword['text'] += word['text']
lastnewwordrect.x = min((wordrect.x, lastnewwordrect.x))
lastnewwordrect.y = min((wordrect.y, lastnewwordrect.y))
lastnewwordrect.set_right(max((wordrect.right(), lastnewwordrect.right())))
lastnewwordrect.set_bottom(max((wordrect.bottom(), lastnewwordrect.bottom())))
else:
new_words.append(copy.deepcopy(word))
return new_words
def dump_ocrline(line):
words = list(map(dump_ocrword, line.words))
merged = merge_words(words)
return {
'text': line.text,
'words': words,
'merged_words': merged,
'merged_text': ' '.join(map(lambda x: x['text'], merged))
}
def dump_ocrresult(ocrresult):
lines = list(map(dump_ocrline, ocrresult.lines))
return {
'text': ocrresult.text,
'text_angle': ocrresult.text_angle.value if ocrresult.text_angle else None,
'lines': lines,
'merged_text': ' '.join(map(lambda x: x['merged_text'], lines))
}
def ibuffer(s):
"""create WinRT IBuffer instance from a bytes-like object"""
return CryptographicBuffer.decode_from_base64_string(base64.b64encode(s).decode('ascii'))
def swbmp_from_pil_image(img):
if img.mode != "RGBA":
img = img.convert("RGBA")
pybuf = img.tobytes()
rtbuf = ibuffer(pybuf)
return SoftwareBitmap.create_copy_from_buffer(rtbuf, BitmapPixelFormat.RGBA8, img.width, img.height, BitmapAlphaMode.STRAIGHT)
async def ensure_coroutine(awaitable):
return await awaitable
def blocking_wait(awaitable):
return asyncio.run(ensure_coroutine(awaitable))
def recognize_pil_image(img, lang):
lang = Language(lang)
assert(OcrEngine.is_language_supported(lang))
eng = OcrEngine.try_create_from_language(lang)
swbmp = swbmp_from_pil_image(img)
return dump_ocrresult(blocking_wait(eng.recognize_async(swbmp)))
def recognize_file(filename, lang):
img = Image.open(filename)
return recognize_pil_image(img, lang)
if __name__ == '__main__':
if 2 <= len(sys.argv) <= 3:
lang = 'zh-hans-cn' if len(sys.argv) == 2 else sys.argv[1]
result = recognize_file(sys.argv[-1], lang)
pprint.pprint(result, width=128)
else:
print('usage: %s [language=zh-hans-cn] filename' % sys.argv[0])
langs = list(map(lambda x: x.language_tag, OcrEngine.get_available_recognizer_languages()))
print('installed languages:', ', '.join(langs))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment