Skip to content

Instantly share code, notes, and snippets.

@kn-neeraj
Forked from RhetTbull/vision.py
Created March 30, 2024 18:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kn-neeraj/29c371f30410ee85e2cbca6108a578df to your computer and use it in GitHub Desktop.
Save kn-neeraj/29c371f30410ee85e2cbca6108a578df to your computer and use it in GitHub Desktop.
Use Apple's Vision framework from Python to detect text in images
""" Use Apple's Vision Framework via PyObjC to detect text in images
To use:
python3 -m pip install pyobjc-core pyobjc-framework-Quartz pyobjc-framework-Vision wurlitzer
"""
import pathlib
import Quartz
import Vision
from Cocoa import NSURL
from Foundation import NSDictionary
# needed to capture system-level stderr
from wurlitzer import pipes
def image_to_text(img_path, lang="eng"):
input_url = NSURL.fileURLWithPath_(img_path)
with pipes() as (out, err):
# capture stdout and stderr from system calls
# otherwise, Quartz.CIImage.imageWithContentsOfURL_
# prints to stderr something like:
# 2020-09-20 20:55:25.538 python[73042:5650492] Creating client/daemon connection: B8FE995E-3F27-47F4-9FA8-559C615FD774
# 2020-09-20 20:55:25.652 python[73042:5650492] Got the query meta data reply for: com.apple.MobileAsset.RawCamera.Camera, response: 0
input_image = Quartz.CIImage.imageWithContentsOfURL_(input_url)
vision_options = NSDictionary.dictionaryWithDictionary_({})
vision_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
input_image, vision_options
)
results = []
handler = make_request_handler(results)
vision_request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(handler)
error = vision_handler.performRequests_error_([vision_request], None)
return results
def make_request_handler(results):
""" results: list to store results """
if not isinstance(results, list):
raise ValueError("results must be a list")
def handler(request, error):
if error:
print(f"Error! {error}")
else:
observations = request.results()
for text_observation in observations:
recognized_text = text_observation.topCandidates_(1)[0]
results.append([recognized_text.string(), recognized_text.confidence()])
return handler
def main():
import sys
import pathlib
img_path = pathlib.Path(sys.argv[1])
if not img_path.is_file():
sys.exit("Invalid image path")
img_path = str(img_path.resolve())
results = image_to_text(img_path)
print(results)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment