# GitHub Gist hansent/7229099 -- created by @hansent on October 30, 2013 08:39.
import os
import sys
import json
import threading
import requests
# Service configuration. The API key is taken from the environment and falls
# back to a placeholder string when DIGITIZE_IO_API_KEY is not set.
DIGITIZE_IO_ENDPOINT = "https://digitize.io/api/ocr"
DIGITIZE_IO_API_KEY = os.environ.get("DIGITIZE_IO_API_KEY", "<your-api-key>")

# One shared HTTP session for the whole module. The key is stored in the
# session's default query parameters, so requests sent through the session
# include `apiKey` without each call site having to pass it.
_session = requests.Session()
_session.params["apiKey"] = DIGITIZE_IO_API_KEY
class OCRJob(object):
    """A single file submitted to the OCR endpoint.

    Attributes:
        fname: Path of the file to upload.
        status: Last known job status. ``None`` until the job is submitted,
            ``"Submitted"`` right after the upload, then whatever ``status``
            value the service returns on each poll.
        data: The most recent JSON document returned by ``poll_status()``
            (an empty dict before the first poll).
    """

    def __init__(self, fname, auto_submit=True):
        """Create the job and, unless ``auto_submit`` is false, upload it.

        Args:
            fname: Path of the file to upload.
            auto_submit: When true (the default), ``submit()`` is called
                immediately from the constructor.
        """
        self.fname = fname
        self.status = None
        self.data = {}
        # Defined up front so that a job built with auto_submit=False still
        # has the attribute; None means "not submitted yet".
        self._session_id = None
        if auto_submit:
            self.submit()

    def submit(self):
        """Upload the file's bytes and remember the returned session id."""
        # The context manager closes the handle even if read() raises.
        with open(self.fname, 'rb') as image_file:
            f_data = image_file.read()
        # NOTE(review): the content type is always sent as image/png,
        # whatever the file actually contains -- confirm the service is fine
        # with that for other formats.
        resp_data = _session.post(
            DIGITIZE_IO_ENDPOINT,
            headers={'Content-Type': 'image/png'},
            data=f_data
        ).json()
        self.status = "Submitted"
        # Falls back to an empty string when the response has no sessionId.
        self._session_id = resp_data.get('sessionId', '')

    def poll_status(self):
        """Fetch the job's current state from the service.

        The decoded JSON response is stored in ``self.data`` and its
        ``status`` field in ``self.status``.

        Returns:
            The ``status`` value from the response (``None`` if the response
            has no such field).

        Raises:
            RuntimeError: If the job has not been submitted yet.
        """
        if self._session_id is None:
            raise RuntimeError(
                "OCRJob for '{0}' has not been submitted yet".format(
                    self.fname))
        self.data = _session.get(
            DIGITIZE_IO_ENDPOINT,
            params={'sessionId': self._session_id}
        ).json()
        self.status = self.data.get('status')
        return self.status
def poll_job_status(job, interval=2.0):
    """Poll ``job`` once, print its status, and act on the result.

    While the reported status is ``"In Progress"`` another poll is scheduled
    on a ``threading.Timer``. On ``"Success"`` the job is handed to
    ``finalize_job``. Any other status is only printed and polling stops.

    Args:
        job: Object exposing ``fname`` and ``poll_status()``.
        interval: Seconds to wait before the next poll. Defaults to 2.0,
            the delay that used to be hard-coded here.
    """
    status = job.poll_status()
    # A parenthesised single-argument print is valid on Python 2 and 3 alike.
    print("status for job '{0}': {1}".format(job.fname, status))
    if status == "In Progress":
        # Re-arm a timer instead of sleeping so this call returns at once.
        threading.Timer(interval, poll_job_status, [job, interval]).start()
    elif status == "Success":
        finalize_job(job)
def finalize_job(job):
    """Write a finished job's response data to a JSON file.

    The output path is ``job.fname`` with its extension replaced by
    ``.json``.

    Args:
        job: Object exposing ``fname`` (source file path) and ``data``
            (the JSON-serialisable document to write).
    """
    print("ocr processing done: {0}".format(job.fname))
    # splitext() returns (root, extension); only the root is needed.
    root, _ext = os.path.splitext(job.fname)
    fname_json = "{0}.json".format(root)
    print(" -- writing ocr results to: {0}".format(fname_json))
    # Close the file explicitly so the JSON is flushed to disk on return
    # instead of whenever the handle happens to be garbage-collected.
    with open(fname_json, 'w') as out_file:
        json.dump(job.data, out_file)
if __name__ == "__main__":
    # Command line: every argument is the path of a file to upload.
    file_list = sys.argv[1:]
    if not file_list:
        print("You must pass at least one file to upload.")
        # sys.exit is always available; the bare exit() builtin is injected
        # by the site module and is absent when that module is not loaded.
        sys.exit(1)
    for fname in file_list:
        # Constructing the job uploads the file right away (auto_submit).
        job = OCRJob(fname)
        # First status check after two seconds; poll_job_status schedules
        # further checks itself while the job is still in progress.
        threading.Timer(2.0, poll_job_status, [job]).start()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment