Created
September 18, 2016 00:42
-
-
Save shibacow/c010adebf07f945edf4fc36c1e969431 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf-8 -*- | |
# -*- tab-width : 4 -*- | |
import argparse | |
# [START detect_text] | |
import base64 | |
import os | |
import re | |
import sys | |
from mog_op import MongoOp | |
from googleapiclient import discovery | |
from googleapiclient import errors | |
from oauth2client.client import GoogleCredentials | |
from pprint import pprint | |
from PIL import Image | |
DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}' # noqa | |
BATCH_SIZE = 10 | |
from dotenv import load_dotenv, find_dotenv | |
load_dotenv(find_dotenv()) | |
class VisionApi:
    """Batch images and run Google Vision TEXT_DETECTION on them.

    Images are queued with :meth:`add_image`, sent in one request by
    :meth:`detect_text`, and the accumulated annotations are read back with
    :meth:`get_result`.  The code is written to run on both Python 2 and 3.
    """

    # Thresholds used by is_fill().  NOTE(review): presumably chosen to stay
    # below the Vision API per-request limits -- confirm against the API docs.
    MAX_IMAGES = 7
    MAX_BODY_BYTES = 6 * 1024 * 1024

    def __init__(self):
        """Build the Vision service client from application-default credentials."""
        self.credentials = GoogleCredentials.get_application_default()
        self.service = discovery.build(
            'vision', 'v1', credentials=self.credentials,
            discoveryServiceUrl=DISCOVERY_URL)
        # filename -> list of textAnnotations, accumulated over all batches.
        self.text_response = {}
        self._reset_batch()

    def _reset_batch(self):
        """Drop every queued image and zero the batch counters."""
        # filename -> base64-encoded file content for the pending batch.
        self.image_body = {}
        # Total length of the base64 payloads queued so far.
        self.body_size = 0
        # Number of add_image() calls since the last reset.
        self.img_num = 0

    def add_image(self, srcf):
        """Read ``srcf``, base64-encode it and queue it for the next batch.

        Args:
            srcf: Path of the image file; also used as the result key.

        Raises:
            IOError: If the file cannot be opened or read.
        """
        with open(srcf, 'rb') as image_file:
            encoded = base64.b64encode(image_file.read()).decode('UTF_8')
        self.body_size += len(encoded)
        self.img_num += 1
        self.image_body[srcf] = encoded
        print("add_image num={} size={} srcf={}".format(
            self.img_num, self.body_size, srcf))

    def is_fill(self):
        """Return True once the pending batch exceeds a size threshold.

        The batch counts as full when more than ``MAX_IMAGES`` images are
        queued or the encoded payload is larger than ``MAX_BODY_BYTES``.
        """
        print("is fill num={} size={}".format(self.img_num, self.body_size))
        return (self.img_num > self.MAX_IMAGES
                or self.body_size > self.MAX_BODY_BYTES)

    def get_result(self):
        """Return the dict mapping filename to its list of text annotations."""
        return self.text_response

    def detect_text(self, num_retries=3, max_results=6):
        """Send the queued images to the Vision API and store the annotations.

        Results are merged into the dict returned by :meth:`get_result`; a
        file whose response has no ``textAnnotations`` gets an empty list and
        a file whose response carries an ``error`` gets no entry.  The pending
        batch is cleared afterwards, whether or not the request succeeded.

        Args:
            num_retries: Passed to ``request.execute``.
            max_results: ``maxResults`` requested for each image.

        Returns:
            None.
        """
        if not self.image_body:
            # Nothing queued: do not spend an API call on an empty request.
            return None

        # Freeze the order once so requests and responses pair up by position.
        filenames = list(self.image_body)
        batch_request = [{
            'image': {
                'content': self.image_body[filename]
            },
            'features': [{
                'type': 'TEXT_DETECTION',
                'maxResults': max_results,
            }]
        } for filename in filenames]
        request = self.service.images().annotate(
            body={'requests': batch_request})
        try:
            responses = request.execute(num_retries=num_retries)
            if 'responses' not in responses:
                return None
            for filename, response in zip(filenames, responses['responses']):
                if 'error' in response:
                    print("API Error for %s: %s" % (
                        filename,
                        response['error'].get(
                            'message', 'error but not message')))
                    continue
                self.text_response[filename] = response.get(
                    'textAnnotations', [])
        except errors.HttpError as e:
            # The whole request failed, so name every file in the batch rather
            # than whichever one a loop variable happened to hold last.
            print("Http Error for %s: %s" % (", ".join(filenames), e))
        except KeyError as e2:
            print("Key error: %s" % e2)
        finally:
            self._reset_batch()
        return None
# [END detect_text] | |
def get_img_size(srcf):
    """Return the ``(width, height)`` of the image at ``srcf``.

    Args:
        srcf: Path of the image file to open with PIL.

    Returns:
        The image's ``size`` tuple, or ``(0, 0)`` when opening the file
        raises ``IOError``.
    """
    try:
        with Image.open(srcf) as im:
            return im.size
    except IOError:
        # Best effort: callers store (0, 0) for files that cannot be opened.
        return (0, 0)
def main():
    """Run text detection over every file in ``img/bj01`` and store results.

    Files are queued in sorted name order and sent to the Vision API in
    batches.  Each result is then inserted into MongoDB together with the
    image dimensions, unless a record for that filename already exists.
    """
    vi = VisionApi()
    fdir = 'img/bj01'
    for f in sorted(os.listdir(fdir)):
        # Flush a full batch first, then queue the current file.  Flushing
        # *instead of* queueing would silently drop one image per batch.
        if vi.is_fill():
            vi.detect_text()
        vi.add_image(os.path.join(fdir, f))
    # Send whatever is left over; skip the call when nothing is queued.
    if vi.img_num:
        vi.detect_text()

    mp = MongoOp('localhost')
    for k, v in vi.get_result().items():
        if mp.exists_page(k):
            continue
        # Only open the image when the record will actually be inserted.
        width, height = get_img_size(k)
        mp.insertdata(dict(filename=k, result=v, width=width, height=height))
def add_size():
    """Set ``width``/``height`` on the stored record of each file in ``img/bj01``.

    For every file, in sorted name order, the image is opened to read its
    size and the document whose ``filename`` equals the file path is updated.
    Files that raise ``IOError`` on opening are reported and skipped.
    """
    mp = MongoOp('localhost')
    fdir = 'img/bj01'
    for f in sorted(os.listdir(fdir)):
        srcf = os.path.join(fdir, f)
        # Keep the try body to the one call expected to raise IOError.
        try:
            with Image.open(srcf) as im:
                width, height = im.size
        except IOError as err:
            print("f={} err={}".format(srcf, err))
            continue
        rrdd = mp.fuki.update_one(
            {"filename": srcf},
            {"$set": {"width": width, "height": height}})
        print(rrdd)
# Script entry point: run the detection pipeline when executed directly.
if __name__ == "__main__":
    main()
    # add_size()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment