Skip to content

Instantly share code, notes, and snippets.

@shibacow
Created September 18, 2016 00:42
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shibacow/c010adebf07f945edf4fc36c1e969431 to your computer and use it in GitHub Desktop.
Save shibacow/c010adebf07f945edf4fc36c1e969431 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# -*- tab-width : 4 -*-
import argparse
# [START detect_text]
import base64
import os
import re
import sys
from mog_op import MongoOp
from googleapiclient import discovery
from googleapiclient import errors
from oauth2client.client import GoogleCredentials
from pprint import pprint
from PIL import Image
# URL template passed to discovery.build() as discoveryServiceUrl; the
# {api} and {apiVersion} placeholders are left unformatted here.
DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}' # noqa
# NOTE(review): BATCH_SIZE is not referenced anywhere in the visible code;
# VisionApi.is_fill() uses its own limits -- confirm whether this is dead.
BATCH_SIZE = 10
# Load environment variables from a .env file located by find_dotenv()
# (presumably the Google credential settings -- TODO confirm).
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
class VisionApi:
    """Construct and use the Google Vision API service.

    Images are queued with add_image() and sent together as one batch by
    detect_text().  Per-file results accumulate across batches and are
    read back with get_result().
    """

    # Limits checked by is_fill(): a batch is full once it holds this many
    # images, or once the base64 payload grows past this many characters.
    MAX_IMAGES = 8
    MAX_BODY_BYTES = 6 * 1024 * 1024

    def __init__(self):
        """Build the Vision v1 client from application-default credentials."""
        self.credentials = GoogleCredentials.get_application_default()
        self.service = discovery.build(
            'vision', 'v1', credentials=self.credentials,
            discoveryServiceUrl=DISCOVERY_URL)
        self.image_body = {}     # filename -> base64 text of the queued image
        self.text_response = {}  # filename -> list of text annotations
        self.body_size = 0       # total base64 length of the queued images
        self.img_num = 0         # number of queued images

    def add_image(self, srcf):
        """Read the file *srcf*, base64-encode it and queue it for the next batch."""
        with open(srcf, 'rb') as image_file:
            bd = image_file.read()
        bd = base64.b64encode(bd).decode('UTF_8')
        self.body_size += len(bd)
        self.img_num += 1
        self.image_body[srcf] = bd
        print("add_image num={} size={} srcf={}".format(
            self.img_num, self.body_size, srcf))

    def is_fill(self):
        """Return True when the queued batch has reached one of its limits."""
        print("is fill num={} size={}".format(self.img_num, self.body_size))
        return (self.img_num >= self.MAX_IMAGES
                or self.body_size > self.MAX_BODY_BYTES)

    def get_result(self):
        """Return the dict mapping each processed filename to its annotations."""
        return self.text_response

    def detect_text(self, num_retries=3, max_results=6):
        """Uses the Vision API to detect text in the queued images.

        Sends every queued image in a single annotate request and stores
        the annotations per filename in the result dict (an empty list when
        no text was found).  Files whose response carries an error are
        reported and get no entry.  The queue is always emptied afterwards,
        even when the request fails.  Always returns None.
        """
        # Snapshot the queue order once so requests and responses are
        # paired from the same sequence.
        filenames = list(self.image_body)
        if not filenames:
            # Nothing queued: do not send an empty request.
            return None
        batch_request = [{
            'image': {
                'content': self.image_body[filename]
            },
            'features': [{
                'type': 'TEXT_DETECTION',
                'maxResults': max_results,
            }]
        } for filename in filenames]
        request = self.service.images().annotate(
            body={'requests': batch_request})
        try:
            responses = request.execute(num_retries=num_retries)
            if 'responses' not in responses:
                return None
            for filename, response in zip(filenames, responses['responses']):
                if 'error' in response:
                    print("API Error for %s: %s" % (
                        filename,
                        response['error']['message']
                        if 'message' in response['error']
                        else 'error but not message'))
                    continue
                if 'textAnnotations' in response:
                    self.text_response[filename] = response['textAnnotations']
                else:
                    self.text_response[filename] = []
        except errors.HttpError as e:
            # The whole batch failed, so name every file that was in it.
            print("Http Error for %s: %s" % (", ".join(filenames), e))
        except KeyError as e2:
            print("Key error: %s" % e2)
        finally:
            self.image_body = {}
            self.body_size = 0
            self.img_num = 0
# [END detect_text]
def get_img_size(srcf):
    """Return the (width, height) of the image file *srcf*.

    Returns (0, 0) when opening the file raises IOError.
    """
    try:
        with Image.open(srcf) as im:
            return im.size
    except IOError:
        # "except IOError, err" is Python 2-only syntax and err was unused.
        return (0, 0)
def main():
    """Run text detection over every file in img/bj01 and store new results.

    Files are sent to the Vision API in batches.  Afterwards one document
    per file (filename, annotations, width, height) is inserted through
    MongoOp unless a page for that filename already exists.
    """
    vi = VisionApi()
    fdir = 'img/bj01'
    for f in sorted(os.listdir(fdir)):
        srcf = os.path.join(fdir, f)
        if not os.path.isfile(srcf):
            # Sub-directories cannot be read as images; skip them.
            continue
        if vi.is_fill():
            # Flush the full batch first, then queue the current file.
            # The previous code flushed without queuing it, silently
            # dropping one image every time a batch filled up.
            vi.detect_text()
        vi.add_image(srcf)
    if vi.img_num:
        # Send whatever is left in the last, partially filled batch.
        vi.detect_text()
    mp = MongoOp('localhost')
    for k, v in vi.get_result().items():
        if mp.exists_page(k):
            continue
        (width, height) = get_img_size(k)
        dkt = dict(filename=k, result=v, width=width, height=height)
        mp.insertdata(dkt)
def add_size():
    """Set width/height on the stored document of every file in img/bj01.

    For each file the image is opened and the document whose "filename"
    equals its path is updated through mp.fuki (presumably a pymongo
    collection exposed by MongoOp -- TODO confirm).  Files that raise
    IOError are reported and skipped.
    """
    mp = MongoOp('localhost')
    fdir = 'img/bj01'
    for f in sorted(os.listdir(fdir)):
        srcf = fdir + os.sep + f
        try:
            with Image.open(srcf) as im:
                width, height = im.size
                rrdd = mp.fuki.update_one(
                    {"filename": srcf},
                    {"$set": {"width": width, "height": height}})
                print(rrdd)
        except IOError as err:
            print("f={} err={}".format(srcf, err))
# Script entry point: only the OCR pass runs when executed directly.
if __name__ == "__main__":
    main()
    # add_size()  (alternative one-off job, left disabled)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment