# shibacow/manga_fukidasi_detector.py

## manga_fukidasi_detector.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# -*- tab-width : 4 -*-
import argparse
# [START detect_text]
import base64
import os
import re
import sys
from mog_op import MongoOp
from googleapiclient import discovery
from googleapiclient import errors
from oauth2client.client import GoogleCredentials
from pprint import pprint
from PIL import Image

DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'  # noqa
BATCH_SIZE = 10
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

class VisionApi:
    """Batch images and run Google Vision API text detection on them.

    Images are queued with ``add_image``, sent in a single
    ``images().annotate`` request by ``detect_text``, and the per-file
    annotations accumulate in the dict returned by ``get_result``.
    """

    def __init__(self):
        """Build the Vision v1 client from application-default credentials."""
        self.credentials = GoogleCredentials.get_application_default()
        self.service = discovery.build(
            'vision', 'v1', credentials=self.credentials,
            discoveryServiceUrl=DISCOVERY_URL)
        # Pending batch: file path -> base64-encoded file content.
        self.image_body = {}
        # Accumulated results: file path -> list of text annotations.
        self.text_response = {}
        # Total base64 length and number of images in the pending batch.
        self.body_size = 0
        self.img_num = 0

    def add_image(self, srcf):
        """Read ``srcf``, base64-encode it and queue it for the next request.

        Args:
            srcf: path of the image file; also used as the result key.
        """
        with open(srcf, 'rb') as image_file:
            encoded = base64.b64encode(image_file.read()).decode('UTF_8')
        self.body_size += len(encoded)
        self.img_num += 1
        self.image_body[srcf] = encoded
        # Single-argument print() behaves the same on Python 2 and 3.
        print("add_image num={} size={} srcf={}".format(
            self.img_num, self.body_size, srcf))

    def is_fill(self):
        """Return True when the pending batch should be sent.

        The batch counts as full once it holds more than 7 images or more
        than 6 * 1024 * 1024 characters of base64 text.
        """
        print("is fill num={} size={}".format(self.img_num, self.body_size))
        return self.img_num > 7 or self.body_size > 6 * 1024 * 1024

    def get_result(self):
        """Return the dict mapping file path to its text annotations."""
        return self.text_response

    def detect_text(self, num_retries=3, max_results=6):
        """Send the pending batch to the Vision API and store the results.

        Responses are paired with the queued files by position. A per-image
        API error is printed and that image gets no entry in the results;
        an image without ``textAnnotations`` gets an empty list.
        ``HttpError`` and ``KeyError`` are printed rather than raised. The
        pending batch is cleared whether or not the request succeeded.

        Args:
            num_retries: forwarded to ``request.execute``.
            max_results: ``maxResults`` of the TEXT_DETECTION feature.

        Returns:
            Always None; results are read through ``get_result``.
        """
        # Nothing queued: do not spend an API call on an empty request.
        if not self.image_body:
            return None
        # Snapshot the order once so requests and responses line up and the
        # error handler below can name the files of this batch.
        filenames = list(self.image_body)
        batch_request = [{
            'image': {
                'content': self.image_body[filename]
            },
            'features': [{
                'type': 'TEXT_DETECTION',
                'maxResults': max_results,
            }]
        } for filename in filenames]
        request = self.service.images().annotate(
            body={'requests': batch_request})
        try:
            responses = request.execute(num_retries=num_retries)
            if 'responses' not in responses:
                return None
            for filename, response in zip(filenames, responses['responses']):
                if 'error' in response:
                    print("API Error for %s: %s" % (
                        filename,
                        response['error'].get(
                            'message', 'error but not message')))
                    continue
                self.text_response[filename] = response.get(
                    'textAnnotations', [])
        except errors.HttpError as e:
            # The whole request failed, so report every file in the batch.
            print("Http Error for %s: %s" % (', '.join(filenames), e))
        except KeyError as e2:
            print("Key error: %s" % e2)
        finally:
            self.image_body = {}
            self.body_size = 0
            self.img_num = 0
        return None
        # [END detect_text]

def get_img_size(srcf):
    """Return the ``(width, height)`` of the image file at ``srcf``.

    Args:
        srcf: path handed to ``Image.open``.

    Returns:
        The ``size`` attribute of the opened image, or ``(0, 0)`` when
        opening the file raises ``IOError``.
    """
    try:
        with Image.open(srcf) as im:
            return im.size
    except IOError:
        # Best effort: an unreadable file is reported as a zero-sized image.
        return (0, 0)

def main(fdir='img/bj01', host='localhost'):
    """Run text detection on every file in ``fdir`` and store new results.

    Files are processed in sorted name order and sent in batches sized by
    ``VisionApi.is_fill``. Each result is inserted through ``MongoOp``
    together with the image size, unless ``exists_page`` reports that the
    file is already stored.

    Args:
        fdir: directory whose entries are sent for text detection.
        host: host name passed to ``MongoOp``.
    """
    vi = VisionApi()
    for f in sorted(os.listdir(fdir)):
        # Flush first, then queue: the file that finds the batch full must
        # go into the next batch instead of being dropped.
        if vi.is_fill():
            vi.detect_text()
        vi.add_image(os.path.join(fdir, f))
    # Send the final, partially filled batch (skipped when nothing is queued).
    if vi.img_num:
        vi.detect_text()
    mp = MongoOp(host)
    for k, v in vi.get_result().items():
        if mp.exists_page(k):
            continue
        width, height = get_img_size(k)
        dkt = dict(filename=k, result=v, width=width, height=height)
        mp.insertdata(dkt)
def add_size(fdir='img/bj01', host='localhost'):
    """Store the pixel size of every image in ``fdir`` on its stored record.

    For each entry of ``fdir`` (sorted by name) the image is opened and the
    record whose ``filename`` equals the image path gets its ``width`` and
    ``height`` set through ``mp.fuki.update_one``. Files that cannot be
    opened are reported and skipped.

    Args:
        fdir: directory containing the images.
        host: host name passed to ``MongoOp``.
    """
    # NOTE(review): ``mp.fuki`` is presumably the collection object exposed
    # by MongoOp - confirm against mog_op.
    mp = MongoOp(host)
    for f in sorted(os.listdir(fdir)):
        srcf = os.path.join(fdir, f)
        try:
            with Image.open(srcf) as im:
                width, height = im.size
        except IOError as err:
            print("f={} err={}".format(srcf, err))
            continue
        rrdd = mp.fuki.update_one(
            {"filename": srcf},
            {"$set": {"width": width, "height": height}})
        print(rrdd)
# Script entry point: run the text-detection pipeline. Call add_size()
# here instead to backfill image dimensions on already stored records.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	# -*- coding:utf-8 -*-
	# -*- tab-width : 4 -*-
	import argparse
	# [START detect_text]
	import base64
	import os
	import re
	import sys
	from mog_op import MongoOp
	from googleapiclient import discovery
	from googleapiclient import errors
	from oauth2client.client import GoogleCredentials
	from pprint import pprint
	from PIL import Image

	DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}' # noqa
	BATCH_SIZE = 10
	from dotenv import load_dotenv, find_dotenv
	load_dotenv(find_dotenv())

	class VisionApi:
	    """Batch images and run Google Vision API text detection on them.

	    Images are queued with ``add_image``, sent in a single
	    ``images().annotate`` request by ``detect_text``, and the per-file
	    annotations accumulate in the dict returned by ``get_result``.
	    """

	    def __init__(self):
	        """Build the Vision v1 client from application-default credentials."""
	        self.credentials = GoogleCredentials.get_application_default()
	        self.service = discovery.build(
	            'vision', 'v1', credentials=self.credentials,
	            discoveryServiceUrl=DISCOVERY_URL)
	        # Pending batch: file path -> base64-encoded file content.
	        self.image_body = {}
	        # Accumulated results: file path -> list of text annotations.
	        self.text_response = {}
	        # Total base64 length and number of images in the pending batch.
	        self.body_size = 0
	        self.img_num = 0

	    def add_image(self, srcf):
	        """Read ``srcf``, base64-encode it and queue it for the next request.

	        Args:
	            srcf: path of the image file; also used as the result key.
	        """
	        with open(srcf, 'rb') as image_file:
	            encoded = base64.b64encode(image_file.read()).decode('UTF_8')
	        self.body_size += len(encoded)
	        self.img_num += 1
	        self.image_body[srcf] = encoded
	        # Single-argument print() behaves the same on Python 2 and 3.
	        print("add_image num={} size={} srcf={}".format(
	            self.img_num, self.body_size, srcf))

	    def is_fill(self):
	        """Return True when the pending batch should be sent.

	        The batch counts as full once it holds more than 7 images or more
	        than 6 * 1024 * 1024 characters of base64 text.
	        """
	        print("is fill num={} size={}".format(self.img_num, self.body_size))
	        # The limit is 6 MiB written out as a product, not the single
	        # literal 610241024.
	        return self.img_num > 7 or self.body_size > 6 * 1024 * 1024

	    def get_result(self):
	        """Return the dict mapping file path to its text annotations."""
	        return self.text_response

	    def detect_text(self, num_retries=3, max_results=6):
	        """Send the pending batch to the Vision API and store the results.

	        Responses are paired with the queued files by position. A per-image
	        API error is printed and that image gets no entry in the results;
	        an image without ``textAnnotations`` gets an empty list.
	        ``HttpError`` and ``KeyError`` are printed rather than raised. The
	        pending batch is cleared whether or not the request succeeded.

	        Args:
	            num_retries: forwarded to ``request.execute``.
	            max_results: ``maxResults`` of the TEXT_DETECTION feature.

	        Returns:
	            Always None; results are read through ``get_result``.
	        """
	        # Nothing queued: do not spend an API call on an empty request.
	        if not self.image_body:
	            return None
	        # Snapshot the order once so requests and responses line up and the
	        # error handler below can name the files of this batch.
	        filenames = list(self.image_body)
	        batch_request = [{
	            'image': {
	                'content': self.image_body[filename]
	            },
	            'features': [{
	                'type': 'TEXT_DETECTION',
	                'maxResults': max_results,
	            }]
	        } for filename in filenames]
	        request = self.service.images().annotate(
	            body={'requests': batch_request})
	        try:
	            responses = request.execute(num_retries=num_retries)
	            if 'responses' not in responses:
	                return None
	            for filename, response in zip(filenames, responses['responses']):
	                if 'error' in response:
	                    print("API Error for %s: %s" % (
	                        filename,
	                        response['error'].get(
	                            'message', 'error but not message')))
	                    continue
	                self.text_response[filename] = response.get(
	                    'textAnnotations', [])
	        except errors.HttpError as e:
	            # The whole request failed, so report every file in the batch.
	            print("Http Error for %s: %s" % (', '.join(filenames), e))
	        except KeyError as e2:
	            print("Key error: %s" % e2)
	        finally:
	            self.image_body = {}
	            self.body_size = 0
	            self.img_num = 0
	        return None
	        # [END detect_text]

	def get_img_size(srcf):
	    """Return the ``(width, height)`` of the image file at ``srcf``.

	    Args:
	        srcf: path handed to ``Image.open``.

	    Returns:
	        The ``size`` attribute of the opened image, or ``(0, 0)`` when
	        opening the file raises ``IOError``.
	    """
	    try:
	        with Image.open(srcf) as im:
	            return im.size
	    except IOError:
	        # Best effort: an unreadable file is reported as a zero-sized image.
	        return (0, 0)

	def main(fdir='img/bj01', host='localhost'):
	    """Run text detection on every file in ``fdir`` and store new results.

	    Files are processed in sorted name order and sent in batches sized by
	    ``VisionApi.is_fill``. Each result is inserted through ``MongoOp``
	    together with the image size, unless ``exists_page`` reports that the
	    file is already stored.

	    Args:
	        fdir: directory whose entries are sent for text detection.
	        host: host name passed to ``MongoOp``.
	    """
	    vi = VisionApi()
	    for f in sorted(os.listdir(fdir)):
	        # Flush first, then queue: the file that finds the batch full must
	        # go into the next batch instead of being dropped.
	        if vi.is_fill():
	            vi.detect_text()
	        vi.add_image(os.path.join(fdir, f))
	    # Send the final, partially filled batch (skipped when nothing is queued).
	    if vi.img_num:
	        vi.detect_text()
	    mp = MongoOp(host)
	    for k, v in vi.get_result().items():
	        if mp.exists_page(k):
	            continue
	        width, height = get_img_size(k)
	        dkt = dict(filename=k, result=v, width=width, height=height)
	        mp.insertdata(dkt)
	def add_size(fdir='img/bj01', host='localhost'):
	    """Store the pixel size of every image in ``fdir`` on its stored record.

	    For each entry of ``fdir`` (sorted by name) the image is opened and the
	    record whose ``filename`` equals the image path gets its ``width`` and
	    ``height`` set through ``mp.fuki.update_one``. Files that cannot be
	    opened are reported and skipped.

	    Args:
	        fdir: directory containing the images.
	        host: host name passed to ``MongoOp``.
	    """
	    # NOTE(review): ``mp.fuki`` is presumably the collection object exposed
	    # by MongoOp - confirm against mog_op.
	    mp = MongoOp(host)
	    for f in sorted(os.listdir(fdir)):
	        srcf = os.path.join(fdir, f)
	        try:
	            with Image.open(srcf) as im:
	                width, height = im.size
	        except IOError as err:
	            print("f={} err={}".format(srcf, err))
	            continue
	        rrdd = mp.fuki.update_one(
	            {"filename": srcf},
	            {"$set": {"width": width, "height": height}})
	        print(rrdd)
	# Script entry point: run the text-detection pipeline. Call add_size()
	# here instead to backfill image dimensions on already stored records.
	if __name__ == '__main__':
	    main()