zhangjaycee/bwfw.py

## bwfw.py
#!/usr/local/bin/python
# -*- coding: utf-8 -*-

import pytesseract
from PIL import Image
from PIL import ImageGrab
import webbrowser
import time
import jieba
import urllib
import threading
from multiprocessing import Process, Queue

# DEBUG: when True, the captured screenshots are saved to disk and the
# per-stage timings are printed by main().
DEBUG = False
# CUT: when True, the OCR'd query text is segmented with
# jieba.cut_for_search and the tokens are joined with spaces before searching.
CUT = True

def start_browser(s):
    """Show the URL *s* in a new tab of the default web browser."""
    # new=2 asks for a new browser tab, which is what open_new_tab() requests.
    webbrowser.open(s, new=2)

def options(q):
    """OCR the on-screen option area and put the resulting lines on *q*.

    Grabs the fixed screen region (60, 395, 380, 640), runs Tesseract with the
    ``chi_sim`` language data on it, encodes the text as UTF-8 and splits it
    on newlines.

    q: queue shared with the consumer. Exactly one list is always put on it,
       even when grabbing or OCR fails, so a consumer blocked in ``q.get()``
       cannot wait forever. On failure the list is empty and the exception
       still propagates to the caller.
    """
    ostr_l = []
    try:
        o = ImageGrab.grab((60, 395, 380, 640))
        if DEBUG:
            # Keep the raw capture so OCR problems can be inspected later.
            o.save('/Users/Jaycee/test/ocr/iphone/options.png')
        ostr = pytesseract.image_to_string(o, lang='chi_sim').encode(encoding='UTF-8', errors='strict')
        ostr_l = ostr.split('\n')
    finally:
        q.put(ostr_l)

#@profile
def main():
    """Run the interactive capture / search / score loop until interrupted.

    Each round:
      1. starts a child process that OCRs the option area (``options``),
      2. grabs and OCRs the screen region (50, 170, 540, 330),
      3. optionally segments that text with jieba (controlled by ``CUT``),
      4. opens a Baidu search for the text in the browser and downloads the
         same result page,
      5. counts how often the tokens of the first three non-blank option
         lines occur in that page and prints the counts as A/B/C,
      6. waits for Enter before starting the next round.

    This is Python 2 code (``raw_input``, ``urllib.urlopen``); ``print(...)``
    is always called with a single argument so its output is unchanged.
    """
    while True:
        round_start = time.time()

        # OCR the options in a child process so that it overlaps with the
        # query OCR and the page download below.
        q = Queue(maxsize=10)
        o_p = Process(target=options, args=(q,))
        o_p.start()

        # Screen region whose text becomes the search query. The timer starts
        # before the grab so that the reported grab time is meaningful.
        grab_start = time.time()
        image = ImageGrab.grab((50, 170, 540, 330))
        grab_time = time.time() - grab_start
        if DEBUG:
            image.save('/Users/Jaycee/test/ocr/iphone/1.png')

        ocr_start = time.time()
        code = pytesseract.image_to_string(image, lang='chi_sim')
        print(code)
        if CUT:
            # Search with the space-separated search-mode tokens.
            code = ' '.join(jieba.cut_for_search(code))
        ocr_time = time.time() - ocr_start

        # Percent-encode the query: the text contains spaces (and possibly
        # newlines or reserved characters such as '&' and '#') that would
        # otherwise truncate or corrupt the wd= parameter.
        query = urllib.quote_plus(code.encode(encoding='UTF-8', errors='strict'))
        url = "http://www.baidu.com/s?rn=50&wd=" + query

        browser_start = time.time()
        p = Process(target=start_browser, args=(url,))
        p.start()
        browser_done = time.time()
        open_browser_time = browser_done - browser_start

        download_start = time.time()
        response = urllib.urlopen(url)
        try:
            res = response.read()
        finally:
            response.close()
        print("Download Html Time: %s" % (time.time() - download_start))

        # Take the result off the queue before joining the child: joining a
        # process whose queued items have not been consumed can deadlock.
        ostr_l = q.get()
        o_p.join()

        # Keep only lines that contain text, so the three options are found
        # whether or not the OCR output has blank lines between them.
        choices = [line for line in ostr_l if line.strip()]
        if len(choices) < 3:
            raw_input("Error! Press 'Enter' to process next..")
            continue
        o1str, o2str, o3str = choices[:3]

        o1cnt = _count_hits(o1str, res)
        o2cnt = _count_hits(o2str, res)
        o3cnt = _count_hits(o3str, res)

        if DEBUG:
            # Time from launching the browser until all options are counted
            # (includes the page download and waiting for the option OCR).
            option_count_time = time.time() - browser_done
            print("grab time: %s ocr time: %s open browser time: %s option count time: %s"
                  % (grab_time, ocr_time, open_browser_time, option_count_time))
        print("A:\t[OCR] %s  [COUNT] %d" % (o1str, o1cnt))
        print("B:\t[OCR] %s [COUNT] %d" % (o2str, o2cnt))
        print("C:\t[OCR] %s [COUNT] %d" % (o3str, o3cnt))
        print("Total Time: %s" % (time.time() - round_start))
        raw_input("Press 'Enter' to process next..")


def _count_hits(option, page):
    """Return how often the jieba search-mode tokens of *option* occur in *page*.

    option: text of one option line.
    page: downloaded result page; tokens are UTF-8 encoded before counting.
    """
    total = 0
    for token in jieba.cut_for_search(option):
        # Skip whitespace-only tokens, if any: counting a bare space would add
        # every space in the page to the score.
        if not token.strip():
            continue
        total += page.count(token.encode(encoding='UTF-8'))
    return total

# Start the capture/search loop only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
	#!/usr/local/bin/python
	# -*- coding: utf-8 -*-

	import pytesseract
	from PIL import Image
	from PIL import ImageGrab
	import webbrowser
	import time
	import jieba
	import urllib
	import threading
	from multiprocessing import Process, Queue

	# DEBUG: when True, the captured screenshots are saved to disk and the
	# per-stage timings are printed by main().
	DEBUG = False
	# CUT: when True, the OCR'd query text is segmented with
	# jieba.cut_for_search and the tokens are joined with spaces before searching.
	CUT = True

	def start_browser(s):
	    """Open the URL *s* in a new tab of the default web browser."""
	    webbrowser.open_new_tab(s)

	def options(q):
	    """OCR the on-screen option area and put the resulting lines on *q*.

	    Grabs the fixed screen region (60, 395, 380, 640), runs Tesseract with
	    the ``chi_sim`` language data on it, encodes the text as UTF-8 and
	    splits it on newlines.

	    q: queue shared with the consumer. Exactly one list is always put on
	       it, even when grabbing or OCR fails, so a consumer blocked in
	       ``q.get()`` cannot wait forever. On failure the list is empty and
	       the exception still propagates to the caller.
	    """
	    ostr_l = []
	    try:
	        o = ImageGrab.grab((60, 395, 380, 640))
	        if DEBUG:
	            # Keep the raw capture so OCR problems can be inspected later.
	            o.save('/Users/Jaycee/test/ocr/iphone/options.png')
	        ostr = pytesseract.image_to_string(o, lang='chi_sim').encode(encoding='UTF-8', errors='strict')
	        ostr_l = ostr.split('\n')
	    finally:
	        q.put(ostr_l)

	#@profile
	def main():
	    """Run the interactive capture / search / score loop until interrupted.

	    Each round:
	      1. starts a child process that OCRs the option area (``options``),
	      2. grabs and OCRs the screen region (50, 170, 540, 330),
	      3. optionally segments that text with jieba (controlled by ``CUT``),
	      4. opens a Baidu search for the text in the browser and downloads
	         the same result page,
	      5. counts how often the tokens of the first three non-blank option
	         lines occur in that page and prints the counts as A/B/C,
	      6. waits for Enter before starting the next round.

	    This is Python 2 code (``raw_input``, ``urllib.urlopen``); ``print(...)``
	    is always called with a single argument so its output is unchanged.
	    """
	    while True:
	        round_start = time.time()

	        # OCR the options in a child process so that it overlaps with the
	        # query OCR and the page download below.
	        q = Queue(maxsize=10)
	        o_p = Process(target=options, args=(q,))
	        o_p.start()

	        # Screen region whose text becomes the search query. The timer
	        # starts before the grab so the reported grab time is meaningful.
	        grab_start = time.time()
	        image = ImageGrab.grab((50, 170, 540, 330))
	        grab_time = time.time() - grab_start
	        if DEBUG:
	            image.save('/Users/Jaycee/test/ocr/iphone/1.png')

	        ocr_start = time.time()
	        code = pytesseract.image_to_string(image, lang='chi_sim')
	        print(code)
	        if CUT:
	            # Search with the space-separated search-mode tokens.
	            code = ' '.join(jieba.cut_for_search(code))
	        ocr_time = time.time() - ocr_start

	        # Percent-encode the query: the text contains spaces (and possibly
	        # newlines or reserved characters such as '&' and '#') that would
	        # otherwise truncate or corrupt the wd= parameter.
	        query = urllib.quote_plus(code.encode(encoding='UTF-8', errors='strict'))
	        url = "http://www.baidu.com/s?rn=50&wd=" + query

	        browser_start = time.time()
	        p = Process(target=start_browser, args=(url,))
	        p.start()
	        browser_done = time.time()
	        open_browser_time = browser_done - browser_start

	        download_start = time.time()
	        response = urllib.urlopen(url)
	        try:
	            res = response.read()
	        finally:
	            response.close()
	        print("Download Html Time: %s" % (time.time() - download_start))

	        # Take the result off the queue before joining the child: joining
	        # a process whose queued items have not been consumed can deadlock.
	        ostr_l = q.get()
	        o_p.join()

	        # Keep only lines that contain text, so the three options are found
	        # whether or not the OCR output has blank lines between them.
	        choices = [line for line in ostr_l if line.strip()]
	        if len(choices) < 3:
	            raw_input("Error! Press 'Enter' to process next..")
	            continue
	        o1str, o2str, o3str = choices[:3]

	        o1cnt = _count_hits(o1str, res)
	        o2cnt = _count_hits(o2str, res)
	        o3cnt = _count_hits(o3str, res)

	        if DEBUG:
	            # Time from launching the browser until all options are counted
	            # (includes the page download and waiting for the option OCR).
	            option_count_time = time.time() - browser_done
	            print("grab time: %s ocr time: %s open browser time: %s option count time: %s"
	                  % (grab_time, ocr_time, open_browser_time, option_count_time))
	        print("A:\t[OCR] %s [COUNT] %d" % (o1str, o1cnt))
	        print("B:\t[OCR] %s [COUNT] %d" % (o2str, o2cnt))
	        print("C:\t[OCR] %s [COUNT] %d" % (o3str, o3cnt))
	        print("Total Time: %s" % (time.time() - round_start))
	        raw_input("Press 'Enter' to process next..")


	def _count_hits(option, page):
	    """Return how often the jieba search-mode tokens of *option* occur in *page*.

	    option: text of one option line.
	    page: downloaded result page; tokens are UTF-8 encoded before counting.
	    """
	    total = 0
	    for token in jieba.cut_for_search(option):
	        # Skip whitespace-only tokens, if any: counting a bare space would
	        # add every space in the page to the score.
	        if not token.strip():
	            continue
	        total += page.count(token.encode(encoding='UTF-8'))
	    return total

	# Start the capture/search loop only when this file is executed as a
	# script, not when it is imported.
	if __name__ == "__main__":
	    main()