Skip to content

Instantly share code, notes, and snippets.

@zhangjaycee
Last active September 9, 2018 06:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zhangjaycee/ddb53306e49125fd6ab6f7da6d9b5d54 to your computer and use it in GitHub Desktop.
Save zhangjaycee/ddb53306e49125fd6ab6f7da6d9b5d54 to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import pytesseract
from PIL import Image
from PIL import ImageGrab
import webbrowser
import time
import jieba
import urllib
import threading
from multiprocessing import Process, Queue
# DEBUG: when True, the grabbed screenshots are saved to disk and per-stage
# timings are printed by main(). Set to True while calibrating the regions.
DEBUG = False
# CUT: when True, the OCR'd question text is segmented with
# jieba.cut_for_search and re-joined with spaces before it is searched.
# Set to False to search the raw OCR text instead.
CUT = True
def start_browser(s):
    """Open the URL *s* in a new tab of the default web browser."""
    webbrowser.open_new_tab(s)
def options(q):
    """OCR a fixed screen region and put the recognised lines on *q*.

    The region (60, 395, 380, 640) is grabbed, recognised with the
    'chi_sim' tesseract language, UTF-8 encoded and split on newlines.
    The resulting list is placed on the queue as a single item. When
    DEBUG is set, the screenshot is also saved to disk.
    """
    region = (60, 395, 380, 640)
    shot = ImageGrab.grab(region)
    if DEBUG:
        shot.save('/Users/Jaycee/test/ocr/iphone/options.png')
    recognised = pytesseract.image_to_string(shot, lang='chi_sim')
    encoded = recognised.encode(encoding='UTF-8', errors='strict')
    q.put(encoded.split('\n'))
# Screen region passed to ImageGrab.grab for the question text.
# Earlier calibrations noted by the author: (50, 170, 520, 320) and
# (40, 140, 445, 300).
_QUESTION_BBOX = (50, 170, 540, 330)
# Search endpoint; the percent-encoded query is appended to it.
_SEARCH_URL = "http://www.baidu.com/s?rn=50&wd="


def _fetch_page(url):
    """Download *url* and return the raw body, always closing the handle."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _count_hits(option_text, page):
    """Return how many times the jieba search tokens of *option_text* occur in *page*."""
    hits = 0
    for token in jieba.cut_for_search(option_text):
        # Skip blank tokens: counting a space or newline would match nearly
        # all of the page and swamp the real signal.
        if not token.strip():
            continue
        hits += page.count(token.encode('UTF-8'))
    return hits


#@profile
def main():
    """Run the interactive screen-OCR / search / count loop until interrupted.

    Written for Python 2 (relies on raw_input and urllib.urlopen).

    Each round:
      1. starts `options` in a child process to OCR the answer choices;
      2. grabs and OCRs the question region, segmenting the text with jieba
         when CUT is set;
      3. opens the search URL in a browser tab (separate process) and
         downloads the same page;
      4. counts how often each choice's tokens occur in the downloaded page
         and prints the tallies for A, B and C;
      5. waits for Enter before starting the next round.

    If fewer OCR lines than expected come back, an error prompt is shown and
    the round is skipped.
    """
    while True:
        round_start = time.time()

        option_queue = Queue(maxsize=10)
        option_proc = Process(target=options, args=(option_queue,))
        option_proc.start()

        # Start the clock before the grab so grab_time really measures it.
        grab_start = time.time()
        image = ImageGrab.grab(_QUESTION_BBOX)
        grab_time = time.time() - grab_start
        if DEBUG:
            image.save('/Users/Jaycee/test/ocr/iphone/1.png')

        ocr_start = time.time()
        code = pytesseract.image_to_string(image, lang='chi_sim')
        print(code)
        if CUT:
            code = ' '.join(jieba.cut_for_search(code))
        ocr_end = time.time()

        # Percent-encode the query: OCR text may contain spaces, newlines or
        # characters such as '&' and '#' that would otherwise corrupt the URL.
        url = _SEARCH_URL + urllib.quote(code.encode('UTF-8'))
        # To use another engine, change _SEARCH_URL
        # (e.g. "https://www.google.com/search?q=").
        browser_proc = Process(target=start_browser, args=(url,))
        browser_proc.start()
        browser_started = time.time()

        download_start = time.time()
        page = _fetch_page(url)
        print("Download Html Time: %s" % (time.time() - download_start))

        # Drain the queue BEFORE joining: a child that has put data on a
        # Queue may not exit until that data is consumed, so join-then-get
        # can deadlock.
        option_lines = option_queue.get()
        option_proc.join()

        try:
            # The three choices are read from every other OCR line.
            choices = (option_lines[0], option_lines[2], option_lines[4])
        except IndexError:
            raw_input("Error! Press 'Enter' to process next..")
            continue

        counts = [_count_hits(choice, page) for choice in choices]

        if DEBUG:
            option_count_time = time.time() - browser_started
            print("grab time: %s ocr time: %s open browser time: %s "
                  "option count time: %s"
                  % (grab_time, ocr_end - ocr_start,
                     browser_started - ocr_end, option_count_time))

        for label, choice, hits in zip("ABC", choices, counts):
            print("%s:\t[OCR] %s [COUNT] %d" % (label, choice, hits))

        print("Total Time: %s" % (time.time() - round_start))
        raw_input("Press 'Enter' to process next..")
# Start the loop only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment