Skip to content

Instantly share code, notes, and snippets.

@borgle
Created March 13, 2016 09:13
Show Gist options
  • Save borgle/72e8971e1f83768f6824 to your computer and use it in GitHub Desktop.
Save borgle/72e8971e1f83768f6824 to your computer and use it in GitHub Desktop.
杭州市机动车驾驶人考试互联网预约平台预约考试人员统计脚本
#coding: utf-8
import re
import time
import unicodedata

import requests
# Tally the rows listed by the exam-registration query for one exam date.
#
# The script pages through the query results, counts rows grouped by the
# third and fourth table cells, and prints one line per group plus a total.
# It is written to run unchanged on Python 2.7 and Python 3.

QUERY_URL = "http://www.hzti.com:9004/drv_web/queryKsmd.do"
EXAM_DATE = '2016-03-20'  # sent as the 'ykrq' form field and shown in the report header
EXAM_SUBJECT = 2          # sent as the 'kskm' form field; the report header labels this run as 科目二
PAGE_SIZE = 20            # sent as the 'pageSize' form field
REQUEST_TIMEOUT = 30      # seconds before a single HTTP request is abandoned
MAX_RETRIES = 5           # consecutive row-less responses tolerated for one page
RETRY_DELAY = 2           # seconds to wait before re-requesting a row-less page
PAGE_DELAY = 0.8          # seconds to wait between consecutive pages

HEADERS = {
    'User-Agent': 'Mozilla/4.0',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html;q=0.9,*/*;q=0.8',
}

# One result row: a <tr> carrying the hover handlers followed by exactly seven cells.
ROW_PATTERN = re.compile(
    r'<tr onMouseOver="mouseover\(this\);" onMouseOut="mouseout\(this\);">'
    + r'<td>(.*?)</td>' * 7
    + r'</tr>'
)
# Pager text of the form "共<N>页"; N is taken as the number of pages.
TOTAL_PAGES_PATTERN = re.compile(u'共(\\d+)页')


def normalize_html(text):
    """Return *text* with whitespace collapsed so ROW_PATTERN can match it.

    The substitutions run in a fixed order: drop line breaks, squeeze runs of
    whitespace, remove whitespace between tags and before '>', then strip
    '&nbsp;' entities.
    """
    text = re.sub(r'\r|\n', '', text)
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'>\s+<', '><', text)
    text = re.sub(r'\s*>', '>', text)
    return re.sub(r'&nbsp;', '', text)


def parse_rows(html):
    """Return a list of 7-tuples, one per result row found in normalized *html*."""
    return ROW_PATTERN.findall(html)


def parse_total_pages(html):
    """Return the page count announced in *html*.

    Raises:
        ValueError: if the page-count marker is not present.
    """
    match = TOTAL_PAGES_PATTERN.search(html)
    if match is None:
        raise ValueError('page-count marker not found in response')
    return int(match.group(1))


def tally_rows(rows, counts=None):
    """Add *rows* to the nested tally ``counts[cell3][cell4] -> row count``.

    Rows are grouped by their stripped third and fourth cells.
    NOTE(review): the meaning of those two columns is not visible in this
    script -- confirm against the live table.

    Args:
        rows: iterable of cell tuples as produced by ``parse_rows``.
        counts: existing tally to update in place; a new dict when omitted.

    Returns:
        The updated tally.
    """
    if counts is None:
        counts = {}
    for row in rows:
        outer_key = row[2].strip()
        inner_key = row[3].strip()
        bucket = counts.setdefault(outer_key, {})
        bucket[inner_key] = bucket.get(inner_key, 0) + 1
    return counts


def fetch_page(page_index):
    """POST the query for *page_index* and return the normalized response HTML.

    A request-level failure (connection error, timeout, ...) yields an empty
    string, which the caller treats like any other row-less response and
    retries.
    """
    form = {
        'currentPage': page_index,
        'kskm': EXAM_SUBJECT,
        'pageSize': PAGE_SIZE,
        'ykrq': EXAM_DATE,
    }
    try:
        response = requests.post(QUERY_URL, data=form, headers=HEADERS,
                                 timeout=REQUEST_TIMEOUT)
        try:
            return normalize_html(response.text)
        finally:
            response.close()
    except requests.RequestException:
        return u''


def collect_counts(fetch=fetch_page, sleep=time.sleep, max_retries=MAX_RETRIES):
    """Walk every result page and return the nested tally of rows.

    Args:
        fetch: callable mapping a 1-based page index to normalized HTML.
        sleep: callable used for the delays between requests.
        max_retries: consecutive row-less responses allowed for one page.

    Returns:
        Nested dict as built by ``tally_rows``.

    Raises:
        RuntimeError: if a page yields no rows *max_retries* times in a row.
        ValueError: if the first page with rows carries no page-count marker.
    """
    counts = {}
    page_index, total_pages = 1, 0
    failures = 0
    while True:
        html = fetch(page_index)
        rows = parse_rows(html)
        if not rows:
            # A page without rows is re-requested, but only a bounded number
            # of times so an empty date or a dead server cannot loop forever.
            failures += 1
            if failures >= max_retries:
                raise RuntimeError(
                    'no rows on page {} after {} attempts'.format(page_index, failures))
            print('redo {}'.format(page_index))
            sleep(RETRY_DELAY)
            continue
        failures = 0
        tally_rows(rows, counts)
        if total_pages < 1:
            # The page count is read once, from the first page that has rows.
            total_pages = parse_total_pages(html)
        if page_index + 1 > total_pages:
            break
        page_index += 1
        sleep(PAGE_DELAY)
    return counts


def display_width(text):
    """Return the column count of *text*, counting wide/fullwidth characters as two."""
    return sum(2 if unicodedata.east_asian_width(ch) in ('W', 'F') else 1
               for ch in text)


def pad_display(text, width):
    """Left-justify *text* to *width* display columns; never truncates."""
    return text + u' ' * max(0, width - display_width(text))


def format_report(counts):
    """Return the report lines: header, one line per group, then the grand total."""
    lines = [u'科目二 {} 统计结果:'.format(EXAM_DATE)]
    total = 0
    for outer_key, bucket in counts.items():
        for inner_key, count in bucket.items():
            total += count
            lines.append(u'{}{}{} {}'.format(
                pad_display(outer_key, 20), pad_display(inner_key, 16), count, u'人'))
    lines.append(u'总共{}人'.format(total))
    return lines


def main():
    """Scrape all pages and print the report."""
    for line in format_report(collect_counts()):
        print(line)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment