Skip to content

Instantly share code, notes, and snippets.

@ficapy
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ficapy/da009402855f962eba3e to your computer and use it in GitHub Desktop.
Save ficapy/da009402855f962eba3e to your computer and use it in GitHub Desktop.
多线程请求
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Ficapy
# Create: '15/8/6'
import time
import math
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from xlrd import open_workbook
# Credentials posted to the www.szceb.com login form; the values here are
# placeholders.
USERNAME = 'XXXXXXXXX'
PASSWORD = 'XXXXXXXXX'

# Default headers sent with every request made through the shared session.
headers = {
    'User-Agent': (
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.'
        '50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)'
    ),
    'Host': 'www.szceb.com',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
}

# A single session is reused for the login request and all later queries.
session = requests.Session()
session.headers.update(headers)
# Read the values of column index 1 from the first sheet of 1.xls, skipping
# row 0 (presumably a header row -- confirm against the workbook).
with open_workbook('1.xls') as book:
    sheet = book.sheet_by_index(0)
    raw_data = [sheet.cell(row_idx, 1).value for row_idx in range(1, sheet.nrows)]
# Simulate a login. The site has a flaw: it does not verify that the captcha
# is valid, so a fixed dummy value (presumably the 'yanzheng' field) is sent.
# The login page is fetched first, presumably so the session picks up cookies.
# Timeouts match the one used for the query requests so a stalled server
# cannot hang the script forever.
session.get('http://www.szceb.com/szceb/login.jsp', timeout=60)
loging_url = 'http://www.szceb.com/szceb/login.do?method=login'
login = session.post(
    loging_url,
    data={
        'loginSignal': 1,
        'username': USERNAME,
        'password': PASSWORD,
        'yanzheng': 1234,
    },
    timeout=60,
)
# Use an explicit raise rather than `assert`: assertions are stripped when
# Python runs with -O, which would let the script continue without a login.
if '登陆后显示的主页面' not in login.text:
    raise RuntimeError('login failed: expected marker text not found in response')
def work(input_data, retry=3):
    """Query the filing status of a single item number.

    Posts ``input_data`` as the item number to the filing-status endpoint
    through the shared module-level ``session``. Failed requests are retried
    with an increasing delay (1s, 2s, 4s, ...) between attempts.

    Args:
        input_data: Item number to look up.
        retry: Maximum number of retries after the first failed attempt.

    Returns:
        ``input_data`` when the response reports that no data was found,
        otherwise ``True``.

    Raises:
        ValueError: Carrying ``input_data`` when the request still fails
            after all retries; the last request error is chained as its
            cause.
    """
    url = 'http://www.szceb.com/szceb/goodsFilingStatus.do?method=filingStatus'
    data = {
        'goodsRegCiqList.itemNo': input_data,
        'goodsRegCiqList.GName': '',
        'goodsRegCiq.ebcCode': '',
        'goodsRegCiq.ebpCode': '',
        'goodsRegCiqList.operType': '',
        'goodsRegCiqList.ciqStatus': '',
        'strStatDate': '',
        'strEndDate': '',
        'page': 1,
    }
    retries_done = 0
    while True:
        try:
            result = session.post(url, data=data, timeout=60)
            result.raise_for_status()
        except requests.RequestException as exc:
            # Give up immediately once the retry budget is spent, without
            # sleeping or announcing a retry that will never happen.
            if retries_done >= retry:
                raise ValueError(input_data) from exc
            retries_done += 1
            # Back off progressively longer before each new attempt.
            time.sleep(2 ** (retries_done - 1))
            print('{}重试第{}次'.format(input_data, retries_done))
            continue
        # The page contains this marker text when the query has no results.
        if '没有找到任何数据' in result.text:
            return input_data
        return True
def main():
    """Query every entry of ``raw_data`` concurrently and report the misses.

    Submits one ``work`` call per entry to a pool of 10 threads. Entries whose
    query raised are reported as request errors; entries for which ``work``
    returned anything other than ``True`` are collected and printed, sorted,
    after a summary line.
    """
    non_data = []
    print('开始咯~~~')
    with ThreadPoolExecutor(max_workers=10) as executor:
        # Map each future back to the input it was submitted with.
        future_to_input = {executor.submit(work, item): item for item in raw_data}
        for future in as_completed(future_to_input):
            input_data = future_to_input[future]
            try:
                result = future.result()
            except Exception as exc:
                # Boundary handler: one failed lookup must not abort the rest.
                print('%r 请求出错: %s' % (input_data, exc))
                continue
            if result is not True:
                non_data.append(input_data)
    print('共查询{}条数据, 没有任何数据的有{}条'.format(len(raw_data), len(non_data)))
    print('以下条目没有查询到数据:')
    # Values come from spreadsheet cells and may be numbers rather than
    # strings; sort and join on their text form so neither call raises
    # TypeError. For all-string data the output is unchanged.
    print('\n'.join(str(item) for item in sorted(non_data, key=str)))
# Start the batch query only when the file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment