Created
March 12, 2016 14:48
-
-
Save anonymous/e837f2f5d9c6c57d7db8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding=utf-8 | |
import json | |
import requests | |
import re | |
class spprec(object):
    """Scraper for a paginated listing page that is paged via form postbacks.

    The first request is a plain GET.  Every hidden ``<input>`` found in a
    response is remembered and replayed on the next request together with
    the pager's ``__EVENTTARGET`` / ``__EVENTARGUMENT`` fields.
    NOTE(review): the hidden fields presumably carry ASP.NET WebForms view
    state -- confirm against the live page.
    """

    # Hidden form fields as (name, value) pairs.  The value may be empty:
    # such fields must still be echoed back, so ``*`` is used, not ``+``.
    _HIDDEN_INPUT_RE = re.compile(
        r'<input type="hidden" name="([^"]+)" id="[^"]+" value="([^"]*)" />'
    )
    # Detail-page links as (href, title) pairs.
    _DETAIL_LINK_RE = re.compile(
        r'<a href="(/sczw/InfoDetail/Default\.aspx[^"]+)" target="_blank" title="([^"]+)"'
    )

    def __init__(self, url, timeout=30):
        """Create a session for the listing page at *url*.

        Args:
            url: Address of the listing page; used for both GET and POST.
            timeout: Seconds to wait for each HTTP request before giving
                up, so that a stalled server cannot hang the caller.
        """
        self.session = requests.session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
        })
        self.url = url
        self.timeout = timeout
        # Hidden form fields harvested from the most recent response.
        self.post_data = {}

    def getInfo(self, num):
        """Fetch one listing page and return its detail links.

        While no hidden form fields have been harvested yet (i.e. on the
        first call) the page is fetched with a plain GET and *num* is not
        sent; afterwards *num* is posted as the pager's event argument.

        Args:
            num: Page number to request from the pager.

        Returns:
            A list of ``(href, title)`` tuples, one per detail link found
            in the response, in document order.

        Raises:
            requests.HTTPError: If the server answers with an error status.
                Raised before the stored form state is replaced, so an
                error page cannot wipe the pagination state.
        """
        if not self.post_data:
            res = self.session.get(self.url, timeout=self.timeout)
        else:
            # Work on a copy so the stored state is only replaced once a
            # response has actually been received and accepted.
            form = dict(self.post_data)
            form['__EVENTARGUMENT'] = num
            form['__EVENTTARGET'] = 'MoreInfoList1$Pager'
            res = self.session.post(url=self.url, data=form, timeout=self.timeout)
        res.raise_for_status()

        # Remember the fresh hidden fields for the next postback.
        self.post_data = dict(self._HIDDEN_INPUT_RE.findall(res.text))
        return self._DETAIL_LINK_RE.findall(res.text)
# Walk listing pages 1-4 in order and print the title of every entry.
# Pages are requested sequentially because each getInfo() call replays the
# hidden form fields harvested from the previous response.
obj = spprec("http://www.spprec.com/sczw/jyfwpt/005001/005001001/MoreInfo.aspx?CategoryNum=005001001")
# range()/print() with a single argument behave the same on Python 2 and 3,
# unlike xrange and the print statement, which are Python-2-only.
for i in range(1, 5):
    data = obj.getInfo(i)
    for item in data:
        # Each item is an (href, title) tuple; only the title is printed.
        print(item[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment