|
#!/usr/bin/env python |
|
# encoding:utf-8 |
|
|
|
""" |
|
Fetch url and secret code from http://www.ftoow.com/read.php?tid-727.html |
|
|
|
Store in local file as a format of "url code" |
|
""" |
|
|
|
import re |
|
import urllib2 |
|
import gzip |
|
import cookielib |
|
import codecs |
|
import cStringIO |
|
|
|
# Path of the Mozilla-format cookie file; get_source() saves the jar to it
# after each request.
cookie_file = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(cookie_file)
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
# Browser-like headers attached to every request made through this opener.
opener.addheaders = [
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    # Advertise gzip only: it is the one content coding this script decodes.
    # Offering deflate/sdch invites responses that cannot be unpacked.
    ('Accept-Encoding', 'gzip'),
    ('Accept-Language', 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4'),
    ('Connection', 'keep-alive'),
    ('Host', 'www.ftoow.com'),
    ('User-Agent',
     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36'),
]
# Make plain urllib2.urlopen() calls go through the cookie-aware opener.
urllib2.install_opener(opener)
|
|
|
|
|
def get_source(url):
    """Fetch *url* and return its body decoded from GBK.

    The request is made with ``urllib2.urlopen`` and the module-level cookie
    jar is saved to ``cookie_file`` after the response arrives.  The body is
    gunzipped only when the response declares a gzip ``Content-Encoding``;
    an uncompressed response is decoded as-is instead of failing in gzip.

    Raises whatever ``urllib2.urlopen`` raises on network/HTTP failure and
    ``UnicodeDecodeError`` if the body is not valid GBK.
    """
    res = urllib2.urlopen(url)
    try:
        cookie.save(cookie_file)
        body = res.read()
        content_encoding = res.info().get('Content-Encoding', '')
    finally:
        # Release the connection even if saving cookies or reading fails.
        res.close()
    if 'gzip' in content_encoding.lower():
        body = gzip.GzipFile(fileobj=cStringIO.StringIO(body)).read()
    return body.decode('gbk')
|
|
|
|
|
# Quoted links to individual threads on the index page.  The fixed URL prefix
# is escaped so that '.' and '?' match literally rather than as regex operators.
index_pattern = re.compile(
    u'"(' + re.escape(u'http://www.ftoow.com/read.php?') + u'.*?)"')
# Quoted pan.baidu.com link followed, on the same line, by its 4-character code.
share_pattern = re.compile(
    u'"(' + re.escape(u'http://pan.baidu.com/') + u'.*?)"'
    u'.*?百度网盘.*?密码:(\\w{4})')

source = get_source('http://www.ftoow.com/read.php?tid-727.html')
match = index_pattern.findall(source)

# The with-block closes result.txt even if fetching one of the pages raises.
with codecs.open('result.txt', 'w', 'gbk') as f:
    for url in match:
        s = get_source(url)
        m = share_pattern.search(s)
        if m is None:
            # Page carries no share link / code pair; nothing to record.
            continue
        data = u'%s %s\n' % (m.group(1), m.group(2))
        f.write(data)
        print(data)