Skip to content

Instantly share code, notes, and snippets.

@upbit
Created December 10, 2015 12:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save upbit/b58c595e4b28085b4e75 to your computer and use it in GitHub Desktop.
Save upbit/b58c595e4b28085b4e75 to your computer and use it in GitHub Desktop.
QZone 说说抓取脚本
# -*- coding: utf-8 -*-
import os
import sys
# Python 2 only: `reload` is a builtin there. Reloading `sys` makes
# `sys.setdefaultencoding` available again (the interpreter's `site` module
# removes it at startup) so the implicit str/unicode conversions done when
# writing the fetched text to files use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")
# Do not write .pyc files for modules imported after this point.
sys.dont_write_bytecode = True
import time
import requests
# PyV8 provides the embedded V8 JavaScript engine used to evaluate the
# script returned by the feed endpoint.
from pyv8 import PyV8
def fetch_feeds_html(uin, start=0, count=10, prefix = 'dump'):
    """Fetch one page of a QZone feed and append its HTML to a dump file.

    The endpoint's response is expected to be a script that calls
    ``_Callback(payload)``. The raw response is saved to
    ``<prefix>_access.log`` (overwritten on every call), evaluated inside a
    PyV8 context to capture the payload, and the stripped ``html`` of every
    non-empty entry in ``payload.data.friend_data`` is appended to
    ``<prefix>.html``. Nothing is appended (and the HTML file is not
    created) when the page contains no usable entries.

    Args:
        uin: Account number sent as the ``hostuin`` query parameter.
        start: Value sent as the ``start`` query parameter.
        count: Value sent as the ``count`` query parameter.
        prefix: Path prefix shared by the two output files.

    Returns:
        None. The results are the two files written under ``prefix``.
    """
    url = "http://ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_act_all"
    params = {
        'hostuin': uin,
        'start': start,
        'count': count,
    }
    # NOTE(review): no timeout is set, so a stalled connection blocks forever;
    # consider passing `timeout=` once an acceptable limit is agreed on.
    response = requests.get(url, params=params).text

    # Keep the raw response on disk for inspection, then read it back as the
    # exact bytes that were stored. Both handles are closed deterministically
    # (the original left the read handle to the garbage collector).
    log_path = prefix + '_access.log'
    with open(log_path, 'w+') as log_file:
        log_file.write(response)
    with open(log_path, 'rb') as log_file:
        script = log_file.read()

    with PyV8.JSContext() as ctxt:
        # The response invokes _Callback(...); this stub captures its argument.
        ctxt.eval("var data = null; function _Callback(obj) { data = obj; }")
        # SECURITY: this executes JavaScript supplied by the remote server.
        # It runs inside the V8 context only, but the source is untrusted.
        ctxt.eval(script)

        friend_data = ctxt.locals.data.data.friend_data
        fragments = []
        for key in friend_data.keys():
            friend = friend_data[key]
            if not friend:
                continue
            fragments.append(friend.html.strip())

        # Open the output once per page instead of once per entry, and only
        # when there is something to append.
        if fragments:
            with open(prefix + '.html', 'a+') as html_file:
                for fragment in fragments:
                    html_file.write(fragment)
def main():
    """Dump the first feed pages of each hard-coded QZone account.

    Creates ``./outputs/`` when it is missing, then requests four consecutive
    pages of 25 entries per account through ``fetch_feeds_html``, writing to
    ``./outputs/<uin>_dumps*`` and pausing one second after every request.
    """
    page_size = 25
    max_size = 4
    # (start, count) pairs: 0-25, 25-50, 50-75, 75-100.
    pages = [(index * page_size, page_size) for index in range(max_size)]
    target = {
        "1487921930": list(pages),
        "1248475740": list(pages),
    }

    output_dir = "./outputs/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for uin, ranges in target.items():
        for offset, length in ranges:
            print(">> fetch %s [%s-%s] ..." % (uin, offset, offset + length))
            fetch_feeds_html(uin, offset, length, "./outputs/%s_dumps" % (uin))
            # Throttle: wait between consecutive requests.
            time.sleep(1)
# Run the scrape only when executed as a script, so that importing this
# module does not trigger network requests and file writes.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment