Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import requests
from db import db
"""
The Douban Group API, which is not listed on http://developers.douban.com/wiki/?title=api_v2
Base url: https://api.douban.com/v2
Group info: /group/:id
Group topics: /group/:id/topics
Group comments: /group/topic/:id/comments
REF: http://www.douban.com/group/topic/33507002/
"""
# URL template for a group's topic listing; the single %s slot takes the group id.
base_url = "http://api.douban.com/v2/group/%s/topics"
# Page size sent as the `count` query parameter on every request.
PER_PAGE_COUNT = 100
def real_fetch(group_id, start=0, timeout=30):
    """Fetch one page of a group's topics and insert each topic into the db.

    Args:
        group_id: Group identifier substituted into ``base_url``.
        start: Offset of the first topic to request (``start`` query
            parameter).
        timeout: Seconds to wait on the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a non-success HTTP status.
        KeyError: If the decoded JSON has no ``'topics'`` entry.
    """
    headers = {
        'Host': 'api.douban.com',
        'Referer':'api.douban.com',
        'Cookie': '',
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36',
        'Connection': 'keep-alive',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language':'zh-cn,zh;q=0.5',
        'Accept-Charset':'GB2312,utf-8;q=0.7,*;q=0.7',
    }
    params = {'start': start, 'count': PER_PAGE_COUNT}
    url = base_url % group_id
    r = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of tripping over an error payload
    # further down when looking up 'topics'.
    r.raise_for_status()
    ret = r.json()
    for topic in ret['topics']:
        db.topic.insert(group_id=group_id,
                        title=topic['title'],
                        content=topic['content'],
                        url=topic['alt'],
                        created=topic['created'])
    # NOTE(review): flush is issued once per page, after all inserts;
    # presumably this persists the pending rows -- confirm against the db module.
    db.flush()
def fetch(group_id, pages=10):
    """Fetch consecutive pages of topics for one group.

    Calls ``real_fetch`` once per page, advancing the start offset by
    ``PER_PAGE_COUNT`` each time, and prints the page index as progress.

    Args:
        group_id: Group identifier passed through to ``real_fetch``.
        pages: Number of pages to request; defaults to 10, the value that
            was previously hard-coded.
    """
    for i in range(pages):
        # Parenthesized print is valid on both Python 2 and Python 3.
        print(i)
        real_fetch(group_id, i * PER_PAGE_COUNT)
# Groups whose topics are downloaded when the file is run as a script.
group_ids = ['zhufang', 'xiaotanzi']

if __name__ == '__main__':
    for group_id in group_ids:
        # Parenthesized print is valid on both Python 2 and Python 3.
        print(group_id)
        fetch(group_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.