Skip to content

Instantly share code, notes, and snippets.

@iamsk
Created September 2, 2014 12:15
Show Gist options
  • Save iamsk/1bc0760dfeeeb950f18e to your computer and use it in GitHub Desktop.
Save iamsk/1bc0760dfeeeb950f18e to your computer and use it in GitHub Desktop.
import requests
from db import db
"""
The Douban Group API, which is not listed on http://developers.douban.com/wiki/?title=api_v2
Base url: https://api.douban.com/v2
Group info: /group/:id
Group topics: /group/:id/topics
Group comments: /group/topic/:id/comments
REF: http://www.douban.com/group/topic/33507002/
"""
# Number of topics requested per call; sent as the ``count`` query parameter
# and used as the stride between successive ``start`` offsets.
PER_PAGE_COUNT = 100
# Group topics endpoint; ``%s`` is replaced with the group id.
base_url = 'http://api.douban.com/v2/group/%s/topics'
def real_fetch(group_id, start=0, timeout=10):
    """Fetch one page of a group's topics and store them in the database.

    Requests ``PER_PAGE_COUNT`` topics beginning at offset ``start`` from the
    group topics endpoint, then inserts one ``db.topic`` row per topic in the
    response and flushes the database once the page has been processed.

    Args:
        group_id: Group identifier substituted into ``base_url``.
        start: Offset of the first topic to request (``start`` query
            parameter).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``topics`` key or a topic lacks one
            of the stored fields.
    """
    # Browser-like request headers; presumably required for the API to accept
    # the request -- TODO confirm which of these are actually needed.
    headers = {
        'Host': 'api.douban.com',
        'Referer': 'api.douban.com',
        'Cookie': '',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36',
        'Connection': 'keep-alive',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.5',
        'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
    }
    params = {'start': start, 'count': PER_PAGE_COUNT}
    url = base_url % group_id
    r = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of hitting a confusing KeyError on
    # an error payload further down.
    r.raise_for_status()
    ret = r.json()
    for topic in ret['topics']:
        db.topic.insert(
            group_id=group_id,
            title=topic['title'],
            content=topic['content'],
            url=topic['alt'],
            created=topic['created'],
        )
    # Flush once per page, after all of its topics have been inserted.
    db.flush()
def fetch(group_id, pages=10):
    """Fetch and store the first ``pages`` pages of topics for one group.

    Calls ``real_fetch`` once per page, advancing the start offset by
    ``PER_PAGE_COUNT`` each time, and prints the zero-based page index as a
    progress indicator.

    Args:
        group_id: Group identifier passed through to ``real_fetch``.
        pages: Number of consecutive pages to fetch. Defaults to 10, the
            previously hard-coded value.
    """
    for i in range(pages):
        # print() with a single argument produces the same output on
        # Python 2 and Python 3; the bare ``print i`` statement is a
        # SyntaxError on Python 3.
        print(i)
        real_fetch(group_id, i * PER_PAGE_COUNT)
# Groups whose topics are fetched when this file is run as a script.
group_ids = ['zhufang', 'xiaotanzi']

if __name__ == '__main__':
    for group_id in group_ids:
        # Single-argument print() behaves identically on Python 2 and 3,
        # unlike the Python-2-only ``print group_id`` statement.
        print(group_id)
        fetch(group_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment