Skip to content

Instantly share code, notes, and snippets.

Created March 11, 2015 16:18
Show Gist options
  • Save anonymous/9cc4e94afb244f335fee to your computer and use it in GitHub Desktop.
Save anonymous/9cc4e94afb244f335fee to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
def PageCal(Page, PageSize=25):
    """Return the zero-based item offset at which page *Page* begins.

    Pages are numbered from 1, so page 1 maps to offset 0, page 2 to
    ``PageSize``, and so on.

    Args:
        Page: 1-based page number.
        PageSize: Number of items per page. Defaults to 25, the value the
            original code hard-coded.

    Returns:
        ``(Page - 1) * PageSize``.
    """
    return (Page - 1) * PageSize
# URL pieces that GetReviews joins as
# UrlBase0 + BookID + UrlBase1 [+ UrlBase2 + offset].
# Prefix of the URL; the book ID is appended directly after it.
UrlBase0='http://book.douban.com/subject/'
# Path suffix appended after the book ID (presumably the book's reviews listing).
UrlBase1='/reviews'
# Query-string prefix; GetReviews follows it with the offset from PageCal.
UrlBase2='?start='
def _FetchSoup(Url, Timeout=10):
    """Download *Url* and return the response parsed as a BeautifulSoup tree.

    Returns None, after writing a message to stderr, when the request fails
    or the server answers with an HTTP error status. This keeps the
    best-effort behaviour of the original code without silently hiding the
    failure.
    """
    import sys  # local import so the file's top-level imports stay untouched

    try:
        # A timeout prevents the script from hanging forever on a stalled
        # connection; requests has no default timeout.
        Response = requests.get(Url, timeout=Timeout)
        Response.raise_for_status()
    except requests.RequestException as Error:
        sys.stderr.write('Failed to fetch %s: %s\n' % (Url, Error))
        return None
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    return BeautifulSoup(Response.content, 'html.parser')


def _CountPages(Soup):
    """Return the number of listing pages advertised by the paginator.

    The page count is read from the text of the second-to-last link inside
    ``<div class="paginator">`` (presumably the last link is a "next"
    control -- confirm against the live markup). Falls back to 1 when the
    paginator is missing, has fewer than two links, or the link text is not
    a number, so that at least the first page is processed.
    """
    Paginator = Soup.find('div', {'class': 'paginator'})
    if Paginator is None:
        return 1
    Links = Paginator.findAll('a')
    if len(Links) < 2:
        return 1
    try:
        return int(Links[-2].text)
    except ValueError:
        return 1


def GetReviews(BookID):
    """Print every ``<div class="ctsh">`` element from a book's review pages.

    The first listing page is fetched to find out how many pages exist.
    Each page is then requested with a ``?start=`` offset computed by
    PageCal, and its matching elements are printed to stdout.

    Args:
        BookID: Identifier of the book in the URL. It is converted with
            ``str()``, so both strings and integers are accepted.

    Returns:
        None. Pages that cannot be fetched are reported on stderr and
        skipped; if the first page cannot be fetched nothing is printed.
    """
    ReviewsUrl = UrlBase0 + str(BookID) + UrlBase1
    FirstSoup = _FetchSoup(ReviewsUrl)
    if FirstSoup is None:
        return

    for PageIndex in range(1, _CountPages(FirstSoup) + 1):
        PageSoup = _FetchSoup(ReviewsUrl + UrlBase2 + str(PageCal(PageIndex)))
        if PageSoup is None:
            continue  # best effort: skip this page and carry on with the rest
        for CommentItem in PageSoup.findAll('div', {'class': 'ctsh'}):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(CommentItem)
# Runs at module level: fetches and prints the reviews for book ID '26317953'
# every time this file is executed (or imported).
GetReviews('26317953')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment