Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@zippera
Created July 27, 2013 13:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zippera/6094881 to your computer and use it in GitHub Desktop.
Save zippera/6094881 to your computer and use it in GitHub Desktop.
# coding: utf-8
import urllib2
import urllib
import re
# Captures the inner HTML of each <div ... class="content" ... ti...> element.
# NOTE(review): the original author marked this pattern as "has problems";
# it is kept unchanged here -- confirm against the live page markup.
pat = re.compile(r'<div.*?class="content".*?ti.*?>(.*?)</div>', re.S)
nexturl1 = "http://www.qiushibaike.com/month/page/"
nexturl2 = "?s=4582723&slow"


def strip_whitespace(text):
    """Return *text* with every space, newline and carriage return removed."""
    return text.replace(' ', '').replace('\n', '').replace('\r', '')


def extract_items(html):
    """Return the list of strings captured by ``pat`` in *html*."""
    return pat.findall(html)


def format_items(items):
    """Return the bytes to store for *items*.

    Each item is whitespace-stripped and terminated by a newline so that
    every item stays distinguishable in the output; the result is encoded
    as UTF-8 explicitly so non-ASCII text can be written to a binary file.
    """
    lines = [strip_whitespace(item) + u"\n" for item in items]
    return u"".join(lines).encode("utf-8")


def fetch_page(page_number):
    """Download page *page_number* and return its body decoded as UTF-8."""
    myurl = nexturl1 + str(page_number) + nexturl2
    print(myurl + "\n")
    myres = urllib2.urlopen(myurl)
    try:
        mypage = myres.read()
    finally:
        # Release the connection even if reading fails.
        myres.close()
    return mypage.decode("utf-8")  # bytes -> unicode


def main(page_count=1):
    """Fetch *page_count* pages, print every match and save them to qb.txt.

    Prints "no data" for a page without matches. All matched items from all
    pages are written to ``qb.txt`` (one per line, UTF-8); the file is only
    written when at least one item was found.
    """
    collected = []
    for count in range(1, page_count + 1):
        print("Page " + str(count) + "\n")
        mat = extract_items(fetch_page(count))
        if not mat:
            print("no data")
            continue
        for cnt, item in enumerate(mat, 1):
            print('No. ' + str(cnt) + "\n")
            print(item + "\n")
        collected.extend(mat)

    if collected:
        # Open once in binary mode and write everything, instead of
        # truncating the file again for every single item.
        with open('qb.txt', 'wb') as f:
            f.write(format_items(collected))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment