Skip to content

Instantly share code, notes, and snippets.

@wilbeibi
Created February 28, 2013 06:53
Show Gist options
  • Save wilbeibi/5054796 to your computer and use it in GitHub Desktop.
Save wilbeibi/5054796 to your computer and use it in GitHub Desktop.
# Grab captcha from douban login page
import urllib
import urllib2
import requests
import re
# Login page whose HTML embeds a captcha image URL.
url = 'http://www.douban.com/accounts/login'

# Filename *prefix* for saved images: "<i>.jpg" is appended directly to this
# string, so files are written as ".../douban/do0.jpg", ".../douban/do1.jpg", ...
local = '/home/wilbeibi/Dropbox/Papers/Materials/Pool/douban/do'

# Captcha image URL as it appears inside the page's HTML source, i.e. with the
# query separator still HTML-escaped as "&amp;".  Raw string so the regex
# escapes reach the `re` module untouched; defined once, outside the loop.
pat = r"http:\/\/www\.douban\.com\/misc\/captcha\?id=\w{24,26}\&amp;size=s"

# Trailing part of every match that gets swapped out before downloading.
html_suffix = "&amp;size=s"

# Number of login-page fetches (one captcha download attempt per fetch).
attempts = 200

for i in range(attempts):
    # Every fetch of the login page is searched for a captcha URL.
    page = urllib.urlopen(url).read()

    match = re.search(pat, page)
    if match is None:
        # re.search returns None when the page has no captcha URL; skip this
        # attempt instead of crashing on `.group(0)`.
        print("No captcha found on attempt %d, skipping." % i)
        continue

    # Replace the HTML-escaped "&amp;size=s" tail with ";size=s", exactly as
    # the original script requested it.
    # NOTE(review): the unescaped form would normally be "&size=s"; ";" is
    # kept here to preserve the original request URL -- confirm which one the
    # server expects.
    pic_url = match.group(0)[:-len(html_suffix)] + ";size=s"

    name = str(i) + ".jpg"
    urllib.urlretrieve(pic_url, local + name)
    print("Grabbing %s successful!" % name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment