Skip to content

Instantly share code, notes, and snippets.

@superalsrk
Forked from wilbeibi/gist:5054796
Last active December 14, 2015 07:59
Show Gist options
  • Save superalsrk/5054852 to your computer and use it in GitHub Desktop.
Save superalsrk/5054852 to your computer and use it in GitHub Desktop.
douban CAPTCHA Download
# Grab captcha from douban login page
import urllib
import urllib2
import requests
import re
url = 'http://www.douban.com/accounts/login'
# Destination prefix: each image is written to local + "<index>.jpg".
local = '/home/wilbeibi/Dropbox/Papers/Materials/Pool/douban/do'
# Captcha image URL as it appears in the page source, i.e. with the query
# separator HTML-escaped as "&amp;".  A raw string keeps the backslashes
# intact for the regex engine; compiling once avoids redoing the work on
# every loop iteration.
pat = re.compile(
    r"http:\/\/www\.douban\.com\/misc\/captcha\?id=\w{24,26}\&amp;size=s"
)


def find_captcha_url(page):
    """Return the captcha image URL embedded in *page*, or None if absent.

    *page* is the HTML source of the login page.  The URL is matched in its
    HTML-escaped form and returned with the "&amp;" entity decoded to "&",
    so it can be requested directly.
    """
    match = pat.search(page)
    if match is None:
        # No captcha on this page: report it instead of raising
        # AttributeError on None.group().
        return None
    return match.group(0).replace("&amp;", "&")


def grab_captchas(count=200):
    """Fetch the login page *count* times and save the captcha from each.

    Images are written to ``local + "<index>.jpg"`` where index runs from
    0 to count - 1.  A page without a captcha is reported and skipped, so
    its index is left unused.

    Returns the number of images actually saved.
    """
    saved = 0
    for i in range(count):
        response = urllib.urlopen(url)
        try:
            page = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        name = str(i) + ".jpg"
        pic_url = find_captcha_url(page)
        if pic_url is None:
            print("No captcha found on login page, skipping %s" % name)
            continue
        urllib.urlretrieve(pic_url, local + name)
        print("Grabbing %s successful!" % name)
        saved += 1
    return saved


if __name__ == "__main__":
    grab_captchas()
@superalsrk
Copy link
Author

from pyquery import PyQuery as pq
# Pass the address through url= with an explicit scheme and the full host
# name so PyQuery fetches the page rather than parsing the string as markup.
d = pq(url='http://www.douban.com/accounts/login')
# Print the src attribute of the element whose id is "captcha_image".
print(d('#captcha_image').attr('src'))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment