Skip to content

Instantly share code, notes, and snippets.

@taoyu
taoyu / txt_wrap_by.py
Created August 21, 2011 02:43
txt_wrap_by
import re
def txt_wrap_by(start_str, end, html):
    """Return the text in *html* found between *start_str* and *end*.

    The extracted slice is whitespace-stripped. Returns None when either
    delimiter cannot be located (searching for *end* only after *start_str*).
    """
    left = html.find(start_str)
    if left < 0:
        return None
    left += len(start_str)
    right = html.find(end, left)
    if right < 0:
        return None
    return html[left:right].strip()
def txt_wrap_by_all(start_str, end, html):
@taoyu
taoyu / douban_book.py
Created August 10, 2011 13:40
crawl books from douban
from BeautifulSoup import BeautifulSoup
import urllib2
import time
import Queue
start_id = 3288908
id_queue = Queue.Queue()
id_queue.put(start_id)
@taoyu
taoyu / renrengirl.py
Created July 31, 2011 12:44
download pictures from renrengirl.com
# -*- coding: utf-8 -*-
import urllib2
from BeautifulSoup import BeautifulSoup
import json, os, time, re
a = urllib2.urlopen('http://www.google.com/reader/api/0/stream/contents/feed/http://www.renrengirl.com/feed?n=1000').read()
a = json.loads(a)
items = a['items']
imglist = []
for item in items:
title = item['title']
@taoyu
taoyu / yupoo.py
Created July 31, 2011 12:42
crawl pictures from yupoo
album_url = 'http://www.yupoo.com/photos/gonatafb/albums/1993305/'
album_dir = 'magicpeach'
import re, os, urllib2, time
i = 1
urllist = []
while(1):
print i
album = urllib2.urlopen(album_url+'page'+str(i)).read()
@taoyu
taoyu / renren.py
Created July 30, 2011 09:27
download an album from renren.com
# Change the two variables below as needed,
# then run `python renren.py` to download the album.
# You also need an empty folder called "download".
data = (('email', 'test@example.com'), ('password', '123456'), ('origURL', 'http://www.renren.com/Home.do'), ('domain', "renren.com"))
album_url = 'http://photo.renren.com/photo/249794035/album-484759819'
import cookielib