Crawl a Weibo repost timeline asynchronously in Python. Requires gevent and the Sina Weibo Python client (https://github.com/michaelliao/sinaweibopy) to be installed.
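The script below expects a ready-made access_token. As a minimal sketch, the token can be obtained with the OAuth2 helpers documented in the sinaweibopy README (APP_KEY, APP_SECRET and CALLBACK_URL are placeholders for your own application's credentials, and exact method names may differ between client versions):

# Minimal sketch of getting an access token for main(), based on the OAuth2 flow
# documented in the sinaweibopy README. APP_KEY/APP_SECRET/CALLBACK_URL are
# placeholders for your own application's credentials.
from weibo import APIClient

APP_KEY = "your app key"
APP_SECRET = "your app secret"
CALLBACK_URL = "http://example.com/callback"

client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET, redirect_uri=CALLBACK_URL)
print client.get_authorize_url()   # open this URL, authorize the app, copy the `code` parameter

code = "the code returned to the callback URL"
r = client.request_access_token(code)
print r.access_token, r.expires_in  # r.access_token is what main() below takes

The crawler itself takes that token together with the weibo's mid: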
import math
import gevent
import gevent.greenlet
from gevent import monkey
from functools import partial
monkey.patch_all()
from weibo import APIClient
import time


class ActionCounter:
    def __init__(self, count=0, finished=0):
        self.count = count
        self.finished = finished


def main(mid, access_token):
    client = APIClient(None, None)
    client.set_access_token(access_token, time.time() + 3600 * 60)

    # Resolve the numeric weibo id from its base62 mid, then fetch the root tweet.
    weibo_id = client.get.statuses__queryid(
        access_token=access_token, mid=mid, type=1, isBase62=1)['id']
    root_tweet = client.get.statuses__show(access_token=access_token, id=weibo_id)

    nreposts_per_page = 200

    def get_reposts(page, callback=None):
        # Fetch one page of the repost timeline and hand it to the callback.
        try:
            reply = client.get.statuses__repost_timeline(
                id=weibo_id, count=nreposts_per_page, page=page)
            if callback:
                callback(reply)
        except Exception, e:
            print "Exception %s happened" % e

    tweets = {}

    def crawl_callback(depth, reply):
        if reply and reply.has_key("reposts"):
            for repost in reply["reposts"]:
                if add_tweet(repost):
                    crawl_reposts(repost, depth + 1)

    def crawl_reposts(tweet, depth):
        # Stop following the repost chain beyond depth 5.
        if depth >= 5:
            return
        if not (tweet and tweet.has_key("reposts_count")):
            return
        total_reposts_count = tweet.reposts_count
        page_num = int(math.ceil(float(total_reposts_count) / nreposts_per_page))
        #print tweet.user.screen_name, page_num
        if page_num > 0 and total_reposts_count >= 2:
            # Instead of using Greenlet's link, call the callback directly, since
            # spawning a greenlet from inside a link does not seem to work.
            gs = [gevent.spawn(get_reposts, page, partial(crawl_callback, depth))
                  for page in xrange(1, page_num + 1)]
            gevent.joinall(gs)

    def add_tweet(tweet):
        if not tweets.has_key(tweet.id) and tweet.has_key("reposts_count"):
            tweets[tweet.id] = {
                "id": tweet.id,
                "text": tweet.text,
                "reposts_count": tweet.reposts_count
            }
            return True
        return False

    crawl_reposts(root_tweet, 1)
    return tweets


if __name__ == "__main__":
    try:
        now = time.time()
        mid = "zbQaYdw5V"
        access_token = "xxx"
        print "This will take some time..."
        tweets = main(mid, access_token)
        print "Total number of tweets:", len(tweets)
        print "Took %s seconds" % (time.time() - now)
    except Exception, e:
        print e