Created
July 15, 2013 10:58
-
-
Save seckcoder/5999150 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2013 Jike Inc. All Rights Reserved.
# Author: liwei@jike.com
from gevent import monkey | |
monkey.patch_all() | |
from gevent.pool import Pool | |
import time | |
from weibo_offline_base.ttypes import PlatForm | |
from hbase_rabbitMQ_interface import HbaseRabbitMQ | |
from utils import thrift_utils | |
class TrainDataCrawler(object):
    """Interface for crawlers that fetch training data.

    Both methods are no-op placeholders that return ``None``; subclasses
    override them with a real data source.
    """

    def crawl_by_user(self, uid, platform, time_delta=None):
        """Placeholder for crawling by user id; does nothing, returns None.

        ``time_delta`` is accepted (and ignored here) so that the signature
        agrees with the HBase-backed subclass and with the keyword call made
        by ``crawl_from_userids``.
        """
        return None

    def crawl_by_weibo(self, weibo_id, platform):
        """Placeholder for crawling by weibo id; does nothing, returns None."""
        return None
class TrainDataCrawlerFromHBase(TrainDataCrawler):
    """Crawler that fetches data through an ``HbaseRabbitMQ`` thrift client."""

    def __init__(self, host, port):
        """Build the thrift client factory for the service at host/port.

        No client is opened here; each crawl call obtains one from
        ``self.client_maker`` for the duration of the call.
        """
        self.client_maker = thrift_utils.ThriftClientMaker(
            host, port, HbaseRabbitMQ.Client)

    def crawl_by_user(self, uid, platform, time_delta):
        """Return what ``getUserWeiboInfoListTs`` yields for ``uid``.

        The third argument sent to the service is the current epoch time
        minus ``time_delta``.
        NOTE(review): presumably a lower-bound timestamp in seconds --
        confirm against the service definition.
        """
        with self.client_maker.thrift_client() as client:
            return client.getUserWeiboInfoListTs(
                uid, platform, time.time() - time_delta)

    def crawl_by_weibo(self, weibo_id, platform):
        """Return what ``getWeiboInfo`` yields for ``weibo_id``."""
        with self.client_maker.thrift_client() as client:
            # Previously the fetched record was bound to a local and dropped,
            # so callers always received None; hand it back like
            # crawl_by_user does.
            return client.getWeiboInfo(weibo_id, platform)
# Module-level crawler instance used by crawl_from_userids.
crawler = TrainDataCrawlerFromHBase(host="127.0.0.1", port=8080)
def crawl_from_userids(uids, platform=PlatForm.SINA, time_delta=10, link=None,
                       pool_size=10):
    """Crawl every user id in ``uids`` concurrently and wait for completion.

    One greenlet per uid is spawned on a gevent pool, each running
    ``crawler.crawl_by_user(uid=..., platform=..., time_delta=...)``.

    Args:
        uids: iterable of user ids to crawl.
        platform: platform value forwarded to the crawler.
        time_delta: forwarded to the crawler unchanged.
        link: optional callable attached to each greenlet via
            ``Greenlet.link``; it is called with the greenlet once that
            greenlet finishes. ``None`` means no callback.
        pool_size: maximum number of greenlets running at once.

    Returns:
        None. Results are only observable through ``link``.
    """
    pool = Pool(pool_size)
    for uid in uids:
        greenlet = pool.spawn(crawler.crawl_by_user,
                              uid=uid,
                              platform=platform,
                              time_delta=time_delta)
        # Greenlet.link rejects non-callables, so the default link=None
        # must not be passed through.
        if link is not None:
            greenlet.link(link)
    # Block until every spawned crawl has finished.
    pool.join()
def crawl_from_weiboids(weiboids, platform=PlatForm.SINA, link=None):
    """Not implemented: accepts its arguments, does nothing, returns None."""
    return None
if __name__ == '__main__':
    def report_result(finished):
        """Print a finished greenlet's value, or its exception on failure."""
        # Link callbacks are expected to fire only after the greenlet is done.
        if not finished.ready():
            raise RuntimeError("Something unexpected")
        outcome = finished.value if finished.successful() else finished.exception
        # Parenthesised single argument: same output under the Python 2
        # print statement this file uses.
        print(outcome)

    # Demo run: three sample user ids, platform=1, time_delta=1.
    crawl_from_userids([1, 2, 3], platform=1, time_delta=1, link=report_result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment