Skip to content

Instantly share code, notes, and snippets.

@seckcoder
Created July 15, 2013 10:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seckcoder/5999150 to your computer and use it in GitHub Desktop.
Save seckcoder/5999150 to your computer and use it in GitHub Desktop.
# Copyright 2013 Jike Inc. All Rights Reserved.
# Author: liwei@jike.com
from gevent import monkey
monkey.patch_all()
from gevent.pool import Pool
import time
from weibo_offline_base.ttypes import PlatForm
from hbase_rabbitMQ_interface import HbaseRabbitMQ
from utils import thrift_utils
class TrainDataCrawler(object):
    """Interface for objects that crawl training data.

    Both methods are placeholders: they do nothing and return ``None``.
    Subclasses override them with a real data source.
    """

    def crawl_by_user(self, uid, platform, time_delta=None):
        """Crawl data for user ``uid`` on ``platform``.

        ``time_delta`` is optional and ignored here. It is accepted so this
        signature stays call-compatible with
        ``TrainDataCrawlerFromHBase.crawl_by_user``, which takes it as a
        third argument.

        The base implementation returns ``None``.
        """
        return None

    def crawl_by_weibo(self, weibo_id, platform):
        """Crawl the single weibo ``weibo_id`` on ``platform``.

        The base implementation returns ``None``.
        """
        return None
class TrainDataCrawlerFromHBase(TrainDataCrawler):
    """Crawler that fetches data through an ``HbaseRabbitMQ`` thrift client."""

    def __init__(self, host, port):
        """Create the thrift client factory for the service at host:port."""
        self.client_maker = thrift_utils.ThriftClientMaker(host,
                                                           port,
                                                           HbaseRabbitMQ.Client)

    def crawl_by_user(self, uid, platform, time_delta):
        """Return the result of ``getUserWeiboInfoListTs`` for ``uid``.

        The third argument sent to the service is ``time.time() - time_delta``,
        so ``time_delta`` is subtracted from the current epoch time in
        seconds.  NOTE(review): presumably the service treats that value as
        a "since" timestamp -- confirm against the thrift definition.
        """
        with self.client_maker.thrift_client() as client:
            moos = client.getUserWeiboInfoListTs(uid,
                                                 platform,
                                                 time.time() - time_delta)
        return moos

    def crawl_by_weibo(self, weibo_id, platform):
        """Return the result of ``getWeiboInfo`` for ``weibo_id``."""
        with self.client_maker.thrift_client() as client:
            moo = client.getWeiboInfo(weibo_id, platform)
        # The fetched record used to be dropped, so callers always got None.
        return moo
# Module-level crawler instance, created at import time and used by
# crawl_from_userids; it targets the service at 127.0.0.1:8080.
crawler = TrainDataCrawlerFromHBase(host="127.0.0.1", port=8080)
def crawl_from_userids(uids, platform=PlatForm.SINA, time_delta=10, link=None,
                       pool_size=10):
    """Crawl every user id in ``uids`` concurrently and wait for completion.

    One greenlet per uid is spawned in a gevent ``Pool``; each runs
    ``crawler.crawl_by_user(uid=uid, platform=platform,
    time_delta=time_delta)``.  The function returns ``None`` after
    ``pool.join()``.

    Args:
        uids: iterable of user ids.
        platform: forwarded to ``crawler.crawl_by_user``.
        time_delta: forwarded to ``crawler.crawl_by_user``.
        link: optional callable registered on each greenlet with
            ``Greenlet.link``.  When ``None`` (the default) nothing is
            linked.
        pool_size: size passed to ``Pool``; defaults to the previously
            hard-coded 10.
    """
    pool = Pool(pool_size)
    for uid in uids:
        gs = pool.spawn(crawler.crawl_by_user, uid=uid,
                        platform=platform,
                        time_delta=time_delta)
        # Greenlet.link does not treat None as "no callback" (gevent 1.x
        # raises TypeError for a non-callable), so skip it for the default.
        if link is not None:
            gs.link(link)
    pool.join()
def crawl_from_weiboids(weiboids, platform=PlatForm.SINA, link=None):
    """Placeholder for crawling by weibo id; not implemented.

    Accepts its arguments, does nothing with them and returns ``None``.
    """
    return None
if __name__ == '__main__':
    def link(gs):
        """Print the outcome of a linked greenlet: its value or exception.

        Raises RuntimeError if called with a greenlet that is not ready.
        """
        if not gs.ready():
            raise RuntimeError("Something unexpected")
        # print(x) with a single argument behaves the same on Python 2 and
        # Python 3; the old print statement is a SyntaxError on Python 3.
        if gs.successful():
            print(gs.value)
        else:
            print(gs.exception)

    # Demo run: user ids 1-3, platform=1, time_delta=1, results printed by link.
    crawl_from_userids([1, 2, 3], 1, 1, link)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment