Created
July 28, 2014 22:10
-
-
Save risuoku/324ba24a8f5ed8de1ff8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
""" | |
A Diamond collector that, for a fixed set of Twitter lists, publishes the
number of tweets posted within the last collection interval and that count
scaled per 100 list members.
#### Dependencies | |
* twitter access token | |
* stapi | |
""" | |
import diamond.collector | |
import logging, pytz, time, sys, functools | |
sys.path.append('/home/risuo/local/projects/risuo-diamond-collecter/lib/stapi') | |
from stapi.api import API | |
from stapi.error import StError | |
class TwitterCollector(diamond.collector.Collector):
    """Publish recent tweet counts for a fixed set of Twitter lists.

    On every collection cycle each list in ``LIST_IDS`` is paged through via
    the ``stapi`` client, the statuses created within the last ``interval``
    seconds are counted, and two metrics per list are published: the raw
    count and the count per 100 list members.
    """

    # Number of statuses requested per timeline page.
    COUNT = 200
    # Maximum number of pages fetched per list (3200 statuses in total).
    # NOTE(review): 3200 presumably mirrors the API's timeline depth limit
    # -- confirm against the stapi / Twitter documentation.
    MAX_PAGE = 3200 // COUNT
    # Seconds to pause after the API client raises StError.
    ERROR_WAIT = 180
    # Twitter list ids that are monitored.
    LIST_IDS = [
        859513,
        10539722,
        103916053,
        82939670,
        82936090,
        82933898,
        20851112,
    ]

    def get_default_config_help(self):
        """Return the help text for the configuration options.

        This collector adds no options of its own, so the parent's help
        mapping is returned as is.
        """
        config_help = super(TwitterCollector, self).get_default_config_help()
        return config_help

    def get_default_config(self):
        """Return the default collector settings."""
        config = super(TwitterCollector, self).get_default_config()
        config.update({
            'enabled': 'True',
            'path': 'twitter',
            'interval': '60',
        })
        return config

    def collect(self):
        """Fetch the statistics of every monitored list and publish them.

        Overrides ``Collector.collect``. All lists are queried first and the
        metrics are published afterwards, so a failure while querying
        publishes nothing for this cycle.
        """
        # A fresh API object is created on every cycle and shared with
        # _get_single_result through self.api.
        self.api = API()
        interval = int(self.config['interval'])

        results = [
            self._get_single_result(
                list_id,
                max_page=self.MAX_PAGE,
                count=self.COUNT,
                interval=interval,
            )
            for list_id in self.LIST_IDS
        ]

        # The positional index is appended to the metric name so that two
        # lists sharing a slug still produce distinct metrics.
        for n, result in enumerate(results):
            self.publish(
                'list.%s.total%s' % (result['slug'], n),
                result['tweet_count']
            )
            self.publish(
                'list.%s.per-100user%s' % (result['slug'], n),
                result['tweet_count_per_user'] * 100,
                precision=2
            )

    ### private

    def _get_single_result(self, list_id=None, max_page=1, count=None, interval=300):
        """Count the statuses of one list created in the last ``interval`` seconds.

        Args:
            list_id: id of the Twitter list to inspect.
            max_page: maximum number of timeline pages to request.
            count: number of statuses requested per page.
            interval: size of the "recent" window in seconds.

        Returns:
            A dict with the keys ``tweet_count`` (statuses inside the
            window), ``tweet_count_per_user`` (that count divided by the
            list's member count, 0.0 for a list without members) and
            ``slug`` (the list's slug).
        """
        timestamps = []
        for page in range(1, max_page + 1):
            try:
                # list() forces any lazy result inside the try block, so an
                # StError raised while iterating is handled here as well.
                statuses = list(
                    self.api.list_timeline(list_id=list_id, page=page, count=count)
                )
            except StError as e:
                # Best effort: log, pause, then move on to the next page.
                logging.info('%s.. waiting', e.reason)
                time.sleep(self.ERROR_WAIT)
                continue

            page_times = [self._format_time(status.created_at) for status in statuses]
            if not page_times:
                # An empty page means there is nothing further to fetch.
                break
            timestamps.extend(page_times)

            # Stop paging once the last status of the page is older than the
            # window. NOTE(review): this relies on the timeline being
            # ordered newest first -- confirm against the API.
            if page_times[-1] < time.time() - interval:
                break

        # Always restrict the result to the window, whichever way the loop ended.
        cutoff = time.time() - interval
        recent = [stamp for stamp in timestamps if stamp > cutoff]

        # get list info
        list_info = self.api.get_list(list_id=list_id)
        member_count = list_info.member_count

        # Guard against lists without members instead of dividing by zero.
        if member_count:
            per_user = float(len(recent)) / member_count
        else:
            per_user = 0.0

        return {
            'tweet_count': len(recent),
            'tweet_count_per_user': per_user,
            'slug': list_info.slug
        }

    def _format_time(self, s):
        """Convert a status creation time into integer epoch seconds.

        Args:
            s: a ``datetime``; a naive value is interpreted as UTC, an aware
                value is converted to UTC first.

        Returns:
            Seconds since the Unix epoch, comparable with ``time.time()``.
        """
        # calendar.timegm reads the tuple as UTC. time.mktime would read it
        # in the host's local time zone, giving a result that depends on
        # where the collector happens to run.
        import calendar
        return calendar.timegm(s.utctimetuple())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment