Skip to content

Instantly share code, notes, and snippets.

@risuoku
Created July 28, 2014 22:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save risuoku/324ba24a8f5ed8de1ff8 to your computer and use it in GitHub Desktop.
Save risuoku/324ba24a8f5ed8de1ff8 to your computer and use it in GitHub Desktop.
# coding=utf-8
"""
A collector that counts the tweets recently posted to a fixed set of Twitter
lists and publishes the totals and per-member rates as metrics.
#### Dependencies
* twitter access token
* stapi
"""
import diamond.collector
import logging, pytz, time, sys, functools
sys.path.append('/home/risuo/local/projects/risuo-diamond-collecter/lib/stapi')
from stapi.api import API
from stapi.error import StError
class TwitterCollector(diamond.collector.Collector):
    """
    Diamond collector that counts recent tweets on a set of Twitter lists.

    For every list id in ``LIST_IDS`` the collector pages through the list
    timeline, counts the tweets created within the last ``interval`` seconds
    and publishes two metrics: the raw count and the count per 100 list
    members.
    """

    # Number of tweets requested per timeline page.
    COUNT = 200
    # Maximum number of pages walked per list (3200 tweets in total).
    MAX_PAGE = 3200 // COUNT
    # Twitter list ids whose timelines are measured.
    LIST_IDS = (
        859513,
        10539722,
        103916053,
        82939670,
        82936090,
        82933898,
        20851112,
    )

    def get_default_config_help(self):
        """
        Returns the help text for the collector settings.

        This collector adds no options of its own, so the parent's help is
        returned with an empty update applied.
        """
        config_help = super(TwitterCollector, self).get_default_config_help()
        config_help.update({
        })
        return config_help

    def get_default_config(self):
        """
        Returns the default collector settings
        """
        config = super(TwitterCollector, self).get_default_config()
        config.update({
            'enabled': 'True',
            'path': 'twitter',
            'interval': '60'
        })
        return config

    def collect(self):
        """
        Overrides the Collector.collect method

        Gathers the statistics of every list first and publishes them
        afterwards, so nothing is published if any list fails to load.
        """
        # _get_single_result reads the client from self.api, so it has to be
        # stored on the instance rather than kept local.
        self.api = API()

        interval = int(self.config['interval'])

        # A list comprehension (instead of map) keeps the result indexable on
        # both Python 2 and Python 3.
        results = [
            self._get_single_result(
                list_id,
                max_page=self.MAX_PAGE,
                count=self.COUNT,
                interval=interval
            )
            for list_id in self.LIST_IDS
        ]

        # Publish Metric
        # The position of the list in LIST_IDS is appended to the metric name.
        for n, result in enumerate(results):
            self.publish(
                'list.%s.total%s' % (result['slug'], n),
                result['tweet_count']
            )
            self.publish(
                'list.%s.per-100user%s' % (result['slug'], n),
                result['tweet_count_per_user'] * 100,
                precision=2
            )

    ### private
    def _get_single_result(self, list_id=None, max_page=1, count=None,
                           interval=300):
        """
        Counts the tweets of one list created within the last ``interval``
        seconds.

        list_id  -- id of the Twitter list to inspect
        max_page -- maximum number of timeline pages to request
        count    -- number of tweets requested per page
        interval -- size of the counting window in seconds

        Returns a dict with the keys 'tweet_count', 'tweet_count_per_user'
        and 'slug'.
        """
        timestamps = []
        for page in range(1, max_page + 1):
            try:
                statuses = self.api.list_timeline(
                    list_id=list_id, page=page, count=count
                )
                timestamps += [
                    self._format_time(status.created_at)
                    for status in statuses
                ]
                cutoff = time.time() - interval
                # Once the last collected tweet is older than the window,
                # later pages are not needed: drop everything outside the
                # window and stop paging. The emptiness check avoids an
                # IndexError when no tweet has been collected yet.
                if timestamps and timestamps[-1] < cutoff:
                    timestamps = [t for t in timestamps if t > cutoff]
                    break
            except StError as e:
                # NOTE(review): after waiting, the loop moves on to the next
                # page; the failed page is not retried.
                logging.info('%s.. waiting', e.reason)
                time.sleep(180)

        # get list info
        list_info = self.api.get_list(list_id=list_id)
        member_count = list_info.member_count

        tweet_count = len(timestamps)
        # A list without members would otherwise raise ZeroDivisionError.
        if member_count:
            tweet_count_per_user = float(tweet_count) / member_count
        else:
            tweet_count_per_user = 0.0

        # result
        return {
            'tweet_count': tweet_count,
            'tweet_count_per_user': tweet_count_per_user,
            'slug': list_info.slug
        }

    def _format_time(self, s):
        """
        Converts the datetime ``s`` to integer seconds since the Unix epoch.

        A naive datetime is treated as UTC. The conversion goes through
        calendar.timegm so the result does not depend on the time zone of the
        host; the previous time.mktime based conversion was only correct on a
        host whose local zone is Asia/Tokyo.
        """
        # Imported here so the block does not depend on a change to the
        # file-level import list.
        import calendar

        return calendar.timegm(s.utctimetuple())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment