risuoku/twitter.py

## twitter.py
# coding=utf-8

"""
A Diamond collector that counts the tweets recently posted to a fixed set
of Twitter lists and publishes the totals and per-100-member rates.

#### Dependencies

 * twitter access token
 * stapi
"""

import diamond.collector
import logging, pytz, time, sys, functools

sys.path.append('/home/risuo/local/projects/risuo-diamond-collecter/lib/stapi')
from stapi.api import API
from stapi.error import StError

class TwitterCollector(diamond.collector.Collector):
  """
  Publishes how many tweets each Twitter list in LIST_IDS produced during
  the last collection interval.

  Two metrics are published per list: the raw tweet count and the tweet
  count per 100 list members.
  """

  # Number of tweets requested per timeline page.
  TWEETS_PER_PAGE = 200
  # Upper bound on tweets fetched per list; presumably the depth limit of
  # the timeline API -- TODO confirm.
  MAX_TIMELINE_TWEETS = 3200
  # Seconds to wait after the API client raises StError.
  ERROR_BACKOFF_SECONDS = 180
  # Ids of the Twitter lists to measure; the position in this tuple is the
  # numeric suffix of the published metric names.
  LIST_IDS = (
    859513,
    10539722,
    103916053,
    82939670,
    82936090,
    82933898,
    20851112,
  )

  def get_default_config_help(self):
    """
    Returns the help text for the collector settings

    This collector adds no options of its own, so the base collector's
    help is returned unchanged.
    """
    config_help = super(TwitterCollector, self).get_default_config_help()
    return config_help

  def get_default_config(self):
    """
    Returns the default collector settings
    """
    config = super(TwitterCollector, self).get_default_config()
    config.update({
      'enabled': 'True',
      'path': 'twitter',
      'interval': '60'
    })
    return config

  def collect(self):
    """
    Overrides the Collector.collect method

    Measures every list in LIST_IDS and publishes two metrics per list:

     * list.<slug>.total<n>       -- tweets seen within the interval
     * list.<slug>.per-100user<n> -- the same count per 100 list members

    where <n> is the position of the list in LIST_IDS.
    """
    # A fresh API client is built on every run.
    self.api = API()

    interval = int(self.config['interval'])
    # Floor division keeps the page count an integer on Python 2 and 3.
    max_page = self.MAX_TIMELINE_TWEETS // self.TWEETS_PER_PAGE

    # All lists are measured before anything is published.
    results = [
      self._get_single_result(
        list_id,
        max_page=max_page,
        count=self.TWEETS_PER_PAGE,
        interval=interval
      )
      for list_id in self.LIST_IDS
    ]

    for n, result in enumerate(results):
      self.publish(
        'list.%s.total%s' % (result['slug'], n),
        result['tweet_count']
      )
      self.publish(
        'list.%s.per-100user%s' % (result['slug'], n),
        result['tweet_count_per_user'] * 100,
        precision=2
      )

  ### private
  def _get_single_result(self, list_id=None, max_page=1, count=None, interval=300):
    """
    Counts the tweets posted to one list during the last `interval` seconds

    Pages through the list timeline (at most `max_page` pages of `count`
    tweets each) until a page comes back empty or ends with a tweet older
    than the interval, then looks up the list's slug and member count.

    Returns a dict with the keys 'tweet_count', 'tweet_count_per_user'
    and 'slug'.
    """
    timestamps = []
    for page in range(1, int(max_page) + 1):
      try:
        tweets = self.api.list_timeline(list_id=list_id, page=page, count=count)
        page_timestamps = [self._format_time(t.created_at) for t in tweets]
      except StError as e:
        # NOTE: the failed page is skipped, not retried, after the back-off.
        logging.info('%s.. waiting', e.reason)
        time.sleep(self.ERROR_BACKOFF_SECONDS)
        continue

      if not page_timestamps:
        # Nothing (more) to read; also avoids indexing an empty list.
        break
      timestamps.extend(page_timestamps)

      # Only the last tweet of the page is inspected, so this relies on the
      # timeline being ordered newest first -- TODO confirm.
      if page_timestamps[-1] < time.time() - interval:
        break

    # Always drop tweets outside the window: after an error back-off even
    # tweets from earlier pages may have aged out of it.
    cutoff = time.time() - interval
    tweet_count = len([t for t in timestamps if t > cutoff])

    # get list info
    info = self.api.get_list(list_id=list_id)
    member_count = info.member_count

    # A list without members yields a rate of 0 rather than a
    # ZeroDivisionError.
    if member_count:
      per_user = float(tweet_count) / member_count
    else:
      per_user = 0.0

    return {
      'tweet_count': tweet_count,
      'tweet_count_per_user': per_user,
      'slug': info.slug
    }

  def _format_time(self, s):
    """
    Converts a tweet's creation datetime to whole seconds since the epoch

    `s` is a datetime; a naive value is taken to be UTC. The conversion
    does not depend on the time zone of the host, so the result can be
    compared directly with time.time().
    """
    import calendar  # local import: only this method needs it

    return int(calendar.timegm(s.utctimetuple()))
	# coding=utf-8

	"""
	A Diamond collector that counts the tweets recently posted to a fixed set
	of Twitter lists and publishes the totals and per-100-member rates.

	#### Dependencies

	* twitter access token
	* stapi
	"""

	import diamond.collector
	import logging, pytz, time, sys, functools

	sys.path.append('/home/risuo/local/projects/risuo-diamond-collecter/lib/stapi')
	from stapi.api import API
	from stapi.error import StError

	class TwitterCollector(diamond.collector.Collector):
	  """
	  Publishes how many tweets each Twitter list in LIST_IDS produced during
	  the last collection interval.

	  Two metrics are published per list: the raw tweet count and the tweet
	  count per 100 list members.
	  """

	  # Number of tweets requested per timeline page.
	  TWEETS_PER_PAGE = 200
	  # Upper bound on tweets fetched per list; presumably the depth limit of
	  # the timeline API -- TODO confirm.
	  MAX_TIMELINE_TWEETS = 3200
	  # Seconds to wait after the API client raises StError.
	  ERROR_BACKOFF_SECONDS = 180
	  # Ids of the Twitter lists to measure; the position in this tuple is the
	  # numeric suffix of the published metric names.
	  LIST_IDS = (
	    859513,
	    10539722,
	    103916053,
	    82939670,
	    82936090,
	    82933898,
	    20851112,
	  )

	  def get_default_config_help(self):
	    """
	    Returns the help text for the collector settings

	    This collector adds no options of its own, so the base collector's
	    help is returned unchanged.
	    """
	    config_help = super(TwitterCollector, self).get_default_config_help()
	    return config_help

	  def get_default_config(self):
	    """
	    Returns the default collector settings
	    """
	    config = super(TwitterCollector, self).get_default_config()
	    config.update({
	      'enabled': 'True',
	      'path': 'twitter',
	      'interval': '60'
	    })
	    return config

	  def collect(self):
	    """
	    Overrides the Collector.collect method

	    Measures every list in LIST_IDS and publishes two metrics per list:

	     * list.<slug>.total<n>       -- tweets seen within the interval
	     * list.<slug>.per-100user<n> -- the same count per 100 list members

	    where <n> is the position of the list in LIST_IDS.
	    """
	    # A fresh API client is built on every run.
	    self.api = API()

	    interval = int(self.config['interval'])
	    # Floor division keeps the page count an integer on Python 2 and 3.
	    max_page = self.MAX_TIMELINE_TWEETS // self.TWEETS_PER_PAGE

	    # All lists are measured before anything is published.
	    results = [
	      self._get_single_result(
	        list_id,
	        max_page=max_page,
	        count=self.TWEETS_PER_PAGE,
	        interval=interval
	      )
	      for list_id in self.LIST_IDS
	    ]

	    for n, result in enumerate(results):
	      self.publish(
	        'list.%s.total%s' % (result['slug'], n),
	        result['tweet_count']
	      )
	      self.publish(
	        'list.%s.per-100user%s' % (result['slug'], n),
	        result['tweet_count_per_user'] * 100,
	        precision=2
	      )

	  ### private
	  def _get_single_result(self, list_id=None, max_page=1, count=None, interval=300):
	    """
	    Counts the tweets posted to one list during the last `interval` seconds

	    Pages through the list timeline (at most `max_page` pages of `count`
	    tweets each) until a page comes back empty or ends with a tweet older
	    than the interval, then looks up the list's slug and member count.

	    Returns a dict with the keys 'tweet_count', 'tweet_count_per_user'
	    and 'slug'.
	    """
	    timestamps = []
	    for page in range(1, int(max_page) + 1):
	      try:
	        tweets = self.api.list_timeline(list_id=list_id, page=page, count=count)
	        page_timestamps = [self._format_time(t.created_at) for t in tweets]
	      except StError as e:
	        # NOTE: the failed page is skipped, not retried, after the back-off.
	        logging.info('%s.. waiting', e.reason)
	        time.sleep(self.ERROR_BACKOFF_SECONDS)
	        continue

	      if not page_timestamps:
	        # Nothing (more) to read; also avoids indexing an empty list.
	        break
	      timestamps.extend(page_timestamps)

	      # Only the last tweet of the page is inspected, so this relies on the
	      # timeline being ordered newest first -- TODO confirm.
	      if page_timestamps[-1] < time.time() - interval:
	        break

	    # Always drop tweets outside the window: after an error back-off even
	    # tweets from earlier pages may have aged out of it.
	    cutoff = time.time() - interval
	    tweet_count = len([t for t in timestamps if t > cutoff])

	    # get list info
	    info = self.api.get_list(list_id=list_id)
	    member_count = info.member_count

	    # A list without members yields a rate of 0 rather than a
	    # ZeroDivisionError.
	    if member_count:
	      per_user = float(tweet_count) / member_count
	    else:
	      per_user = 0.0

	    return {
	      'tweet_count': tweet_count,
	      'tweet_count_per_user': per_user,
	      'slug': info.slug
	    }

	  def _format_time(self, s):
	    """
	    Converts a tweet's creation datetime to whole seconds since the epoch

	    `s` is a datetime; a naive value is taken to be UTC. The conversion
	    does not depend on the time zone of the host, so the result can be
	    compared directly with time.time().
	    """
	    import calendar  # local import: only this method needs it

	    return int(calendar.timegm(s.utctimetuple()))