A basic Python script showing how Couchbase's stats REST endpoint can be used to gather statistics over time.
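The script relies on the per-stat endpoint /pools/default/buckets/&lt;bucket&gt;/stats/&lt;stat&gt;?zoom=minute, whose JSON response carries a shared "timestamp" list (milliseconds since the epoch) and a per-node "nodeStats" map; those are the two fields the script reads. As an illustrative sketch (node names and values here are made up), a response decodes to roughly this structure:

response_dict = {
    "timestamp": [1424775540000, 1424775541000],    # one entry per sample, in ms
    "nodeStats": {
        "10.0.0.1:8091": [12, 15],                  # one value per timestamp
        "10.0.0.2:8091": [11, 14]
    }
}
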
#!/usr/bin/env python
# Initial version - 24-Feb 2015
# Monitor interesting Couchbase statistics over a period of time.
# Stats are captured by calling the REST endpoint, which provides individual
# stats for each node. These are then post-processed to provide cluster-wide
# totals before being output to the console.

import json
import sys
import time
import urllib2
from collections import defaultdict
from datetime import datetime

# List of stats to collect and the data structures to capture them in
interesting_stats = ["cmd_set", "cmd_get", "ep_queue_size", "ep_flusher_todo", "curr_items"]
stats_dict = defaultdict(dict)
cluster_totals = {}
timestamps = list()

# Avoid any duplicate samples by finding the index of the first timestamp not yet seen
def getTimestampIndex(response_timestamps):
    global timestamps
    index = 0
    # Bounds check guards against running off the end when a response has no new samples
    while index < len(response_timestamps) and response_timestamps[index] in timestamps:
        index += 1
    timestamps += response_timestamps[index:]
    return index
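
# For example (illustrative values): if timestamps already ends
# [..., 1000, 2000] and a response reports [1000, 2000, 3000, 4000],
# getTimestampIndex returns index 2 and appends [3000, 4000], so
# overlapping responses never record the same sample twice.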

# Collect each stat for n iterations by calling the REST endpoint.
# Output is stats_dict, containing unique samples for each node for each statistic.
def collectStats(bucket, iterations):
    for i in range(iterations):
        index = 0
        updated_timestamps = False
        for stat in interesting_stats:
            stats_url = "http://localhost:8091/pools/default/buckets/" + bucket + "/stats/" + stat + "?zoom=minute"
            response = urllib2.urlopen(stats_url).read()
            response_dict = json.loads(response)
            # Every stat in a given iteration shares the same sample timestamps,
            # so the first-unseen index only needs calculating once per iteration
            if not updated_timestamps:
                index = getTimestampIndex(response_dict['timestamp'])
                updated_timestamps = True
            for node_name, stats_list in response_dict['nodeStats'].iteritems():
                stats_dict[stat][node_name] += stats_list[index:]
        # Unless this is the final iteration, sleep until ready to capture again.
        # NB - sleep is for 58 seconds NOT 60 to allow for sampling time.
        if i + 1 < iterations:
            time.sleep(58)

# Sum the stats from each node to create a cluster-wide total
def processStats():
    for stat in interesting_stats:
        list_of_lists = []
        for node, stats_list in stats_dict[stat].iteritems():
            list_of_lists.append(stats_list)
        cluster_totals[stat] = [sum(item) for item in zip(*list_of_lists)]
    # More complex stats can be attained by combining stats,
    # e.g. here the disk-write-queue size is calculated from its two constituents
    cluster_totals['disk write queue'] = [sum(item) for item in zip(cluster_totals['ep_flusher_todo'], cluster_totals['ep_queue_size'])]
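
    # Another combined stat, added here purely as an illustration (not part of
    # the original script): total key-value operations per sample, derived from
    # the cmd_get and cmd_set series already totalled above
    cluster_totals['total ops'] = [sum(item) for item in zip(cluster_totals['cmd_get'], cluster_totals['cmd_set'])]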

# Print each cluster-wide stat as a time series with human-readable timestamps
def outputStats():
    for stat in cluster_totals:
        print stat
        for i in range(len(timestamps)):
            # Sample timestamps are reported in milliseconds since the epoch
            dt = datetime.fromtimestamp(timestamps[i] / 1000)
            human_time = dt.strftime('%Y-%m-%d %H:%M:%S')
            print human_time, cluster_totals[stat][i]

# Main - check arguments, initialise dictionaries and invoke the three stages
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "Usage: " + sys.argv[0] + " <bucket_name> <iterations>"
        sys.exit(1)
    bucket = sys.argv[1]
    iterations = int(sys.argv[2])
    for stat in interesting_stats:
        stats_dict[stat] = defaultdict(list)
    collectStats(bucket, iterations)
    processStats()
    outputStats()
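
For example, assuming the script is saved as cb_stats.py (a hypothetical filename) and run against a cluster node listening on localhost:8091, sampling the default bucket over five one-minute iterations would look like:

python cb_stats.py default 5

Each stat is then printed as a series of '<human-readable timestamp> <cluster-wide value>' lines.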