
@dhaikney
Created February 24, 2015 11:59
Basic Python script showing how Couchbase's stats REST endpoint can be used to gather per-node statistics over time and roll them up into cluster-wide totals.
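For context, each per-stat request the script makes returns JSON roughly shaped as below (a sketch only: the sample values and node names are invented, and the exact payload can vary between Couchbase Server versions). The script relies on the top-level timestamp list and the per-node sample lists under nodeStats:

    {
      "timestamp": [1424775540000, 1424775541000, 1424775542000],
      "nodeStats": {
        "node1.example.com:8091": [0, 12, 9],
        "node2.example.com:8091": [3, 7, 11]
      }
    }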
#!/usr/bin/env python
# Initial version - 24-Feb 2015
# Monitor interesting Couchbase statistics over a period of time.
# Stats are captured by calling the REST endpoint, which provides individual
# stats for each node. These are then post-processed to provide cluster-wide
# totals before being output to the console.
# NB: written for Python 2 (urllib2, print statements).
import json
import sys
import time
import urllib2
from collections import defaultdict
from datetime import datetime
# List of stats to collect and data structures to capture them in
interesting_stats = ["cmd_set", "cmd_get", "ep_queue_size", "ep_flusher_todo", "curr_items"]
stats_dict = defaultdict(dict)
cluster_totals = defaultdict(dict)
timestamps = list()
# Avoid any duplicate samples by checking for the first unique timestamp
def getTimestampIndex(response_timestamps):
    global timestamps
    index = 0
    while response_timestamps[index] in timestamps:
        index += 1
    timestamps += response_timestamps[index:]
    return index
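# Illustrative walk-through of the de-duplication (example values invented):
# if timestamps already ends [..., 1424775540000, 1424775541000] and a new
# response reports [1424775540000, 1424775541000, 1424775542000], the while
# loop stops at index 2, so only the single genuinely new sample is appended.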
# Collect each stat for n iterations by calling the REST endpoint.
# Output is stats_dict, containing unique samples for each node for each statistic
def collectStats(bucket, iterations):
    global timestamps
    for i in range(0, iterations):
        index = 0
        updated_timestamps = False
        for stat in interesting_stats:
            stats_url = "http://localhost:8091/pools/default/buckets/" + bucket + "/stats/" + stat + "?zoom=minute"
            response = urllib2.urlopen(stats_url).read()
            response_dict = json.loads(response)
            if not updated_timestamps:
                index = getTimestampIndex(response_dict['timestamp'])
                updated_timestamps = True
            for node_name, stats_list in response_dict['nodeStats'].iteritems():
                stats_dict[stat][node_name] += stats_list[index:]
        # Unless this is the final iteration, sleep until ready to capture again.
        # NB - sleep is for 58 seconds NOT 60 to allow for sampling time.
        if i + 1 < iterations:
            time.sleep(58)
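# Note on timing (an assumption, based on the zoom=minute parameter and the
# 58-second sleep above): each response appears to cover roughly the last
# minute of one-second samples, so polling slightly more often than once a
# minute, combined with the timestamp de-duplication in getTimestampIndex,
# should give a continuous series without gaps or double counting.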
# Sum the stats from each node to create a cluster-wide total
def processStats():
    for stat in interesting_stats:
        list_of_lists = []
        for node, stats_list in stats_dict[stat].iteritems():
            list_of_lists.append(stats_list)
        cluster_totals[stat] = [sum(item) for item in zip(*list_of_lists)]
    # More complex stats can be derived by combining stats,
    # e.g. here the disk-write-queue size is calculated from two constituents
    cluster_totals['disk write queue'] = [sum(item) for item in zip(cluster_totals['ep_flusher_todo'], cluster_totals['ep_queue_size'])]
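# Illustrative example of the summation (values invented): if two nodes
# report cmd_get samples [1, 2, 3] and [4, 5, 6], zip(*list_of_lists) pairs
# them up as (1, 4), (2, 5), (3, 6), and the cluster-wide total for each
# timestamp becomes [5, 7, 9].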
# Loop through all stats, printing each cluster-wide sample against a
# human-readable timestamp
def outputStats():
    global timestamps
    for stat in cluster_totals:
        print stat
        for i in range(0, len(timestamps)):
            # Timestamps are reported in milliseconds since the epoch
            dt = datetime.fromtimestamp(timestamps[i] / 1000)
            human_time = dt.strftime('%Y-%m-%d %H:%M:%S')
            print human_time, cluster_totals[stat][i]
# Main function - check arguments, initialise dictionaries and invoke the
# three stages
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "Usage: " + sys.argv[0] + " <bucket_name> <iterations>"
        sys.exit()
    bucket = sys.argv[1]
    iterations = int(sys.argv[2])
    for stat in interesting_stats:
        stats_dict[stat] = defaultdict(list)
    collectStats(bucket, iterations)
    processStats()
    outputStats()
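A quick usage sketch, assuming the script is saved as cb_stat_monitor.py (a hypothetical filename) and run against a bucket named "default"; the figures below are invented for illustration:

    $ python cb_stat_monitor.py default 2
    cmd_set
    2015-02-24 11:58:01 0
    2015-02-24 11:58:02 14
    ...
    disk write queue
    2015-02-24 11:58:01 3
    2015-02-24 11:58:02 7
    ...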