@snopoke
Forked from javierwilson/report.py
Last active August 24, 2017 11:21
Generates a CSV of last week's CPU, memory, and disk utilization.
#!/usr/bin/env python3
import datetime
import os
from collections import defaultdict

from datadog import initialize, api
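
# Datadog credentials are read from the environment: DD_API_KEY and
# DD_APP_KEY must be set before running this script.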
options = {
    'api_key': os.getenv('DD_API_KEY'),
    'app_key': os.getenv('DD_APP_KEY'),
}
initialize(**options)
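
# Turn Datadog's raw series list into (hostname, values) pairs. The 'scope'
# field looks like 'host:vm-name', so everything after the last ':' is the
# host name; None datapoints mark gaps in the data and are dropped.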
def normalize_series(series):
    for serie in series:
        vm = serie['scope']
        vm = vm[vm.rfind(':') + 1:]
        pointlist = serie['pointlist']
        values = [point[1] for point in pointlist if point[1] is not None]
        yield vm, values
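
# Query Datadog for one metric, grouped by host, fetching both the weekly
# average and the weekly maximum. Each extra filter is combined with the
# environment filter and queried separately; results are merged per host.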
def dd_query(metric, environment, start_time, end_time, extra_filters=None):
    vm_data = defaultdict(dict)
    env_filter = 'environment:%s' % environment
    if extra_filters:
        filters = [
            ','.join([env_filter, extra_filter])
            for extra_filter in extra_filters
        ]
    else:
        filters = [env_filter]
    for query_filter in filters:
        for aggregate in ['avg', 'max']:
            query = '%s:%s{%s}by{host}' % (aggregate, metric, query_filter)
            print('Fetching data:', query)
            results = api.Metric.query(start=start_time, end=end_time, query=query)
            for vm, datapoints in normalize_series(results['series']):
                vm_data[vm][aggregate] = datapoints
    return vm_data
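
# Scale a series: 'percent' converts 0-1 fractions to percentages, 'invert'
# flips a percentage (e.g. CPU idle -> CPU used).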
def transform_list(list1, percent=False, invert=False):
    if percent:
        list1 = [100 * x for x in list1]
    if invert:
        list1 = [100 - x for x in list1]
    return list1
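
# Reduce each host's datapoints to a weekly average and maximum, keyed as
# '<metric>_avg' and '<metric>_max' for the CSV formatting below.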
def get_metric_values(metric_name, vm_data, percent=False, invert=False):
    metric_data = defaultdict(dict)
    for vm, data in vm_data.items():
        list_avg = transform_list(data.get('avg', []), percent, invert)
        avg1 = sum(list_avg) / len(list_avg) if list_avg else 0
        metric_data[vm]['{}_avg'.format(metric_name)] = avg1
        list_max = transform_list(data.get('max', []), percent, invert)
        max1 = max(list_max) if list_max else 0
        metric_data[vm]['{}_max'.format(metric_name)] = max1
    return metric_data
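
# Merge one metric's per-host values into the combined per-host dict.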
def merge_data(combined, metric_data):
    for vm, values in metric_data.items():
        combined[vm].update(values)
    return combined
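
# Print the report as CSV: three 'VM Name,Weekly Avg,Weekly Max' column
# groups, one per metric (DISK, CPU, RAM), with one row per host.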
def print_csv(combined_data):
    headers = []
    metric_names = ['DISK', 'CPU', 'RAM']
    for metric in metric_names:
        headers.append("VM Name,Weekly Avg ({0}),Weekly Max ({0})".format(metric))
    print(','.join(headers))
    for vm in sorted(combined_data):
        vm_short = vm.split('.')[0]
        row = []
        for metric in metric_names:
            row.append('{{vm}},{{{0}_avg:.0f}},{{{0}_max:.0f}}'.format(metric))
        try:
            print(','.join(row).format(vm=vm_short, **combined_data[vm]))
        except KeyError:
            print('Error printing data for VM: {}.'.format(vm))
            raise
# Time range: the most recent full week, starting at midnight on the Monday
# of last week, expressed as Unix timestamps.
today = datetime.date.today()
last_monday = today - datetime.timedelta(days=today.weekday(), weeks=1)
epoch = datetime.datetime(1970, 1, 1)
start_time = (datetime.datetime.combine(last_monday, datetime.time.min) - epoch).total_seconds()
end_time = start_time + (3600 * 24 * 7)
# Report parameters
environment = 'icds'
disk_usage_filters = [
    'device:/dev/mapper/consolidated-data1',
    'host:celery1.internal-icds.commcarehq.org,device:/dev/sda1',
    'host:nic-tableau.commcarehq.org,device:g:',
]
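
# Datadog reports CPU as idle % (invert to get utilization), disk usage as a
# 0-1 fraction (scale to percent), and memory as a usable fraction (scale to
# percent, then invert to get used %).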
cpu = dd_query('system.cpu.idle', environment, start_time, end_time)
disk = dd_query('system.disk.in_use', environment, start_time, end_time, extra_filters=disk_usage_filters)
mem = dd_query('system.mem.pct_usable', environment, start_time, end_time)
combined = defaultdict(dict)
merge_data(combined, get_metric_values('CPU', cpu, invert=True))
merge_data(combined, get_metric_values('DISK', disk, percent=True))
merge_data(combined, get_metric_values('RAM', mem, percent=True, invert=True))
print_csv(combined)
# TODO: add in trend of disk usage over last month
# sum:((system.disk.used * 100) / system.disk.total){environment:icds}
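
# A minimal sketch of the TODO above (hypothetical, not called anywhere yet).
# The query string is taken from the comment above with 'by{host}' added so
# normalize_series can split the results per host; whether Datadog accepts
# this arithmetic query as written has not been verified.
def disk_usage_trend(environment, start_time, end_time):
    query = 'sum:((system.disk.used * 100) / system.disk.total){environment:%s}by{host}' % environment
    results = api.Metric.query(start=start_time, end=end_time, query=query)
    for vm, datapoints in normalize_series(results['series']):
        if len(datapoints) >= 2:
            # Crude trend: change in disk usage over the query window.
            yield vm, datapoints[-1] - datapoints[0]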