@ki38sato
Created December 27, 2015
# -*- coding: utf-8 -*-
import requests
import time
import json
from StringIO import StringIO
import re
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(level=logging.INFO)
MACKEREL_APIKEY = os.environ.get("MACKEREL_APIKEY")
#MONITOR_NAME = "Filesystem %"
MONITOR_ID = "2zumT77N8sh"
BASEURL = "https://mackerel.io"
SAVE_FILE = "/tmp/processed_alerts.txt"
SLACK_TOKEN = os.environ.get("SLACK_TOKEN")
SLACK_URL = 'https://slack.com/api/chat.postMessage'
epoch_time = int(time.time())
duration = 60 * 60 * 1          # metrics window for the trend fit: the last hour
THRESHOLD = 1                   # fraction of the filesystem size treated as "full"
ALERT_THRESHOLD = 60 * 60 * 6   # mention <!channel> if full within six hours
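
# Overall flow: pull the open WARNING alerts for the filesystem monitor
# from Mackerel, skip those already handled on a previous run (tracked in
# SAVE_FILE), then for each affected host fit a linear trend to recent
# disk usage and post the estimated time-to-full to Slack.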
def fetch_alert_targets():
    all_alerts = fetch_alerts()
    target_alerts = select_target_alerts(all_alerts)
    processed_alerts = fetch_processed_alerts()
    unprocessed_alerts = select_unprocessed_alerts(target_alerts, processed_alerts)
    save_new_alerts(target_alerts)
    return [alert['hostId'] for alert in unprocessed_alerts]

def fetch_alerts():
    headers = {'X-Api-Key': MACKEREL_APIKEY, 'Content-Type': 'application/json'}
    r = requests.get(BASEURL + "/api/v0/alerts", headers=headers)
    alerts = json.load(StringIO(r.content))
    time.sleep(2.0)
    return alerts['alerts']
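
# Fields used below: each alert carries 'id', 'status', 'monitorId',
# 'type' and 'hostId'; 'closedAt' appears only once an alert has been
# closed, so its absence marks the alert as still open.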
def select_target_alerts(all_alerts):
    return [alert for alert in all_alerts if alert['status'] == 'WARNING'
            and alert['monitorId'] == MONITOR_ID
            and alert['type'] == 'host'
            and 'closedAt' not in alert]

def fetch_processed_alerts():
    # On the first run SAVE_FILE does not exist yet: treat that as
    # "no alerts processed" instead of raising IOError.
    if not os.path.exists(SAVE_FILE):
        return []
    with open(SAVE_FILE, 'r') as f:
        lines = f.readlines()
    logger.debug(lines)
    return lines[0].split(';') if lines else lines

def select_unprocessed_alerts(target_alerts, processed_alerts):
    return [alert for alert in target_alerts if alert['id'] not in processed_alerts]

def save_new_alerts(target_alerts):
    # open(..., 'w') already truncates, and os.remove() would raise on the
    # first run when the file does not exist yet, so no explicit remove.
    with open(SAVE_FILE, 'w') as f:
        f.write(';'.join([alert['id'] for alert in target_alerts]))
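
# SAVE_FILE holds the alert ids seen on the previous run joined by ';',
# e.g. "2zaaaaaaaaa;2zbbbbbbbbb" (hypothetical ids), so the same alert is
# never reported to Slack twice.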

def prepare_targets(host_ids):
    dat = {}
    for host_id in host_ids:
        host = find_host(host_id)
        if host['name'] not in dat:
            dat[host['name']] = {'id': host['id']}
            meta = host['meta']
            if 'filesystem' in meta:
                dat[host['name']]['filesystem'] = meta['filesystem'].keys()
    return dat

def find_host(host_id):
    headers = {'X-Api-Key': MACKEREL_APIKEY, 'Content-Type': 'application/json'}
    r = requests.get("{0}/api/v0/hosts/{1}".format(BASEURL, host_id), headers=headers)
    res = json.load(StringIO(r.content))
    return res['host']
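
# import_df() fits a straight line used(t) = x * t + intercept over the
# fetch window using the legacy pandas OLS (pd.ols was removed in pandas
# 0.20; this 2015 gist targets the pandas of its day). Times are shifted
# so that t = 0 is the start of the window (epoch_time - duration), and
# solving x * t + intercept = size * THRESHOLD gives the estimated number
# of seconds until the filesystem is full, counted from the window start:
#     t_full = (size * THRESHOLD - intercept) / x
# process_targets() stores this as 'fullin'.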
def import_df(metrics, size):
    # {u'metrics': [{u'value': 6.85, u'time': 1450575540}, {u'value': 7.03, u'time': 1450575600}]}
    times = []
    values = []
    for metric in metrics:
        times.append(metric['time'] - (epoch_time - duration))
        values.append(metric['value'])
    df = pd.DataFrame({'time': times, 'value': values})
    #plt.plot(times, values, 'bo')
    model = pd.ols(y=df['value'], x=df['time'], intercept=True)
    #print model
    #plt.plot(model.x['x'], model.y_fitted, 'g-')
    #plt.hlines([size, size * 0.8], times[0], times[-1], linestyles="dashed")
    #plt.show()
    return model

def fetch_metrics(hostid, name, time_from, time_to):
    payload = {'name': name, 'from': time_from, 'to': time_to}
    headers = {'X-Api-Key': MACKEREL_APIKEY}
    r = requests.get(BASEURL + "/api/v0/hosts/" + hostid + "/metrics", params=payload, headers=headers)
    time.sleep(2.0)
    metrics = json.load(StringIO(r.content))
    if 'metrics' in metrics:
        return metrics['metrics']
    else:
        return []
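
# Mackerel's host-metrics endpoint takes 'from' and 'to' as epoch seconds
# (from < to) and responds with {"metrics": [{"time": ..., "value": ...},
# ...]}; a name with no recorded data appears to come back without the
# 'metrics' key, hence the defensive check above.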

def human_readable_size(s):
    s = float(s)
    if s > 10**12:
        return "%.2f TB" % (s / 10**12)
    if s > 10**9:
        return "%.2f GB" % (s / 10**9)
    if s > 10**6:
        return "%.2f MB" % (s / 10**6)
    if s > 10**3:
        return "%.2f KB" % (s / 10**3)
    return "%d B" % s

def human_readable_time(s):
    r = ""
    unit = [("d", 60 * 60 * 24), ("h", 60 * 60), ("m", 60)]
    for u in unit:
        if s >= u[1]:
            i = int(s / u[1])
            r = r + "%s%s" % (i, u[0])
            s = s - i * u[1]
    return "%s%.0fs" % (r, s)

def process_targets(dat):
    res = []
    num = 0
    for hostname in dat.keys():
        num = num + 1
        logger.info('fetching %s (%d/%d)', hostname, num, len(dat))
        record = {'hostname': hostname, 'fs': [], 'nearest': sys.maxint}
        if 'filesystem' not in dat[hostname]:
            continue
        for fs in dat[hostname]['filesystem']:
            (fs, count) = re.subn(r"^/dev/", "", fs)
            fs = fs.replace('/', '_')  # for vagrant
            if count == 0:
                continue
            metric_name = "filesystem." + fs + ".used"
            # last 10 minutes of the size metric, to learn the capacity
            metrics_size = fetch_metrics(dat[hostname]['id'], "filesystem." + fs + ".size", epoch_time - 60 * 10, epoch_time)
            if len(metrics_size) == 0:
                logger.info("skipping fs: %s (no size metric)" % metric_name)
                continue
            # usage history over the trend window (the last hour)
            metrics = fetch_metrics(dat[hostname]['id'], metric_name, epoch_time - duration, epoch_time)
            size = 0
            for m in metrics_size:
                size = max(size, m['value'])
            latest = 0
            for m in metrics[::-1]:
                if m['value'] > 0:
                    latest = m['value']
                    break
            model = import_df(metrics, size)
            x = model.beta.x
            i = model.beta.intercept
            logger.debug("metric:%s, x: %f, intercept: %f, size: %s, current: %s" % (metric_name, x, i, size, latest))
            if x > 0:
                target = size * THRESHOLD
                e = (target - i) / x
                record['fs'].append({'name': fs, 'latest': latest, 'size': size, 'x': x, 'i': i, 'fullin': e})
                record['nearest'] = min(record['nearest'], e)
                logger.debug("fs: %s, size: %s/%s, estimated %s" % (
                    fs,
                    human_readable_size(latest),
                    human_readable_size(size),
                    human_readable_time(e)))
            else:
                record['fs'].append({'name': fs, 'latest': latest, 'size': size, 'x': x, 'i': i})
                logger.debug("fs: %s, usage is flat or decreasing." % fs)
        res.append(record)
    return res

def output_results(res):
    res = sorted(res, key=lambda x: x['nearest'])
    for host in res:
        for fs in sorted(host['fs'], key=lambda x: x.get('fullin', sys.maxint)):
            if 'fullin' in fs:
                mention = " <!channel>" if fs['fullin'] < ALERT_THRESHOLD else ""
                text = "{0}:{1}, size: {2}/{3}, full in {4}{5}".format(
                    host['hostname'], fs['name'],
                    human_readable_size(fs['latest']),
                    human_readable_size(fs['size']),
                    human_readable_time(fs['fullin']),
                    mention)
                post2slack(text)
            else:
                text = "{0}:{1}, size: {2}/{3}, never full".format(
                    host['hostname'], fs['name'],
                    human_readable_size(fs['latest']),
                    human_readable_size(fs['size']))
                post2slack(text)

def post2slack(text):
    post2slack_raw(SLACK_TOKEN, SLACK_URL, text, "#alert", "life_checker", ':police_car:')

def post2slack_raw(token, url, text, channel, user, emoji):
    payload = {
        'token': token,
        'channel': channel,
        'username': user,
        'text': text,
        'icon_emoji': emoji
    }
    result = requests.post(url, data=payload).json()
    if not result['ok']:
        return "Failed: %s" % result['error']
    else:
        return "Success"
if __name__ == "__main__":
targets = fetch_alert_targets()
logger.debug(targets)
if targets:
dat = prepare_targets(targets)
res = process_targets(dat)
output_results(res)