# -*- coding: utf-8 -*-
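#
# Estimate how soon each disk will be full, based on open Mackerel alerts.
# The script pulls open host-level WARNING alerts for the filesystem monitor,
# fits a linear trend to the last hour of "filesystem.<fs>.used" metrics for
# each affected host, and posts the estimated time-to-full to Slack.
# Requires MACKEREL_APIKEY and SLACK_TOKEN in the environment (Python 2).
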
import requests
import time
import json
from StringIO import StringIO
import re
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(level=logging.INFO)

MACKEREL_APIKEY = os.environ.get("MACKEREL_APIKEY")
#MONITOR_NAME = "Filesystem %"
MONITOR_ID = "2zumT77N8sh"
BASEURL = "https://mackerel.io"
SAVE_FILE = "/tmp/processed_alerts.txt"
SLACK_TOKEN = os.environ.get("SLACK_TOKEN")
SLACK_URL = 'https://slack.com/api/chat.postMessage'

epoch_time = int(time.time())
duration = 60 * 60 * 1         # look back one hour of metrics for the trend fit
THRESHOLD = 1                  # fraction of the filesystem size regarded as "full"
ALERT_THRESHOLD = 60 * 60 * 6  # mention @channel if the disk fills within 6 hours

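# Return the host ids of target alerts that have not been reported yet.
# Ids of already-seen alerts are kept in SAVE_FILE between runs.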
def fetch_alert_targets():
    all_alerts = fetch_alerts()
    target_alerts = select_target_alerts(all_alerts)
    processed_alerts = fetch_processed_alerts()
    unprocessed_alerts = select_unprocessed_alerts(target_alerts, processed_alerts)
    save_new_alerts(target_alerts)
    return [alert['hostId'] for alert in unprocessed_alerts]

def fetch_alerts():
    # GET /api/v0/alerts returns the currently open alerts.
    headers = {'X-Api-Key': MACKEREL_APIKEY, 'Content-Type': 'application/json'}
    r = requests.get(BASEURL+"/api/v0/alerts", headers=headers)
    alerts = json.load(StringIO(r.content))
    time.sleep(2.0)  # be gentle with the API rate limit
    return alerts['alerts']

def select_target_alerts(all_alerts):
    # Keep only open host-level WARNINGs raised by the target monitor.
    return [alert for alert in all_alerts if alert['status'] == 'WARNING'
            and alert['monitorId'] == MONITOR_ID
            and alert['type'] == 'host'
            and 'closedAt' not in alert]

def fetch_processed_alerts():
    # First run: the state file does not exist yet.
    if not os.path.exists(SAVE_FILE):
        return []
    with open(SAVE_FILE, 'r') as f:
        lines = f.readlines()
    logger.debug(lines)
    return lines[0].split(';') if lines else lines

def select_unprocessed_alerts(target_alerts, processed_alerts):
    return [alert for alert in target_alerts if alert['id'] not in processed_alerts]

def save_new_alerts(target_alerts):
    # Opening with 'w' truncates the file, so no need to remove it first
    # (and os.remove would crash on the first run, when it does not exist).
    with open(SAVE_FILE, 'w') as f:
        f.write(';'.join([alert['id'] for alert in target_alerts]))

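# Build {hostname: {'id': ..., 'filesystem': [...]}} for each alerted host,
# taking the filesystem list from the host's metadata.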
def prepare_targets(host_ids):
    dat = {}
    for host_id in host_ids:
        host = find_host(host_id)
        if host['name'] not in dat:
            dat[host['name']] = {'id': host['id']}
        meta = host['meta']
        if 'filesystem' in meta:
            dat[host['name']]['filesystem'] = meta['filesystem'].keys()
    return dat

def find_host(host_id):
    headers = {'X-Api-Key': MACKEREL_APIKEY, 'Content-Type': 'application/json'}
    r = requests.get("{0}/api/v0/hosts/{1}".format(BASEURL, host_id), headers=headers)
    res = json.load(StringIO(r.content))
    return res['host']

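# Fit a least-squares line value = x * time + intercept over the metrics,
# with time rebased so that 0 marks the start of the lookback window.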
def import_df(metrics, size):
    # {u'metrics': [{u'value': 6.85, u'time': 1450575540}, {u'value': 7.03, u'time': 1450575600}]}
    times = []
    values = []
    for metric in metrics:
        times.append(metric['time'] - (epoch_time - duration))
        values.append(metric['value'])
    df = pd.DataFrame({'time': times, 'value': values})
    #plt.plot(times, values, 'bo')
    # pd.ols is the legacy pandas regression interface (removed in pandas 0.20+).
    model = pd.ols(y=df['value'], x=df['time'], intercept=True)
    #print model
    #plt.plot(model.x['x'], model.y_fitted, 'g-')
    #plt.hlines([size, size * 0.8], times[0], times[-1], linestyles="dashed")
    #plt.show()
    return model

def fetch_metrics(hostid, name, time_from, time_to):
    payload = {'name': name, 'from': time_from, 'to': time_to}
    headers = {'X-Api-Key': MACKEREL_APIKEY}
    r = requests.get(BASEURL+"/api/v0/hosts/"+hostid+"/metrics", params=payload, headers=headers)
    time.sleep(2.0)  # be gentle with the API rate limit
    metrics = json.load(StringIO(r.content))
    if 'metrics' in metrics:
        return metrics['metrics']
    else:
        return []

def human_readable_size(s):
    s = float(s)
    if s > 10**12:
        return "%.2f TB" % (s / 10**12)
    if s > 10**9:
        return "%.2f GB" % (s / 10**9)
    if s > 10**6:
        return "%.2f MB" % (s / 10**6)
    if s > 10**3:
        return "%.2f KB" % (s / 10**3)
    return "%d B" % s

def human_readable_time(s):
    r = ""
    unit = [("d", 60*60*24), ("h", 60*60), ("m", 60)]
    for u in unit:
        if s >= u[1]:
            i = int(s / u[1])
            r = r + "%s%s" % (i, u[0])
            s = s - i * u[1]
    return "%s%.0fs" % (r, s)

def process_targets(dat):
    res = []
    num = 0
    for hostname in dat.keys():
        num = num + 1
        logger.info('fetching %s (%d/%d)', hostname, num, len(dat))
        record = {'hostname': hostname, 'fs': [], 'nearest': sys.maxint}
        if 'filesystem' not in dat[hostname]:
            continue
        for fs in dat[hostname]['filesystem']:
            # Only consider /dev/* devices; metric names use '_' instead of '/'.
            (fs, count) = re.subn(r"^/dev/", "", fs)
            fs = fs.replace('/', '_') # for vagrant
            if count == 0:
                continue
            metric_name = "filesystem." + fs + ".used"
            # 'from' must be earlier than 'to': the last 10 minutes for the size.
            metrics_size = fetch_metrics(dat[hostname]['id'], "filesystem."+ fs +".size", epoch_time - 60 * 10, epoch_time)
            if len(metrics_size) == 0:
                logger.info("skipping %s: no size metric", metric_name)
                continue
            metrics = fetch_metrics(dat[hostname]['id'], metric_name, epoch_time - duration, epoch_time)
            size = 0
            for m in metrics_size:
                size = max(size, m['value'])
            # Most recent non-zero used value, for reporting.
            latest = 0
            for m in metrics[::-1]:
                if m['value'] > 0:
                    latest = m['value']
                    break
            model = import_df(metrics, size)
            x = model.beta.x
            i = model.beta.intercept
            logger.debug("metric:%s, x: %f, intercept: %f, size: %s, current: %s" % (metric_name, x, i, size, latest))
            if x > 0:
                target = size * THRESHOLD
                e = (target - i) / x
                record['fs'].append({'name': fs, 'latest': latest, 'size': size, 'x': x, 'i': i, 'fullin': e})
                record['nearest'] = min(record['nearest'], e)
                logger.debug("fs: %s, size: %s/%s, estimated %s" % (
                    fs,
                    human_readable_size(latest),
                    human_readable_size(size),
                    human_readable_time(e)))
            else:
                record['fs'].append({'name': fs, 'latest': latest, 'size': size, 'x': x, 'i': i})
                logger.debug("fs: %s, used size is decreasing." % (fs))
        res.append(record)
    return res

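# Post one Slack line per filesystem, hosts with the soonest estimated
# full time first; disks expected to fill within ALERT_THRESHOLD get an
# @channel mention.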
def output_results(res):
    res = sorted(res, key=lambda x: x['nearest'])
    for host in res:
        for fs in sorted(host['fs'], key=lambda x: x.get('fullin', sys.maxint)):
            if 'fullin' in fs:
                mention = " <!channel>" if fs['fullin'] < ALERT_THRESHOLD else ""
                text = "{0}:{1}, size: {2}/{3}, full in {4}{5}".format(
                    host['hostname'], fs['name'],
                    human_readable_size(fs['latest']),
                    human_readable_size(fs['size']),
                    human_readable_time(fs['fullin']),
                    mention)
                post2slack(text)
            else:
                text = "{0}:{1}, size: {2}/{3}, never full".format(
                    host['hostname'], fs['name'],
                    human_readable_size(fs['latest']),
                    human_readable_size(fs['size']))
                post2slack(text)

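# Post to Slack via chat.postMessage, passing the bot token in the form
# payload (the token-in-body style of the web API).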
def post2slack(text):
    post2slack_raw(SLACK_TOKEN, SLACK_URL, text, "#alert", "life_checker", ':police_car:')

def post2slack_raw(token, url, text, channel, user, emoji):
    payload = {
        'token': token,
        'channel': channel,
        'username': user,
        'text': text,
        'icon_emoji': emoji
    }
    result = requests.post(url, data=payload).json()
    if not result['ok']:
        # Surface failures instead of silently discarding them.
        logger.error("Slack post failed: %s", result['error'])
        return "Failed: %s" % result['error']
    return "Success"

if __name__ == "__main__":
    targets = fetch_alert_targets()
    logger.debug(targets)
    if targets:
        dat = prepare_targets(targets)
        res = process_targets(dat)
        output_results(res)