Skip to content

Instantly share code, notes, and snippets.

@stanaka
Created April 15, 2016 01:20
Show Gist options
  • Save stanaka/56799bcf13b422d686c2aa573a2869d7 to your computer and use it in GitHub Desktop.
Save stanaka/56799bcf13b422d686c2aa573a2869d7 to your computer and use it in GitHub Desktop.
Simple Anomaly Detection for Mackerel
#! /usr/bin/env ruby
require "mackerel"
require "net/http"
require "uri"
require "pp"
require "date"
# --- Configuration -----------------------------------------------------------
# API key sent in the X-Api-Key header and handed to the Mackerel client
# (placeholder value; replace before running).
@mackerel_api_key = "<APIKEY>"
# Service name passed as the :service option when listing hosts.
service = "<SERVICE>"
# Passed as the :roles option when listing hosts.
roles = nil # or specify a target role

# Length of the most recent window that is tested for anomalies.
target_period = 5 * 60 # sec
# Length of the window before it whose data is used as the baseline.
training_period = 3 * 60 * 60 # sec

# The tested window starts one target_period before "now".
start_time = Time.now - target_period
# [start of training window, end of training / start of test window, end of test window]
target_time = [-training_period, 0, target_period].map { |offset| start_time + offset }

# Density cut-off: a test sample whose density is below this counts as anomalous.
epsilon = 0.1
# Statistical helpers added to every Array.
# Elements are converted with #to_f wherever means and variances are computed.
class Array
  # Arithmetic mean as a Float. An empty array yields NaN (0.0 / 0).
  def avg
    reduce(0.0) { |total, value| total + value.to_f } / size
  end

  # Population variance: mean squared distance from #avg, divided by size.
  def variance
    mean = avg
    reduce(0.0) { |total, value| total + (value.to_f - mean)**2 } / size
  end

  # Differences between each element and the one before it.
  # Returns a new array with one element fewer than the receiver (empty for
  # arrays of fewer than two elements). A nil predecessor is never subtracted.
  def derivative
    previous = nil
    each_with_object([]) do |current, deltas|
      deltas << current - previous unless previous.nil?
      previous = current
    end
  end

  # Mean of the consecutive differences.
  def derivative_avg
    derivative.avg
  end

  # Population variance of the consecutive differences.
  def derivative_variance
    derivative.variance
  end

  # Square root of #variance.
  def standard_deviation
    Math.sqrt(variance)
  end

  # Square root of #derivative_variance (NaN when there are no differences).
  def derivative_standard_deviation
    Math.sqrt(derivative_variance)
  end
end
# Fetches the data points of one host metric from the Mackerel API.
#
# host_id  - host id, interpolated into the request path.
# metric   - metric name, sent as the `name` query parameter.
# from_dt  - start of the range; sent as `from` after #to_i.
# until_dt - end of the range; sent as `to` after #to_i.
#
# Returns whatever the parsed JSON response holds under "metrics" (nil when the
# key is absent). Errors from Net::HTTP or JSON.parse propagate to the caller.
# NOTE(review): JSON is not required explicitly in this file; presumably it is
# loaded by one of the required libraries - confirm.
def get_metrics(host_id, metric, from_dt, until_dt)
  base_url = 'https://mackerel.io/api/v0/hosts/'
  # Encode the query values instead of interpolating them raw.
  query = URI.encode_www_form(name: metric, from: from_dt.to_i, to: until_dt.to_i)
  uri = URI.parse("#{base_url}#{host_id}/metrics?#{query}")

  https = Net::HTTP.new(uri.host, uri.port)
  https.use_ssl = true

  # Build the request once, reusing the URI that was already parsed.
  req = Net::HTTP::Get.new(uri.request_uri)
  req["X-Api-Key"] = @mackerel_api_key

  res = https.request(req)
  JSON.parse(res.body)["metrics"]
end
# Metrics to examine, keyed by the metric name requested from the API.
#   :threshold - a metric is skipped when the standard deviation of its
#                training-period derivative is below this value
#   :name      - graph name used when building Mackerel graph and embed URLs
metrics_threshold = {
  "loadavg5" => { threshold: 0.5, name: 'loadavg5' },
  "memory.used" => { threshold: 1_000_000_000, name: 'memory' },
  "memory.swap_cached" => { threshold: 1_000_000_000, name: 'memory' },
  "cpu.user.percentage" => { threshold: 20, name: 'cpu' },
  "cpu.iowait.percentage" => { threshold: 10, name: 'cpu' },
  "cpu.system.percentage" => { threshold: 10, name: 'cpu' },
  "custom.access.latency.api.percentile_99" => { threshold: 0, name: 'custom.access.latency.api.*' },
  "custom.access.latency.web.percentile_99" => { threshold: 0, name: 'custom.access.latency.web.*' },
}

# Collected statistics, keyed by "<host id>_<metric>"; starts empty.
metrics_variance = {}
# Fetches one metric of one host and summarises it for the anomaly check.
#
# host        - object responding to #id and #name.
# metric      - metric name handed to get_metrics.
# target_time - three-element array: [0] and [2] bound the fetched range, and
#               [1] (via #to_i) separates training points from test points.
#
# Returns {} when get_metrics yields nil; otherwise a one-entry Hash keyed by
# "<host id>_<metric>" whose value holds the raw values and their statistics.
def calc_standard_deviation(host, metric, target_time)
  datapoints = get_metrics(host.id, metric, target_time[0], target_time[2])
  return {} if datapoints.nil?

  # Points strictly before the boundary are training data, the rest test data.
  boundary = target_time[1].to_i
  before, after = datapoints.partition { |dp| dp["time"] < boundary }
  learn_data = before.map { |dp| dp["value"] }
  test_data = after.map { |dp| dp["value"] }

  {
    "#{host.id}_#{metric}" => {
      :learn_data => learn_data,
      :test_data => test_data,
      :derivative_standard_deviation => learn_data.derivative_standard_deviation,
      :derivative_standard_deviation_test => test_data.derivative_standard_deviation,
      :avg => learn_data.avg,
      :derivative_avg => learn_data.derivative_avg,
      :host_name => host.name,
      :host_id => host.id,
      :target_metric => metric,
      :metric => metric,
    },
  }
end
# Probability density of the normal distribution N(mu, sigma^2) at x.
#
# x, mu, sigma - converted with #to_f; only the magnitude of sigma matters.
#
# Returns a Float density. For sigma == 0 the plain formula divides by zero and
# produces NaN, which every `<` comparison treats as false; the limiting values
# are returned instead: Float::INFINITY when x equals mu, 0.0 otherwise.
def gaussian_destribution(x, mu, sigma)
  x = x.to_f
  mu = mu.to_f
  sigma = sigma.to_f.abs

  # Degenerate distribution: all mass sits exactly on mu.
  return(x == mu ? Float::INFINITY : 0.0) if sigma.zero?

  z = (x - mu) / sigma
  Math.exp(-(z**2) / 2) / (sigma * Math.sqrt(2 * Math::PI))
end
# List the hosts of the configured service, then gather statistics for every
# (host, metric) combination, host by host, into metrics_variance.
@mackerel = Mackerel::Client.new(mackerel_api_key: @mackerel_api_key)
hosts = @mackerel.get_hosts(service: service, roles: roles)
puts "Num of hosts: #{hosts.size}\n"
hosts.product(metrics_threshold.keys).each do |host, metric|
  metrics_variance.merge!(calc_standard_deviation(host, metric, target_time))
end
# Select the host/metric entries whose test-window derivative looks anomalous
# under a normal distribution fitted to the training-window derivative.
result_metrics = []
metrics_variance.each_value do |v|
  trained_sigma = v[:derivative_standard_deviation]
  # NaN means there were too few training points to compute a deviation.
  next if trained_sigma.nan?

  # Skip metrics whose training data varied less than the configured threshold.
  settings = metrics_threshold[v[:metric]]
  next if settings && trained_sigma < settings[:threshold].to_f

  # Densities of the test-window steps that fall below the cut-off.
  low_densities = v[:test_data].derivative
                               .map { |d| gaussian_destribution(d, v[:derivative_avg], trained_sigma) }
                               .select { |density| density < epsilon }
  next if low_densities.empty?

  # Keep the smallest density seen (including any value already stored).
  v[:p] = ([v[:p]] + low_densities).compact.min

  # Replace the raw series with their sorted derivatives.
  v[:test_data] = v[:test_data].derivative.sort
  v[:learn_data] = v[:learn_data].derivative.sort
  if settings
    v[:url] = format("https://mackerel.io/orgs/hatena/hosts/%s/-/graphs/%s", v[:host_id], settings[:name])
    v[:graph_name] = settings[:name]
  end
  result_metrics << v
end
# Print one HTML <ul> per anomalous host/metric, lowest density first, with an
# embedded Mackerel graph covering the test window (target_time[1]..target_time[2]).
# NOTE(review): the organisation name "hatena" is hard-coded in the embed URL.
utc_stamp = lambda { |time| time.getutc.strftime("%Y-%m-%dT%H:%M:%SZ") }
window_from = utc_stamp.call(target_time[1])
window_to = utc_stamp.call(target_time[2])

result_metrics.sort_by { |v| v[:p] }.each do |v|
  printf "<ul>"
  printf "<li>hostname: %s</li>", v[:host_name]
  printf "<li>p: %f</li>", v[:p]
  printf "<li>stddev: %.2f(train), %.2f(test)</li>", v[:derivative_standard_deviation], v[:derivative_standard_deviation_test]
  # Label previously read "matric"; corrected to "metric".
  printf "<li>metric: %s</li>", v[:target_metric]
  printf "<iframe src='https://mackerel.io/embed/orgs/hatena/hosts/%s?graph=%s#t=%s,%s' height='200' width='400' frameborder='0'></iframe>",
         v[:host_id], v[:graph_name], window_from, window_to
  printf "</ul>"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment