Skip to content

Instantly share code, notes, and snippets.

@ionutvilie
Created May 9, 2017 14:42
Show Gist options
  • Save ionutvilie/b4ee4f80292c4e33efcbc1b53df87fc5 to your computer and use it in GitHub Desktop.
Save ionutvilie/b4ee4f80292c4e33efcbc1b53df87fc5 to your computer and use it in GitHub Desktop.
automate prometheus alerts
package main
import (
"os"
"text/template"
)
// Template holds two string values, Text and Lang.
//
// NOTE(review): main does not use this type — confirm whether it is still
// needed before relying on it.
type Template struct {
	Text string
	Lang string
}
// Alert is the data passed to the alert rule templates in alert-rules.tmpl.
// Every field is substituted verbatim into the rendered rule text.
type Alert struct {
	// App, Service and Stage are inserted into the metric names and label
	// matchers of the rule conditions.
	App     string
	Service string
	Stage   string
	// AlertName is the name written after the ALERT keyword.
	AlertName string
	// Severity is the value of the rule's severity label.
	Severity string
}
// main renders the Prometheus alert rules for a single application to stdout.
//
// It parses alert-rules.tmpl using "{{{" and "}}}" as action delimiters, then
// executes the "<name>Alert" template for every entry in alerts with the same
// Alert value. A parse or execution error is written to stderr and the process
// exits with status 1 instead of being silently ignored.
func main() {
	const rulesFile = "alert-rules.tmpl"

	// Every entry needs a matching "<entry>Alert" template in the tmpl file.
	// The list could be moved to a configuration file (e.g. JSON).
	alerts := []string{"restart", "cpu", "sla"}

	// Delims has to be called before ParseFiles: the delimiters apply only to
	// parsing that happens after they are set.
	t, err := template.New("template").Delims("{{{", "}}}").ParseFiles(rulesFile)
	if err != nil {
		os.Stderr.WriteString("parsing " + rulesFile + ": " + err.Error() + "\n")
		os.Exit(1)
	}

	// Values substituted into the templates for the application being rendered.
	appFoo := Alert{
		App:       "Foo",
		Service:   "Bar",
		Stage:     "prod",
		Severity:  "warning",
		AlertName: "ProdFooBarWarning",
	}

	for _, alert := range alerts {
		name := alert + "Alert"
		if err := t.ExecuteTemplate(os.Stdout, name, appFoo); err != nil {
			os.Stderr.WriteString("executing template " + name + ": " + err.Error() + "\n")
			os.Exit(1)
		}
	}
}
{{{/* slaCondition: PromQL condition that holds when the rounded 0.99 quantile of the OK-status activity duration buckets (10m rate, grouped by le and activity) is greater than the per-activity average of the matching sla duration metric. App, Service and Stage come from the data passed to the template. */}}}
{{{ define "slaCondition" }}} round( histogram_quantile(0.99, sum(rate({ __name__=~"^activity_{{{.App }}}_{{{.Service }}}_[a-zA-Z]+_duration_milliseconds_bucket", status="OK", App="{{{.App }}}", service="{{{.Service }}}", stage="{{{.Stage }}}"}[10m] )) BY(le, activity) )) > (avg({ __name__=~"^activity_{{{.App }}}_{{{.Service }}}_sla_duration_milliseconds", App="{{{.App }}}", service="{{{.Service }}}", stage="{{{.Stage }}}" }) BY (activity)) {{{ end }}}
{{{/* slaAlert: ALERT rule named from .AlertName whose IF clause is the slaCondition template, with a FOR 1m clause and a severity label taken from .Severity.
The double-brace expressions in ANNOTATIONS are not actions of this template (its actions use triple braces), so they are emitted unchanged; presumably Prometheus expands them later.
NOTE(review): the description appends "s" although the metric names say milliseconds; confirm the intended unit. */}}}
{{{ define "slaAlert" }}}
ALERT {{{.AlertName }}}
IF {{{template "slaCondition" .}}}
FOR 1m LABELS {severity="{{{.Severity }}}"}
ANNOTATIONS {
description="Duration is: {{ humanize $value }}s.",
summary="{{ $labels.activity }}"
}
{{{ end }}}
{{{/* cpuCondition: PromQL condition that holds when the summed 10m rate of container_cpu_usage_seconds_total, grouped by kubernetes_pod_name, is greater than 0.1. The namespace matcher is built as App-Stage and the container name matcher as App-Service. */}}}
{{{ define "cpuCondition" }}} sum(rate(container_cpu_usage_seconds_total{ kubernetes_namespace="{{{.App }}}-{{{.Stage }}}", kubernetes_container_name="{{{.App }}}-{{{.Service }}}"}[10m] )) BY (kubernetes_pod_name) > 0.1 {{{ end }}}
{{{/* cpuAlert: ALERT rule named from .AlertName whose IF clause is the cpuCondition template, with a FOR 1m clause and a severity label taken from .Severity.
The double-brace expressions in ANNOTATIONS are not actions of this template (its actions use triple braces), so they are emitted unchanged; presumably Prometheus expands them later. */}}}
{{{ define "cpuAlert" }}}
ALERT {{{.AlertName }}}
IF {{{template "cpuCondition" .}}}
FOR 1m LABELS {severity="{{{.Severity }}}"}
ANNOTATIONS {
description="CPU usage is: {{ $value }}.",
summary="{{ $labels.kubernetes_pod_name }}"
}
{{{ end }}}
{{{/* restartCondition: PromQL condition that holds when the 1h rate of kube_pod_container_status_restarts, multiplied by 3600 and rounded, is greater than 10. The namespace matcher is built as App-Stage and the container matcher as App-Service. */}}}
{{{ define "restartCondition" }}} round(rate(kube_pod_container_status_restarts{namespace="{{{.App }}}-{{{.Stage }}}", container = "{{{.App }}}-{{{.Service }}}"}[1h]) * 3600) > 10 {{{ end }}}
{{{/* restartAlert: ALERT rule named from .AlertName whose IF clause is the restartCondition template, with a FOR 1m clause and a severity label taken from .Severity.
The double-brace expressions in ANNOTATIONS are not actions of this template (its actions use triple braces), so they are emitted unchanged; presumably Prometheus expands them later. */}}}
{{{ define "restartAlert" }}}
ALERT {{{.AlertName }}}
IF {{{template "restartCondition" .}}}
FOR 1m LABELS {severity="{{{.Severity }}}"}
ANNOTATIONS {
description="No of restarts: {{ $value }}",
summary="{{ $labels.pod }}"
}
{{{ end }}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment