Skip to content

Instantly share code, notes, and snippets.

@smijar
Created March 24, 2023 04:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smijar/8903b823b1aeef12040a703eb5d559d6 to your computer and use it in GitHub Desktop.
Save smijar/8903b823b1aeef12040a703eb5d559d6 to your computer and use it in GitHub Desktop.
Prometheus histogram instrumentation example

to run example

# term 1
cd histo-example
go clean && go build .
./histo-example

# term 2 - run some load
for i in {1..100000};do curl http://localhost:2112/hello1; curl http://localhost:2112/hello2; sleep 0.5; done

# term 3
cd prometheus
rm -rf data
./start.sh
# number of requests per route/code
fn_request_duration_seconds_count

# sum of all requests
sum(fn_request_duration_seconds_count)

fn_request_duration_seconds_sum - total time spent serving requests, in seconds
fn_request_duration_seconds_count - total number of requests observed

# hence, average request time

fn_request_duration_seconds_sum/fn_request_duration_seconds_count

# all requests

sum(fn_request_duration_seconds_count{code=~".*"})

# histogram sum of request durations for every route except /hello1

fn_request_duration_seconds_sum{code="200", route!="/hello1"}

# graph the per-second rate of /metrics scrapes that returned status code 200 (this counter is exported by the promhttp handler of the example app):

rate(promhttp_metric_handler_requests_total{code="200"}[1m])

# To calculate the average request duration during the last 5 minutes from the fn_request_duration_seconds histogram, use the following expression:

rate(fn_request_duration_seconds_sum[5m])
/rate(fn_request_duration_seconds_count[5m])

# p50
# We could calculate the average request time by dividing the sum by the count. In PromQL it would be:
# fn_request_duration_seconds_sum / fn_request_duration_seconds_count
# We can also calculate percentiles from the histogram. Prometheus comes with a handy histogram_quantile function for this. For example, calculating the 50th percentile (the median) over the last 5 minutes in PromQL would be:
# [https://povilasv.me/prometheus-tracking-request-duration/](https://povilasv.me/prometheus-tracking-request-duration/)

histogram_quantile(0.5, rate(fn_request_duration_seconds_bucket{code="200"}[5m]))

# SLO - https://prometheus.io/docs/practices/histograms/
# You might have an SLO to serve 95% of requests within 300ms. In that case, configure a histogram to have a bucket with an upper limit of 0.3 seconds. You can then directly express the relative amount of requests served within 300ms and easily alert if the value drops below 0.95. In the Prometheus documentation the expression is computed by job, over the last 5 minutes, on a histogram called http_request_duration_seconds. The expression below adapts that pattern to this example: it uses the fn_request_duration_seconds histogram, the 0.8-second bucket as the threshold, and a 10-minute window, and it aggregates across all series instead of grouping by job.

sum(rate(fn_request_duration_seconds_bucket{le="0.8"}[10m]))/sum(rate(fn_request_duration_seconds_count[10m]))

# APDEX SCORE-https://prometheus.io/docs/practices/histograms/
# You can approximate the well-known Apdex score in a similar way. Configure a bucket with the target request duration as the upper bound and another bucket with the tolerated request duration (usually 4 times the target request duration) as the upper bound. Example: The target request duration is 300ms. The tolerable request duration is 1.2s. This example's histogram has no 1.2-second bucket, so the expression below uses the 1-second bucket as the tolerated bound. It yields the Apdex score across all series over the last 5 minutes:

(
  sum(rate(fn_request_duration_seconds_bucket{le="0.3"}[5m]))
+
  sum(rate(fn_request_duration_seconds_bucket{le="1"}[5m]))
) / 2 / sum(rate(fn_request_duration_seconds_count[5m]))

PromQL cheatsheet

package main
import (
"fmt"
"math/rand"
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Package-level Prometheus collectors used by the example handlers.
var (
	// invocation_counter is a counter vector named fn_http_requests_total,
	// partitioned by the "route" and "code" labels.
	// NOTE(review): its registration in init is commented out, so it is
	// constructed here but not registered there.
	invocation_counter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "fn_http_requests_total",
			Help: "The total number of requests made to the function",
		},
		[]string{"route", "code"},
	)

	// Disabled gauge that would have tracked the most recent request duration.
	// last_request_duration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
	// 	Name: "fn_last_request_duration",
	// 	Help: "Last request duration for function",
	// },
	// 	[]string{"function", "code", "time"},
	// )

	// requestDurationHisto is a histogram vector named
	// fn_request_duration_seconds, partitioned by the "route" and "code"
	// labels. Bucket upper bounds are in seconds and range from 0.1 to 5.
	requestDurationHisto = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "fn_request_duration_seconds",
			Help: "Time taken to serve http requests.",
			Buckets: []float64{
				0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
				1, 2, 3, 5,
			},
		},
		[]string{"route", "code"},
	)
)
// init registers the request-duration histogram via prometheus.MustRegister,
// which panics if the registration fails.
func init() {
	// Only the histogram is registered; the counter and gauge registrations
	// are intentionally left disabled.
	// prometheus.MustRegister(invocation_counter)
	// prometheus.MustRegister(last_request_duration)
	prometheus.MustRegister(requestDurationHisto)
}
// helloHandler1 serves /hello1. It simulates work by sleeping for a random
// number of milliseconds in [125, 500), prints the elapsed time, and records
// it (in seconds) in requestDurationHisto under route "/hello1" and code 200.
// It writes no response body, so the client receives the default empty reply.
func helloHandler1(w http.ResponseWriter, r *http.Request) {
	began := time.Now()
	fmt.Printf("got /hello1 request\n")

	// Bounds of the simulated latency, in milliseconds (upper bound exclusive).
	const (
		lowMs  = 125
		highMs = 500
	)
	pause := time.Duration(lowMs+rand.Intn(highMs-lowMs)) * time.Millisecond
	time.Sleep(pause)

	elapsed := time.Since(began).Seconds()
	fmt.Printf("/hello1 duration: %v\n", elapsed)

	// Record the observed duration against this route and a 200 status.
	statusLabel := fmt.Sprintf("%d", http.StatusOK)
	requestDurationHisto.WithLabelValues("/hello1", statusLabel).Observe(elapsed)
	// last_request_duration.WithLabelValues("/hello1", fmt.Sprintf("%d", http.StatusOK), fmt.Sprintf("%d", time.Now().UnixMilli())).Set(duration)
	// invocation_counter.WithLabelValues("/hello1", fmt.Sprintf("%d", http.StatusOK)).Inc()
}
// helloHandler2 serves /hello2. It simulates slower work by sleeping for a
// random number of milliseconds in [300, 2000), prints the elapsed time, and
// records it (in seconds) in requestDurationHisto under route "/hello2" and
// code 200. It writes no response body, so the client receives the default
// empty reply.
func helloHandler2(w http.ResponseWriter, r *http.Request) {
	began := time.Now()
	fmt.Printf("got /hello2 request\n")

	// Bounds of the simulated latency, in milliseconds (upper bound exclusive).
	const (
		lowMs  = 300
		highMs = 2000
	)
	pause := time.Duration(lowMs+rand.Intn(highMs-lowMs)) * time.Millisecond
	time.Sleep(pause)

	elapsed := time.Since(began).Seconds()
	fmt.Printf("/hello2 duration: %v\n", elapsed)

	// Record the observed duration against this route and a 200 status.
	statusLabel := fmt.Sprintf("%d", http.StatusOK)
	requestDurationHisto.WithLabelValues("/hello2", statusLabel).Observe(elapsed)
	// last_request_duration.WithLabelValues("/hello2", fmt.Sprintf("%d", http.StatusOK), fmt.Sprintf("%d", time.Now().UnixMilli())).Set(duration)
	// invocation_counter.WithLabelValues("/hello2", fmt.Sprintf("%d", http.StatusOK)).Inc()
}
// main registers the two demo handlers and the Prometheus /metrics endpoint
// on the default mux, then serves HTTP on 0.0.0.0:2112 until the server fails.
func main() {
	http.HandleFunc("/hello1", helloHandler1)
	http.HandleFunc("/hello2", helloHandler2)
	http.Handle("/metrics", promhttp.Handler())

	// ListenAndServe always returns a non-nil error (for example when the
	// port is already in use). Surface it and exit non-zero instead of
	// silently returning with a success status.
	if err := http.ListenAndServe("0.0.0.0:2112", nil); err != nil {
		panic(fmt.Errorf("serving on 0.0.0.0:2112: %w", err))
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment