Skip to content

Instantly share code, notes, and snippets.

@philpennock
Created August 2, 2023 16:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philpennock/660d01ae7cadc4331494add667375d3a to your computer and use it in GitHub Desktop.
Save philpennock/660d01ae7cadc4331494add667375d3a to your computer and use it in GitHub Desktop.
An amalgamation of NATS-related content from multiple Icinga config files
# ~~~~~~~~~~~~~~~~~~~~~~~~8< Your Commands File >8~~~~~~~~~~~~~~~~~~~~~~~~
# CheckCommand wrapping `nats server check connection` from the plugin directory.
# Connection parameters are handed to the CLI through NATS_* environment
# variables; the timing thresholds are passed as command-line arguments.
object CheckCommand "nats-server" {
  import "plugin-check-command"

  command = [ PluginDir + "/nats", "server", "check", "connection" ]

  # A NATS context has to be defined outside Icinga, alas, but when one exists
  # it is the best way to simply check against that context.
  env.NATS_CONTEXT = "$nats_context$"

  # Explicit connection target and credentials.
  env.NATS_URL = "$nats_url$"
  env.NATS_CREDS = "$nats_creds_file$"
  env.NATS_USER = "$nats_user$"
  env.NATS_PASSWORD = "$nats_password$"
  env.NATS_NKEY = "$nats_nkey_file$"

  # TLS material.
  env.NATS_CERT = "$nats_tls_cert_file$"
  env.NATS_KEY = "$nats_tls_key_file$"
  env.NATS_CA = "$nats_tls_ca_file$"

  # Go duration, default 5s. This is NOT the NAGIOS warn/critical threshold.
  env.NATS_TIMEOUT = "$nats_timeout$"

  arguments = {
    # Connection establishment; defaults as of 2021-06: warn 500ms, critical 1s.
    "--connect-warn" = {
      value = "$nats_connect_warn$"
      description = "Warning threshold to allow for establishing connections"
    }
    "--connect-critical" = {
      value = "$nats_connect_critical$"
      description = "Critical threshold to allow for establishing connections"
    }

    # Server RTT; defaults as of 2021-06: warn 500ms, critical 1s.
    "--rtt-warn" = {
      value = "$nats_rtt_warn$"
      description = "Warning threshold to allow for server RTT"
    }
    "--rtt-critical" = {
      value = "$nats_rtt_critical$"
      description = "Critical threshold to allow for server RTT"
    }

    # Full round trip test; defaults as of 2021-06: warn 500ms, critical 1s.
    "--req-warn" = {
      value = "$nats_req_warn$"
      description = "Warning threshold to allow for full round trip test"
    }
    "--req-critical" = {
      value = "$nats_req_critical$"
      description = "Critical threshold to allow for full round trip test"
    }
  }
}
# CheckCommand wrapping `nats server check js` from the plugin directory.
# Connection parameters are handed to the CLI through NATS_* environment
# variables; the only command-line argument is the optional JetStream domain.
object CheckCommand "nats-jetstream" {
  import "plugin-check-command"

  command = [ PluginDir + "/nats", "server", "check", "js" ]

  # A NATS context has to be defined outside Icinga, alas, but when one exists
  # it is the best way to simply check against that context.
  env.NATS_CONTEXT = "$nats_context$"

  # Explicit connection target and credentials.
  env.NATS_URL = "$nats_url$"
  env.NATS_CREDS = "$nats_creds_file$"
  env.NATS_USER = "$nats_user$"
  env.NATS_PASSWORD = "$nats_password$"
  env.NATS_NKEY = "$nats_nkey_file$"

  # TLS material.
  env.NATS_CERT = "$nats_tls_cert_file$"
  env.NATS_KEY = "$nats_tls_key_file$"
  env.NATS_CA = "$nats_tls_ca_file$"

  # Go duration, default 5s.
  env.NATS_TIMEOUT = "$nats_timeout$"

  arguments = {
    "--js-domain" = {
      value = "$nats_js_domain$"
      description = "JetStream Domain for targetting clusters on the NATS network"
    }
  }
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~8< Your rules file >8~~~~~~~~~~~~~~~~~~~~~~~~~~
# HTTPS probe of the NATS monitoring endpoint: requests /healthz on the port
# given by host.vars.nats_healthz_https and requires the response body to
# report a status of "ok".
#
# The certificate-expiry check is deliberately NOT configured here. The ITL
# "http" command maps http_certificate to check_http's -C option, and with -C
# the plugin by default checks only the certificate and skips the URL request,
# so http_uri and http_expect_body_regex would never be evaluated. Certificate
# lifetime is monitored by the separate "nats-healthz-cert" service below.
apply Service "nats-healthz" {
  import "generic-service"
  check_command = "http"

  assign where host.vars.nats_healthz_https

  vars += {
    http_uri = "/healthz"
    http_port = host.vars.nats_healthz_https
    http_ssl = true
    http_sni = true
    http_expect_body_regex = "\"status\"\\s*:\\s*\"ok\""
  }
}

# Certificate-expiry probe for the same HTTPS monitoring endpoint.
# Applied to exactly the hosts that get "nats-healthz".
apply Service "nats-healthz-cert" {
  import "generic-service"
  check_command = "http"

  assign where host.vars.nats_healthz_https

  vars += {
    http_port = host.vars.nats_healthz_https
    http_ssl = true
    http_sni = true
    # http_certificate value is the number of days the cert still has to be
    # valid for to avoid "warning"; when comma-separated, the second value is
    # the "critical" threshold.
    http_certificate = "10,2"
    # (nothing checks that the cert hostname matches)
  }
}
# Service template: a generic service that runs the "nats-server" CheckCommand.
template Service "nats-server" {
  import "generic-service"

  check_command = "nats-server"
}
# Service template: a generic service that runs the "nats-jetstream" CheckCommand.
template Service "nats-jetstream" {
  import "generic-service"

  check_command = "nats-jetstream"
}
# NATS connection check for every host that sets vars.nats_server to true.
apply Service "nats" {
  import "nats-server"

  assign where host.vars.nats_server == true

  # Use the host's explicit NATS URL when it has one; otherwise build a
  # nats:// URL from the host object's name.
  if (host.vars.nats_url) {
    vars.nats_url = host.vars.nats_url
  } else {
    vars.nats_url = "nats://" + host.name
  }

  vars.slack_notifications = "enabled"
}
# JetStream check for every host that sets vars.nats_js_enabled to true.
apply Service "nats-js" {
  import "nats-jetstream"

  assign where host.vars.nats_js_enabled == true

  # Use the host's explicit NATS URL when it has one; otherwise build a
  # nats:// URL from the host object's name.
  if (host.vars.nats_url) {
    vars.nats_url = host.vars.nats_url
  } else {
    vars.nats_url = "nats://" + host.name
  }

  vars.slack_notifications = "enabled"
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~8< Your checks file >8~~~~~~~~~~~~~~~~~~~~~~~~~
# Example host that enables the connection, JetStream and HTTPS healthz checks.
object Host "nats.lan" {
  import "home-host"

  address = "192.0.2.1"

  vars += {
    # Switches read by the `assign where` clauses of the NATS apply rules.
    nats_server = true
    nats_js_enabled = true
    nats_healthz_https = 8222

    # Credentials referenced by the CheckCommand macros.
    nats_user = "icinga"
    # Freshly generated for this gist, could use a canary :)
    nats_password = "SSWs!HP07M1V/=u_Aeef7_.V60leHn"

    slack_notifications = "enabled"
  }
}
# Host group with no assign rule; hosts join it through their `groups` attribute.
object HostGroup "nats-clusters" {
  display_name = "NATS Clusters"
}
# Collects every service whose check command is "nats-server".
object ServiceGroup "nats" {
  display_name = "NATS"

  assign where service.check_command == "nats-server"
}
# Collects every service whose check command is "nats-jetstream".
object ServiceGroup "nats-js" {
  display_name = "NATS JetStream"

  assign where service.check_command == "nats-jetstream"
}
# It's good to auto-gen hosts here, so that you build from lists of known servers.
# Host object for the "Foo" production deployment. Its own host check is the
# "dummy" command, configured below with state 0 (Up).
object Host "prod-foo" {
  display_name = "Foo: Production"
  groups = [ "nats-clusters" ]

  # Host check: dummy command with a fixed state and output text.
  check_command = "dummy"
  vars.dummy_state = 0 # Up
  vars.dummy_text = "Everything OK."

  # NATS connection details.
  vars.nats_url = "tls://connect.example.org"
  # The context would be managed outside of Icinga, however you manage contexts.
  vars.nats_context = "foo-client1"

  vars.pt_remote = true
  vars.pt_nats_supercluster = true
}
# Host group with no assign rule; hosts join it through their `groups` attribute.
object HostGroup "foo-clusters-prod" {
  display_name = "Foo NATS Production Clusters"
}
# Host object for one production cluster (us-east-2). Its own host check is the
# "dummy" command, configured below with state 0 (Up).
object Host "foo-prod-us-east-2.aws.cloud.foo.example.org" {
  display_name = "Foo-Geo: Production: us-east-2.aws.cloud.foo.example.org"
  groups = [ "foo-clusters-prod" ]

  # Host check: dummy command with a fixed state and output text.
  check_command = "dummy"
  vars.dummy_state = 0 # Up
  vars.dummy_text = "Everything OK."

  # Scheduling for the host check.
  check_interval = 5m
  retry_interval = 30s

  # NATS connection details.
  vars.nats_url = "tls://us-east-2.aws.cloud.foo.example.org"
  vars.nats_context = "foo-check1"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment