Created
January 18, 2022 12:21
-
-
Save proffalken/4b3500151c71d4a9e67530f97e316bc4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Platform-level job: runs the monitoring agents (cAdvisor, Vector) and the
# Traefik reverse proxy.
job "platform-jobs" {
  datacenters = ["dc1"]

  # system job, runs on all nodes
  type = "system"

  update {
    min_healthy_time  = "10s"
    healthy_deadline  = "5m"
    progress_deadline = "10m"
    auto_revert       = true
  }

  # Monitoring agents: cAdvisor (container metrics) and Vector (log shipping).
  group "monitoring" {
    count = 1

    # Up to 3 restarts per 10 minutes, 30s apart; after that the task is
    # marked failed instead of being retried further.
    restart {
      attempts = 3
      interval = "10m"
      delay    = "30s"
      mode     = "fail"
    }

    network {
      # Vector API; host port is assigned dynamically.
      port "api" {
        to = 8686
      }

      # Vector's Prometheus exporter; pinned to host port 9598.
      port "vstats" {
        to     = 9598
        static = 9598
      }

      # cAdvisor web/metrics endpoint; host port is assigned dynamically.
      port "cadvisor" {
        to = 8080
      }
    }

    ephemeral_disk {
      size   = 256 # MB
      sticky = true
    }

    task "cadvisor" {
      driver = "docker"

      config {
        dns_servers  = ["192.168.20.7", "192.168.1.21"]
        image        = "gcr.io/cadvisor/cadvisor:latest"
        network_mode = "bridge"

        # Host paths mounted into the container; all read-only except
        # /var/run.
        volumes = [
          "/:/rootfs:ro",
          "/var/run:/var/run:rw",
          "/sys:/sys:ro",
          "/var/lib/docker/:/var/lib/docker:ro",
          "/etc/localtime:/etc/localtime:ro"
        ]

        ports = ["cadvisor"]
      }

      resources {
        cpu    = 500 # MHz
        memory = 256 # MB
      }

      service {
        name = "cadvisor"

        tags = [
          "platform",
          "cadvisor",
          "_app=cadvisor",
          "_env=prod",
          "traefik.http.routers.cadvisor.tls=true",
          "traefik.http.routers.cadvisor.tls.domains[0].main=cadvisor.service.consul",
          "traefik.http.routers.cadvisor.tls.certresolver=le"
        ]

        port = "cadvisor"

        check {
          name     = "Container Advisor"
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }

    task "vector" {
      driver = "docker"

      config {
        dns_servers = ["192.168.20.7", "192.168.1.21"]
        image       = "timberio/vector:0.14.X-alpine"
        ports       = ["api", "vstats"]

        # Same host mounts as the cadvisor task.
        volumes = [
          "/:/rootfs:ro",
          "/var/run:/var/run:rw",
          "/sys:/sys:ro",
          "/var/lib/docker/:/var/lib/docker:ro",
          "/etc/localtime:/etc/localtime:ro"
        ]
      }

      # Vector won't start unless the configured sinks (backends) are healthy
      env {
        VECTOR_CONFIG          = "local/vector.toml"
        VECTOR_REQUIRE_HEALTHY = "true"
      }

      # resource limits are a good idea because you don't want your log
      # collection to consume all resources available
      resources {
        cpu    = 256 # MHz
        memory = 256 # MB
      }

      # template with Vector's configuration; Vector is sent SIGHUP when the
      # rendered file changes.
      # NOTE(review): the "modify" transform below is defined but no sink
      # lists it in `inputs` (all sinks read "logs" directly) - confirm
      # whether that is intentional.
      template {
        destination   = "local/vector.toml"
        change_mode   = "signal"
        change_signal = "SIGHUP"

        # overriding the delimiters to [[ ]] to avoid conflicts with Vector's
        # native templating, which also uses {{ }}
        left_delimiter  = "[["
        right_delimiter = "]]"

        # Heredoc body and terminator are kept at column 0: `<<EOH` (unlike
        # `<<-EOH`) does not strip leading indentation.
        data = <<EOH
data_dir = "alloc/data/vector/"
[api]
enabled = true
address = "0.0.0.0:8686"
playground = true
[sources.logs]
type = "docker_logs"
[sources.int_logs]
type = "internal_logs"
[sources.int_metrics]
type = "internal_metrics"
scrape_interval_secs = 2
[transforms.modify]
type = "remap"
inputs = ["logs"]
source = '''
# Parse Syslog input. The "!" means that the script should abort on error.
. = parse_syslog!(.message)
'''
[sinks.out]
type = "console"
inputs = [ "logs" ]
encoding.codec = "json"
[sinks.prom]
type = "prometheus"
inputs = ["int_metrics"]
address = "0.0.0.0:9598"
default_namespace = "vector"
[sinks.loki]
type = "loki"
inputs = ["logs", "int_logs"]
endpoint = "https://loki.service.consul/"
encoding.codec = "json"
healthcheck.enabled = true
# since . is used by Vector to denote a parent-child relationship, and Nomad's Docker labels contain ".",
# we need to escape them twice, once for TOML, once for Vector
labels.job = "{{ label.com\\.hashicorp\\.nomad\\.job_name }}"
labels.task = "{{ label.com\\.hashicorp\\.nomad\\.task_name }}"
labels.group = "{{ label.com\\.hashicorp\\.nomad\\.task_group_name }}"
labels.namespace = "{{ label.com\\.hashicorp\\.nomad\\.namespace }}"
labels.node = "{{ label.com\\.hashicorp\\.nomad\\.node_name }}"
# remove fields that have been converted to labels to avoid having the field twice
remove_label_fields = true
EOH
      }

      service {
        name = "vector"

        tags = [
          "platform",
          "vector",
          "_app=vector",
          "_env=prod"
        ]

        # Register the service with its port, as the cadvisor and traefik
        # services do; without it the service is registered with no port.
        port = "api"

        check {
          port     = "api"
          type     = "http"
          path     = "/health"
          interval = "30s"
          timeout  = "5s"
        }
      }

      service {
        name = "vstats"

        tags = [
          "platform",
          "vector",
          "_app=vstats",
          "_env=prod",
          "_nomad_host=${node.unique.name}"
        ]

        # Register the metrics port so consumers of the service catalog get
        # a usable address:port pair.
        port = "vstats"

        check {
          port     = "vstats"
          type     = "http"
          path     = "/metrics"
          interval = "30s"
          timeout  = "5s"
        }
      }

      kill_timeout = "30s"
    }
  }

  # Edge proxy: Traefik bound to the host's 80/443 (and 8080 for its UI).
  group "proxies" {
    count = 1

    # Same restart policy as the monitoring group.
    restart {
      attempts = 3
      interval = "10m"
      delay    = "30s"
      mode     = "fail"
    }

    network {
      port "ui" {
        to     = 8080
        static = 8080
      }

      port "http" {
        to     = 80
        static = 80
      }

      port "https" {
        to     = 443
        static = 443
      }
    }

    ephemeral_disk {
      size   = 256 # MB
      sticky = true
    }

    task "traefik" {
      # Renders DO_AUTH_TOKEN from Vault into secrets/.env and exports it to
      # the task as an environment variable (env = true).
      # NOTE(review): this template uses the Vault `secret` function, but no
      # `vault` block is visible in this job - confirm the task is granted a
      # Vault token/policy that can read "secret/traefik".
      template {
        data = <<EOF
{{ with secret "secret/traefik" }}
DO_AUTH_TOKEN="{{ .Data.DO_AUTH_TOKEN }}"
{{ end }}
EOF

        destination = "secrets/.env"
        env         = true
      }

      driver = "docker"

      config {
        dns_servers  = ["192.168.20.7", "192.168.1.21"]
        image        = "traefik:v2.5"
        network_mode = "bridge"

        # Traefik's configuration directory is a writable host mount.
        volumes = [
          "/media/traefik/config:/etc/traefik:rw",
          "/etc/localtime:/etc/localtime:ro"
        ]

        ports = ["ui", "http", "https"]
      }

      resources {
        cpu    = 1024 # MHz
        memory = 1024 # MB
      }

      service {
        name = "traefik"

        # Second tag is the service's own name, matching the convention used
        # by the cadvisor and vector services.
        tags = [
          "platform",
          "traefik",
          "_app=traefik",
          "_env=prod",
          "_nomad_host=${node.unique.name}",
          "traefik.http.routers.traefik.tls=true",
          "traefik.http.routers.traefik.tls.domains[0].main=traefik.service.consul",
          "traefik.http.routers.traefik.tls.certresolver=le"
        ]

        port = "ui"

        check {
          name     = "Traefik"
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment