Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@johnalotoski
Last active August 20, 2021 21:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnalotoski/b848fd6a7eb99bacedaf87e626fdec68 to your computer and use it in GitHub Desktop.
Save johnalotoski/b848fd6a7eb99bacedaf87e626fdec68 to your computer and use it in GitHub Desktop.
Different nomad job configs for traefik+connect simple dashboard. Only the "dashboard-works.hcl" file is behaving as expected at the moment.
# Nomad namespace for the job. The same value is reused below as the
# `meta.namespace` constraint value and as a service tag.
variable "ns" {
  type    = string
  default = "example-namespace"
}
job "countdash" {
datacenters = ["eu-west-1"]
namespace = var.ns
# Placement rule 1: only the client whose AWS instance id matches this value.
constraint {
  attribute = "${attr.unique.platform.aws.instance-id}"
  value     = "example-instance-id"
}

# Placement rule 2: only clients whose `namespace` client-meta entry equals
# the job's namespace variable.
constraint {
  attribute = "${meta.namespace}"
  value     = var.ns
}
# Counter API backend, registered in Consul as "count-api" with a Connect
# sidecar proxy.
group "api" {
  network {
    mode = "bridge"
  }

  service {
    name = "count-api"
    port = "9001"

    connect {
      sidecar_service {}

      # Envoy sidecar overrides: the CONSUL_* variables point the sidecar at
      # the client's Consul HTTPS (8501) and gRPC (8502) endpoints, and the
      # certificate files they reference are bind-mounted from the host below.
      sidecar_task {
        env {
          CONSUL_HTTP_SSL    = "true"
          CONSUL_HTTP_ADDR   = "https://${attr.unique.network.ip-address}:8501"
          CONSUL_GRPC_ADDR   = "https://${attr.unique.network.ip-address}:8502"
          CONSUL_CACERT      = "/etc/ssl/certs/consul/full.pem"
          CONSUL_CLIENT_CERT = "/etc/ssl/certs/consul/cert.pem"
          CONSUL_CLIENT_KEY  = "/etc/ssl/certs/consul/cert-key.pem"
        }

        config {
          cap_add = ["sys_ptrace"]

          # Host file exposed at the CONSUL_CACERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/full.pem"
            target   = "/etc/ssl/certs/consul/full.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_CERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/cert.pem"
            target   = "/etc/ssl/certs/consul/cert.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_KEY path.
          mount {
            type     = "bind"
            source   = "/var/lib/consul/cert-key.pem"
            target   = "/etc/ssl/certs/consul/cert-key.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }
        }
      }
    }
  }

  task "web" {
    driver = "docker"

    config {
      image = "hashicorpnomad/counter-api:v3"
    }
  }
}
# Counter dashboard frontend. It reaches count-api through its Connect sidecar
# and carries `traefik.*` tags that define an ingress router for it.
group "dashboard" {
  network {
    mode = "bridge"

    port "http" {
      static = 9002
      to     = 9002
    }
  }

  service {
    name = "count-dashboard"
    port = "9002"

    tags = [
      var.ns,
      "ingress",
      "traefik.enable=true",
      "traefik.consulcatalog.connect=true",
      "traefik.http.routers.count-dashboard-ingress.rule=Host(`custom.fqdn.example`)",
    ]

    connect {
      sidecar_service {
        proxy {
          # count-api is made available to the dashboard on local port 8080.
          upstreams {
            destination_name = "count-api"
            local_bind_port  = 8080
          }
        }
      }

      # Envoy sidecar overrides: the CONSUL_* variables point the sidecar at
      # the client's Consul HTTPS (8501) and gRPC (8502) endpoints, and the
      # certificate files they reference are bind-mounted from the host below.
      sidecar_task {
        env {
          CONSUL_HTTP_SSL    = "true"
          CONSUL_HTTP_ADDR   = "https://${attr.unique.network.ip-address}:8501"
          CONSUL_GRPC_ADDR   = "https://${attr.unique.network.ip-address}:8502"
          CONSUL_CACERT      = "/etc/ssl/certs/consul/full.pem"
          CONSUL_CLIENT_CERT = "/etc/ssl/certs/consul/cert.pem"
          CONSUL_CLIENT_KEY  = "/etc/ssl/certs/consul/cert-key.pem"
        }

        config {
          cap_add = ["sys_ptrace"]

          # Host file exposed at the CONSUL_CACERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/full.pem"
            target   = "/etc/ssl/certs/consul/full.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_CERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/cert.pem"
            target   = "/etc/ssl/certs/consul/cert.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_KEY path.
          mount {
            type     = "bind"
            source   = "/var/lib/consul/cert-key.pem"
            target   = "/etc/ssl/certs/consul/cert-key.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }
        }
      }
    }
  }

  task "dashboard" {
    driver = "docker"

    # The dashboard calls count-api through the upstream declared above.
    env {
      COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}"
    }

    config {
      image = "hashicorpnomad/counter-dashboard:v3"
    }
  }
}
}
# Nomad namespace for the job. The same value is reused below as the
# `meta.namespace` constraint value and as a service tag.
variable "ns" {
  type    = string
  default = "example-namespace"
}
job "countdash" {
datacenters = ["eu-west-1"]
namespace = var.ns
# Placement rule 1: only the client whose AWS instance id matches this value.
constraint {
  attribute = "${attr.unique.platform.aws.instance-id}"
  value     = "example-instance-id"
}

# Placement rule 2: only clients whose `namespace` client-meta entry equals
# the job's namespace variable.
constraint {
  attribute = "${meta.namespace}"
  value     = var.ns
}
# Traefik ingress run inside the job as a Connect-native service.
group "edge" {
  network {
    mode = "bridge"

    # Host port 8088 maps to port 8088 in the group's network namespace, which
    # is where the "http" entrypoint below listens.
    port "http" {
      static = 8088
      to     = 8088
    }
  }

  service {
    name = "traefik-ingress"
    port = "http"

    connect {
      native = true
    }
  }

  task "traefik" {
    driver = "docker"

    config {
      image = "traefik:v2.5.0-rc3"

      args = [
        "--log.level=TRACE",

        # Turn on Connect support; without it only plain http connections are tried.
        "--providers.consulcatalog.connectaware=true",

        # Secure communication by default.
        "--providers.consulcatalog.connectbydefault=true",
        "--providers.consulcatalog.exposedbydefault=false",
        "--entrypoints.http=true",
        "--entrypoints.http.address=:8088",

        # Must match the nomad/consul service name registered above; Consul
        # uses it for intentions.
        "--providers.consulcatalog.servicename=traefik-ingress",
        "--providers.consulcatalog.prefix=traefik",

        # The settings below are configured automatically by Nomad through
        # CONSUL_* environment variables, provided the client has
        # consul.share_ssl enabled.
        # "--providers.consulcatalog.endpoint.address=<socket|address>"
        # "--providers.consulcatalog.endpoint.tls.ca=<path>"
        # "--providers.consulcatalog.endpoint.tls.cert=<path>"
        # "--providers.consulcatalog.endpoint.tls.key=<path>"
        # "--providers.consulcatalog.endpoint.token=<token>"
      ]
    }
  }
}
# Counter API backend, registered in Consul as "count-api" with a Connect
# sidecar proxy.
group "api" {
  network {
    mode = "bridge"
  }

  service {
    name = "count-api"
    port = "9001"

    connect {
      sidecar_service {}

      # Envoy sidecar overrides: the CONSUL_* variables point the sidecar at
      # the client's Consul HTTPS (8501) and gRPC (8502) endpoints, and the
      # certificate files they reference are bind-mounted from the host below.
      sidecar_task {
        env {
          CONSUL_HTTP_SSL    = "true"
          CONSUL_HTTP_ADDR   = "https://${attr.unique.network.ip-address}:8501"
          CONSUL_GRPC_ADDR   = "https://${attr.unique.network.ip-address}:8502"
          CONSUL_CACERT      = "/etc/ssl/certs/consul/full.pem"
          CONSUL_CLIENT_CERT = "/etc/ssl/certs/consul/cert.pem"
          CONSUL_CLIENT_KEY  = "/etc/ssl/certs/consul/cert-key.pem"
        }

        config {
          cap_add = ["sys_ptrace"]

          # Host file exposed at the CONSUL_CACERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/full.pem"
            target   = "/etc/ssl/certs/consul/full.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_CERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/cert.pem"
            target   = "/etc/ssl/certs/consul/cert.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_KEY path.
          mount {
            type     = "bind"
            source   = "/var/lib/consul/cert-key.pem"
            target   = "/etc/ssl/certs/consul/cert-key.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }
        }
      }
    }
  }

  task "web" {
    driver = "docker"

    config {
      image = "hashicorpnomad/counter-api:v3"
    }
  }
}
# Counter dashboard frontend. It reaches count-api through its Connect sidecar
# and carries `traefik.*` tags that define a router for it.
group "dashboard" {
  network {
    mode = "bridge"

    port "http" {
      static = 9002
      to     = 9002
    }
  }

  service {
    name = "count-dashboard"
    port = "9002"

    tags = [
      var.ns,
      "traefik.enable=true",
      "traefik.consulcatalog.connect=true",
      "traefik.http.routers.count-dashboard.rule=Host(`dash-hcl-embedded.local`)"
    ]

    connect {
      sidecar_service {
        proxy {
          # count-api is made available to the dashboard on local port 8080.
          upstreams {
            destination_name = "count-api"
            local_bind_port  = 8080
          }
        }
      }

      # Envoy sidecar overrides: the CONSUL_* variables point the sidecar at
      # the client's Consul HTTPS (8501) and gRPC (8502) endpoints, and the
      # certificate files they reference are bind-mounted from the host below.
      sidecar_task {
        env {
          CONSUL_HTTP_SSL    = "true"
          CONSUL_HTTP_ADDR   = "https://${attr.unique.network.ip-address}:8501"
          CONSUL_GRPC_ADDR   = "https://${attr.unique.network.ip-address}:8502"
          CONSUL_CACERT      = "/etc/ssl/certs/consul/full.pem"
          CONSUL_CLIENT_CERT = "/etc/ssl/certs/consul/cert.pem"
          CONSUL_CLIENT_KEY  = "/etc/ssl/certs/consul/cert-key.pem"
        }

        config {
          cap_add = ["sys_ptrace"]

          # Host file exposed at the CONSUL_CACERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/full.pem"
            target   = "/etc/ssl/certs/consul/full.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_CERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/cert.pem"
            target   = "/etc/ssl/certs/consul/cert.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_KEY path.
          mount {
            type     = "bind"
            source   = "/var/lib/consul/cert-key.pem"
            target   = "/etc/ssl/certs/consul/cert-key.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }
        }
      }
    }
  }

  task "dashboard" {
    driver = "docker"

    # The dashboard calls count-api through the upstream declared above.
    env {
      COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}"
    }

    config {
      image = "hashicorpnomad/counter-dashboard:v3"
    }
  }
}
}
# Nomad namespace for the job. The same value is reused below as the
# `meta.namespace` constraint value and as a service tag.
variable "ns" {
  type    = string
  default = "example-namespace"
}
job "countdash" {
datacenters = ["eu-west-1"]
namespace = var.ns
# Placement rule 1: only the client whose AWS instance id matches this value.
constraint {
  attribute = "${attr.unique.platform.aws.instance-id}"
  value     = "example-instance-id"
}

# Placement rule 2: only clients whose `namespace` client-meta entry equals
# the job's namespace variable.
constraint {
  attribute = "${meta.namespace}"
  value     = var.ns
}
# Counter API backend using the token-injection workaround: the "web" task
# renders a Consul token (from Vault) and a patch script into the shared alloc
# directory, and the Envoy sidecar runs that script before starting Envoy.
group "api" {
  network {
    mode = "bridge"
  }

  service {
    name = "count-api"
    port = "9001"

    connect {
      sidecar_service {}

      sidecar_task {
        config {
          # Wrapper entrypoint:
          #  1. poll until the patch script has been rendered (replaces a
          #     fixed `sleep 10`, which raced with template rendering),
          #  2. run the patch script; Envoy is only started if it succeeds,
          #  3. `exec` the stock entrypoint so Envoy replaces the shell and
          #     receives stop signals directly.
          entrypoint = [
            "/bin/bash",
            "-c",
            "until [ -x /alloc/tmp/si_patch.sh ]; do sleep 1; done; /alloc/tmp/si_patch.sh && exec /docker-entrypoint.sh -c /secrets/envoy_bootstrap.json -l trace --concurrency 1 --disable-hot-restart",
          ]
        }
      }
    }
  }

  task "web" {
    driver = "docker"

    config {
      image = "hashicorpnomad/counter-api:v3"
    }

    vault {
      policies    = ["nomad-cluster"]
      change_mode = "noop"
    }

    # Consul token read from Vault, written where the sidecar can read it.
    template {
      data        = "{{ with secret \"consul/creds/connect\" }}{{ .Data.token }}{{ end }}\n"
      destination = "${NOMAD_ALLOC_DIR}/tmp/si_token"
    }

    # Patch script executed by the sidecar entrypoint above.
    # NOTE: the script deliberately avoids the "${...}" form, which HCL would
    # try to interpolate inside this heredoc.
    template {
      data        = <<EOF
#!/bin/bash
# Inject the Consul token from /alloc/tmp/si_token into the Envoy bootstrap
# config. Any failure aborts the script with a non-zero status and leaves the
# bootstrap file untouched.
set -euo pipefail

TOKEN_FILE=/alloc/tmp/si_token
BOOTSTRAP=/secrets/envoy_bootstrap.json

# Install jq only when it is missing, so sidecar restarts skip the download.
if ! command -v jq >/dev/null 2>&1; then
  apt-get update
  apt-get install -y jq
fi

while ! [ -e "$TOKEN_FILE" ]; do
  echo "Waiting for /alloc/tmp/si_token..."
  sleep 1
done
while ! [ -e "$BOOTSTRAP" ]; do
  echo "Waiting for /secrets/envoy_bootstrap.json..."
  sleep 1
done

token="$(< "$TOKEN_FILE")"
if [ -z "$token" ]; then
  echo "Refusing to patch: /alloc/tmp/si_token is empty" >&2
  exit 1
fi

# Write the patched config to a scratch file first: piping jq straight back
# into the bootstrap file would truncate it whenever jq fails.
tmp="$(mktemp)"
trap 'rm -f "$tmp"' EXIT
jq -e --arg SI "$token" '.dynamic_resources.ads_config.grpc_services.initial_metadata[0].value = $SI' "$BOOTSTRAP" > "$tmp"

# Copy the content back (rather than mv) so the bootstrap file keeps its
# existing owner and mode.
cat "$tmp" > "$BOOTSTRAP"
EOF
      destination = "${NOMAD_ALLOC_DIR}/tmp/si_patch.sh"
      # Executable for every user, since the script runs inside the sidecar
      # container rather than in this task.
      perms = "755"
    }
  }
}
# Counter dashboard frontend using the token-injection workaround: the
# "dashboard" task renders a Consul token (from Vault) and a patch script into
# the shared alloc directory, and the Envoy sidecar runs that script before
# starting Envoy.
group "dashboard" {
  network {
    mode = "bridge"

    port "http" {
      static = 9002
      to     = 9002
    }
  }

  service {
    name = "count-dashboard"
    port = "9002"

    tags = [
      var.ns,
      "ingress",
      "traefik.enable=true",
      "traefik.consulcatalog.connect=true",
      "traefik.http.routers.count-dashboard.rule=Host(`custom.fqdn.example`)",
      "traefik.http.routers.count-dashboard.tls=true",
      "traefik.http.routers.count-dashboard.entrypoints=https",
    ]

    connect {
      sidecar_service {
        proxy {
          # count-api is made available to the dashboard on local port 8080.
          upstreams {
            destination_name = "count-api"
            local_bind_port  = 8080
          }
        }
      }

      sidecar_task {
        config {
          # Wrapper entrypoint:
          #  1. poll until the patch script has been rendered (replaces a
          #     fixed `sleep 10`, which raced with template rendering),
          #  2. run the patch script; Envoy is only started if it succeeds,
          #  3. `exec` the stock entrypoint so Envoy replaces the shell and
          #     receives stop signals directly.
          entrypoint = [
            "/bin/bash",
            "-c",
            "until [ -x /alloc/tmp/si_patch.sh ]; do sleep 1; done; /alloc/tmp/si_patch.sh && exec /docker-entrypoint.sh -c /secrets/envoy_bootstrap.json -l trace --concurrency 1 --disable-hot-restart",
          ]
        }
      }
    }
  }

  task "dashboard" {
    driver = "docker"

    # The dashboard calls count-api through the upstream declared above.
    env {
      COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}"
    }

    config {
      image = "hashicorpnomad/counter-dashboard:v3"
    }

    vault {
      policies    = ["nomad-cluster"]
      change_mode = "noop"
    }

    # Consul token read from Vault, written where the sidecar can read it.
    template {
      data        = "{{ with secret \"consul/creds/connect\" }}{{ .Data.token }}{{ end }}\n"
      destination = "${NOMAD_ALLOC_DIR}/tmp/si_token"
    }

    # Patch script executed by the sidecar entrypoint above.
    # NOTE: the script deliberately avoids the "${...}" form, which HCL would
    # try to interpolate inside this heredoc.
    template {
      data        = <<EOF
#!/bin/bash
# Inject the Consul token from /alloc/tmp/si_token into the Envoy bootstrap
# config. Any failure aborts the script with a non-zero status and leaves the
# bootstrap file untouched.
set -euo pipefail

TOKEN_FILE=/alloc/tmp/si_token
BOOTSTRAP=/secrets/envoy_bootstrap.json

# Install jq only when it is missing, so sidecar restarts skip the download.
if ! command -v jq >/dev/null 2>&1; then
  apt-get update
  apt-get install -y jq
fi

while ! [ -e "$TOKEN_FILE" ]; do
  echo "Waiting for /alloc/tmp/si_token..."
  sleep 1
done
while ! [ -e "$BOOTSTRAP" ]; do
  echo "Waiting for /secrets/envoy_bootstrap.json..."
  sleep 1
done

token="$(< "$TOKEN_FILE")"
if [ -z "$token" ]; then
  echo "Refusing to patch: /alloc/tmp/si_token is empty" >&2
  exit 1
fi

# Write the patched config to a scratch file first: piping jq straight back
# into the bootstrap file would truncate it whenever jq fails.
tmp="$(mktemp)"
trap 'rm -f "$tmp"' EXIT
jq -e --arg SI "$token" '.dynamic_resources.ads_config.grpc_services.initial_metadata[0].value = $SI' "$BOOTSTRAP" > "$tmp"

# Copy the content back (rather than mv) so the bootstrap file keeps its
# existing owner and mode.
cat "$tmp" > "$BOOTSTRAP"
EOF
      destination = "${NOMAD_ALLOC_DIR}/tmp/si_patch.sh"
      # Executable for every user, since the script runs inside the sidecar
      # container rather than in this task.
      perms = "755"
    }
  }
}
}
# Nomad namespace for the job. The same value is reused below as the
# `meta.namespace` constraint value and as a service tag.
variable "ns" {
  type    = string
  default = "example-namespace"
}
job "countdash" {
datacenters = ["eu-west-1"]
namespace = var.ns
# Placement rule 1: only the client whose AWS instance id matches this value.
constraint {
  attribute = "${attr.unique.platform.aws.instance-id}"
  value     = "example-instance-id"
}

# Placement rule 2: only clients whose `namespace` client-meta entry equals
# the job's namespace variable.
constraint {
  attribute = "${meta.namespace}"
  value     = var.ns
}
# Counter API backend. Besides "count-api" this group registers a second,
# port-less service whose only content is Traefik router tags.
group "api" {
  network {
    mode = "bridge"
  }

  # Helper service carrying the ingress router definition. The router's
  # `service=count-dashboard` tag points it at the count-dashboard service
  # instead of at this service itself.
  service {
    name = "count-dashboard-ingress"

    tags = [
      var.ns,
      "ingress",
      "traefik.enable=true",
      "traefik.http.routers.count-dashboard-ingress.rule=Host(`custom.fqdn.example`)",
      "traefik.http.routers.count-dashboard-ingress.entrypoints=https",
      "traefik.http.routers.count-dashboard-ingress.tls=true",
      "traefik.http.routers.count-dashboard-ingress.service=count-dashboard",
    ]
  }

  service {
    name = "count-api"
    port = "9001"

    connect {
      sidecar_service {}

      # Envoy sidecar overrides: the CONSUL_* variables point the sidecar at
      # the client's Consul HTTPS (8501) and gRPC (8502) endpoints, and the
      # certificate files they reference are bind-mounted from the host below.
      sidecar_task {
        env {
          CONSUL_HTTP_SSL    = "true"
          CONSUL_HTTP_ADDR   = "https://${attr.unique.network.ip-address}:8501"
          CONSUL_GRPC_ADDR   = "https://${attr.unique.network.ip-address}:8502"
          CONSUL_CACERT      = "/etc/ssl/certs/consul/full.pem"
          CONSUL_CLIENT_CERT = "/etc/ssl/certs/consul/cert.pem"
          CONSUL_CLIENT_KEY  = "/etc/ssl/certs/consul/cert-key.pem"
        }

        config {
          cap_add = ["sys_ptrace"]

          # Host file exposed at the CONSUL_CACERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/full.pem"
            target   = "/etc/ssl/certs/consul/full.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_CERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/cert.pem"
            target   = "/etc/ssl/certs/consul/cert.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_KEY path.
          mount {
            type     = "bind"
            source   = "/var/lib/consul/cert-key.pem"
            target   = "/etc/ssl/certs/consul/cert-key.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }
        }
      }
    }
  }

  task "web" {
    driver = "docker"

    config {
      image = "hashicorpnomad/counter-api:v3"
    }
  }
}
# Counter dashboard frontend. It reaches count-api through its Connect
# sidecar. Its own tags only enable Traefik; no router is defined here.
group "dashboard" {
  network {
    mode = "bridge"

    port "http" {
      static = 9002
      to     = 9002
    }
  }

  service {
    name = "count-dashboard"
    port = "9002"

    tags = [
      var.ns,
      "ingress",
      "traefik.enable=true",
    ]

    connect {
      sidecar_service {
        proxy {
          # count-api is made available to the dashboard on local port 8080.
          upstreams {
            destination_name = "count-api"
            local_bind_port  = 8080
          }
        }
      }

      # Envoy sidecar overrides: the CONSUL_* variables point the sidecar at
      # the client's Consul HTTPS (8501) and gRPC (8502) endpoints, and the
      # certificate files they reference are bind-mounted from the host below.
      sidecar_task {
        env {
          CONSUL_HTTP_SSL    = "true"
          CONSUL_HTTP_ADDR   = "https://${attr.unique.network.ip-address}:8501"
          CONSUL_GRPC_ADDR   = "https://${attr.unique.network.ip-address}:8502"
          CONSUL_CACERT      = "/etc/ssl/certs/consul/full.pem"
          CONSUL_CLIENT_CERT = "/etc/ssl/certs/consul/cert.pem"
          CONSUL_CLIENT_KEY  = "/etc/ssl/certs/consul/cert-key.pem"
        }

        config {
          cap_add = ["sys_ptrace"]

          # Host file exposed at the CONSUL_CACERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/full.pem"
            target   = "/etc/ssl/certs/consul/full.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_CERT path.
          mount {
            type     = "bind"
            source   = "/etc/ssl/certs/cert.pem"
            target   = "/etc/ssl/certs/consul/cert.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }

          # Host file exposed at the CONSUL_CLIENT_KEY path.
          mount {
            type     = "bind"
            source   = "/var/lib/consul/cert-key.pem"
            target   = "/etc/ssl/certs/consul/cert-key.pem"
            readonly = true
            bind_options = {
              propagation = "rshared"
            }
          }
        }
      }
    }
  }

  task "dashboard" {
    driver = "docker"

    # The dashboard calls count-api through the upstream declared above.
    env {
      COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}"
    }

    config {
      image = "hashicorpnomad/counter-dashboard:v3"
    }
  }
}
}
{
"acl": {
"default_policy": "deny",
"down_policy": "extend-cache",
"enable_token_persistence": true,
"enabled": true
},
"addresses": {
"http": "127.0.0.1"
},
"advertise_addr": "{{ GetInterfaceIP \"ens5\" }}",
"bind_addr": "{{ GetInterfaceIP \"ens5\" }}",
"ca_file": "/etc/ssl/certs/full.pem",
"cert_file": "/etc/ssl/certs/cert.pem",
"client_addr": "0.0.0.0",
"connect": {
"ca_provider": "consul",
"enabled": true
},
"data_dir": "/var/lib/consul",
"datacenter": "eu-central-1",
"enable_debug": false,
"enable_local_script_checks": true,
"enable_script_checks": false,
"key_file": "/var/lib/consul/cert-key.pem",
"log_level": "trace",
"node_meta": {
"node_name": "client-eu-west-1-t3a-xlarge-unstable",
"region": "eu-central-1"
},
"ports": {
"grpc": 8502,
"http": 8500,
"https": 8501
},
"primary_datacenter": "eu-central-1",
"retry_join": [
"172.16.0.10",
"172.16.1.10",
"172.16.2.10",
"172.16.0.20",
"172.16.1.20",
"provider=aws region=eu-central-1 tag_key=Consul tag_value=server"
],
"server": false,
"telemetry": {
"disable_hostname": true,
"dogstatsd_addr": "localhost:8125"
},
"tls_min_version": "tls12",
"ui": false,
"verify_incoming": true,
"verify_outgoing": true,
"verify_server_hostname": true
}
{
"acl": {
"enabled": true
},
"client": {
"alloc_dir": "/var/lib/nomad/alloc",
"bridge_network_name": "nomad",
"bridge_network_subnet": "172.26.66.0/23",
"chroot_env": {
"/etc/passwd": "/etc/passwd",
"/etc/resolv.conf": "/etc/resolv.conf",
"/etc/services": "/etc/services",
"/etc/ssl/certs/ca-bundle.crt": "/etc/ssl/certs/ca-bundle.crt",
"/etc/ssl/certs/ca-certificates.crt": "/etc/ssl/certs/ca-certificates.crt",
"/nix/store/lbic4zgib09ca5lpv2xy6kbp7g9wsz9j-busybox-static-x86_64-unknown-linux-musl-1.32.1": "/usr"
},
"cni_path": "/nix/store/s0g5292g9dyihqh8lf2bf99k7z2zkl55-cni-plugins-0.9.1/bin",
"disable_remote_exec": false,
"enabled": true,
"gc_disk_usage_threshold": 80,
"gc_inode_usage_threshold": 70,
"gc_interval": "12h",
"gc_max_allocs": 50,
"gc_parallel_destroys": 2,
"host_volume": [
<HOST_VOLUMES>
],
"max_kill_timeout": "30s",
"meta": {
"connect.log_level": "trace",
"namespace": "<NAMESPACE>"
},
"no_host_uuid": false,
"reserved": {},
"server_join": {
"retry_interval": "30s",
"retry_join": [],
"retry_max": 0,
"start_join": []
},
"servers": [],
"state_dir": "/var/lib/nomad/client",
"template": {
"disable_file_sandbox": false,
"function_blacklist": [
"plugin"
]
}
},
"consul": {
"address": "127.0.0.1:8500",
"allow_unauthenticated": true,
"auto_advertise": true,
"checks_use_advertise": false,
"client_auto_join": true,
"client_http_check_name": "Nomad Client HTTP Check",
"client_service_name": "nomad-client",
"server_auto_join": true,
"server_http_check_name": "Nomad Server HTTP Check",
"server_rpc_check_name": "Nomad Server RPC Check",
"server_serf_check_name": "Nomad Server Serf Check",
"server_service_name": "nomad",
"ssl": false,
"tags": [],
"token": "$CONSUL_MASTER_TOKEN",
"verify_ssl": true
},
"data_dir": "/var/lib/nomad",
"datacenter": "eu-west-1",
"log_level": "DEBUG",
"plugin": [
{
"docker": [
{
"config": [
{
"allow_caps": [
"audit_write",
"chown",
"dac_override",
"fowner",
"fsetid",
"kill",
"mknod",
"net_bind_service",
"setfcap",
"setgid",
"setpcap",
"setuid",
"sys_chroot",
"net_raw",
"sys_ptrace"
],
"auth": {
"config": "/var/lib/nomad/.docker/config.json"
},
"volumes": {
"enabled": true
}
}
]
}
]
},
{
"raw_exec": [
{
"config": [
{
"enabled": false
}
]
}
]
}
],
"ports": {
"http": 4646,
"rpc": 4647,
"serf": 4648
},
"server": {
"bootstrap_expect": 1,
"data_dir": "/var/lib/nomad/server",
"default_scheduler_config": {
"preemption_config": {
"batch_scheduler_enabled": false,
"service_scheduler_enabled": false,
"system_scheduler_enabled": false
},
"scheduler_algorithm": "binpack"
},
"enabled": false,
"server_join": {
"retry_interval": "30s",
"retry_join": [],
"retry_max": 0,
"start_join": []
}
},
"telemetry": {
"datadog_address": "localhost:8125",
"datadog_tags": [
"region:eu-central-1",
"role:nomad"
],
"publish_allocation_metrics": true,
"publish_node_metrics": true
},
"tls": {
"ca_file": "/etc/ssl/certs/full.pem",
"cert_file": "/etc/ssl/certs/cert.pem",
"http": true,
"key_file": "/var/lib/nomad/cert-key.pem",
"rpc": true,
"rpc_upgrade_mode": false,
"tls_min_version": "tls12",
"tls_prefer_server_cipher_suites": false,
"verify_https_client": false,
"verify_server_hostname": false
},
"vault": {
"address": "https://127.0.0.1:8200",
"ca_file": "/etc/ssl/certs/full.pem",
"cert_file": "/etc/ssl/certs/cert.pem",
"create_from_role": "nomad-cluster",
"enabled": true,
"key_file": "/var/lib/nomad/cert-key.pem"
}
}
@johnalotoski
Copy link
Author

Nomad version is 1.1.3, consul is 1.10.1, traefik is 2.5.0-rc6 (or rc3 for *-embedded.hcl). Select consul and nomad config snippets above from a nomad client machine. Traefik (except in the case of the *-embedded.hcl file above) is a standalone systemd service running on an independent host outside of a nomad job.

dashboard-works.hcl works as expected, but I think it's not intended to code things this way.

dashboard-embedded.hcl seems to mostly work: the pink dashboard is shown, but says disconnected, chrome dev panel shows ws:$DOMAIN/socket.io/... socket connections failing. Logging of traefik docker container on debug doesn't show any errors that jump out at me at a glance.

dashboard-direct.hcl is the closest to how this is supposed to work I think, but hitting the webpage yields: 404 page not found and the connect-proxy-count-dashboard job of nomad running envoy logs SSL errors:

[2021-08-16 23:51:51.457][15][trace][connection] [source/extensions/transport_sockets/tls/ssl_socket.cc:132] [C895] ssl error occurred while read: WANT_READ

@johnalotoski
Copy link
Author

Also, the consul config on the nomad client above has a consul master token applied in the nomad agent config. If this is not provided, the following errors are seen in the logs when the dashboard-works.hcl file is spawned:

# From consul agent on the host (log level is trace):
agent.envoy.xds: Incremental xDS v3: xdsVersion=v3 direction=request protobuf="{ "typeUrl": "type.googleapis.com/envoy.config.cluster.v3.Cluster"
agent.envoy.xds: subscribing to type: xdsVersion=v3 typeUrl=type.googleapis.com/envoy.config.cluster.v3.Cluster
agent.envoy.xds: watching proxy, pending initial proxycfg snapshot for xDS: service_id=_nomad-task-6227f408-bee9-77fa-529f-924164f42b80-group-api-count-api-9001-sidecar-proxy xdsVersion=v3
agent.envoy.xds: Got initial config snapshot: service_id=_nomad-task-6227f408-bee9-77fa-529f-924164f42b80-group-api-count-api-9001-sidecar-proxy xdsVersion=v3
agent.envoy: Error handling ADS delta stream: xdsVersion=v3 error="rpc error: code = PermissionDenied desc = permission denied"

# From envoy stderr in the envoy sidecar container (log level is trace):
DeltaAggregatedResources gRPC config stream closed: 7, permission denied
gRPC update for type.googleapis.com/envoy.config.cluster.v3.Cluster failed
gRPC update for type.googleapis.com/envoy.config.listener.v3.Listener failed

@blake
Copy link

blake commented Aug 19, 2021

dashboard-embedded.hcl seems to mostly work: the pink dashboard is shown, but says disconnected, chrome dev panel shows ws:$DOMAIN/socket.io/... socket connections failing. Logging of traefik docker container on debug doesn't show any errors that jump out at me at a glance.

Which service protocol is the dashboard service configured to use? http, or tcp? If http, you're likely running into this issue – hashicorp/consul#8283. Changing the protocol to tcp should resolve the socket.io connection issues.

@johnalotoski
Copy link
Author

Hi @blake! Thanks for the tip. Indeed, the patch mentioned at issue 8283 does fix the issue to allow websockets to work now for the traefik embedded example using http protocol. The updated patch for v1.10.1 is:

diff --git i/agent/xds/listeners.go w/agent/xds/listeners.go
index 4e528cf73..9c33a2079 100644
--- i/agent/xds/listeners.go
+++ w/agent/xds/listeners.go
@@ -1598,6 +1598,11 @@ func makeHTTPFilter(opts listenerFilterOpts) (*envoy_listener_v3.Filter, error)
                        // sampled.
                        RandomSampling: &envoy_type_v3.Percent{Value: 0.0},
                },
+               UpgradeConfigs: []*envoy_http_v3.HttpConnectionManager_UpgradeConfig{
+                       {
+                               UpgradeType: "websocket",
+                       },
+               },
        }
 
        if opts.useRDS {

One more issue solved, thank you!

@johnalotoski
Copy link
Author

johnalotoski commented Aug 20, 2021

Resolved most of the issues mentioned above with the following changes from the PR here:

  • Patches consul envoy http protocol upgrade issue (modified patch from consul PR: 9639).
  • Patches consul 1.10.1 for connect listener issue (patch from consul PR 10714).
  • Configs nomad clients to use consul TLS for connect (eliminates passing TLS certs to envoy sidecar and also seemed to fix the dashboard-direct.hcl standalone traefik ingress to dashboard service problem).

Now just one issue seems to remain:

  • We specifically avoid passing a consul token into nomad clients because of nomad issue 9813 and instead make use of the consul default token which is utilized when the nomad consul token config is a blank string.
  • In the case of connect jobs, the consul default token does not get utilized in creating the envoy_bootstrap.json script and injection of the corresponding consul service identities, resulting in the Error handling ADS delta stream errors mentioned above.
  • If a consul default token is provided in the nomad client config consul stanza, the connect job works as expected. This seems to be a break from the expected behavior of consul default token being utilized where needed when no token is provided in the nomad client config consul stanza.
  • A short term workaround is to manually inject a token into the envoy bootstrap configuration. An example of this is the dashboard-token-injection.hcl file, which is a bit hacky, but works.
  • Avoiding this workaround by instead injecting a consul token into the env with CONSUL_HTTP_TOKEN was also attempted in different ways (at the job level, at the task level, from within the envoy sidecar env), but it didn't work: those env vars don't get utilized for the consul mesh service identity creation of the si_token file.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment