Skip to content

Instantly share code, notes, and snippets.

@kleptog
Last active March 6, 2018 07:54
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save kleptog/9a5aa56e8d2532032b6a7b32bf7cc3aa to your computer and use it in GitHub Desktop.
Save kleptog/9a5aa56e8d2532032b6a7b32bf7cc3aa to your computer and use it in GitHub Desktop.
Script to check Docker Swarm fdb
#!/usr/bin/python
from subprocess import check_output as run
import glob
# Get nodes
# universal_newlines=True makes check_output return text instead of bytes on
# Python 3 (the argument is accepted on Python 2.7 as well), so the string
# operations below behave the same on both.
nodes = run(['docker', 'node', 'ls', '-q'], universal_newlines=True).split()
# ID of the node this script runs on; check_ns() compares task node IDs to it.
self = run(['docker', 'node', 'inspect', 'self', '--format={{.ID}}'],
           universal_newlines=True).strip()
# Maps node ID -> dict(addr=<address without the ":port" part>, hostname=...).
nodeinfo = {}
# NOTE(review): .ManagerStatus is presumably only populated for manager nodes;
# if a worker renders it as a multi-word placeholder, the three-way unpack
# below raises ValueError -- confirm against a swarm that has workers.
node_lines = run(
    ['docker', 'node', 'inspect', '--format',
     '{{.ID}} {{.ManagerStatus.Addr}} {{.Description.Hostname}}'] + nodes,
    universal_newlines=True).splitlines()
for line in node_lines:
    node_id, addr, hostname = line.split()
    # Keep only the part before the first ':' (drops a trailing ":port").
    nodeinfo[node_id] = dict(addr=addr.partition(':')[0], hostname=hostname)
# Get services
# universal_newlines=True makes check_output return text instead of bytes on
# Python 3 (the argument is accepted on Python 2.7 as well).
services = run(['docker', 'service', 'ls', '-q'], universal_newlines=True).split()
# Maps service ID -> dict(id=..., name=..., addresses={network ID: virtual IP}).
serviceinfo = {}
# Maps virtual IP -> description of the service that was last seen using it.
used_ips = {}
if services:
    service_lines = run(
        ['docker', 'service', 'inspect', '--format',
         '{{.ID}} {{.Spec.Name}} '
         '{{range $now := .Endpoint.VirtualIPs}} {{$now.NetworkID}} {{$now.Addr}} {{end}}'] + services,
        universal_newlines=True).splitlines()
else:
    # With no IDs to pass, `docker service inspect` exits with a usage error,
    # which check_output would turn into CalledProcessError; skip the call.
    service_lines = []
for line in service_lines:
    fields = line.split()
    service_id, name = fields[:2]
    # The remaining fields alternate: network ID, virtual IP, network ID, ...
    vip_fields = fields[2:]
    addresses = dict(zip(vip_fields[0::2], vip_fields[1::2]))
    owner = "srv %s (%s)" % (service_id, name)
    for ip in addresses.values():
        if ip in used_ips:
            # Single-argument print(...) works as a statement on Python 2 and
            # as a function call on Python 3.
            print("Duplicate IP %s (%s & %s)" % (ip, owner, used_ips[ip]))
        used_ips[ip] = owner
    serviceinfo[service_id] = dict(id=service_id, name=name, addresses=addresses)
# Get tasks
# -q not available in 17.03, so the task IDs are read from the first column of
# the table output instead, skipping the header row (which starts with "ID").
#tasks = set(run(['docker', 'node', 'ps', '-f', 'desired-state=running', '-q'] + nodes).split())
# universal_newlines=True makes check_output return text instead of bytes on
# Python 3 (the argument is accepted on Python 2.7 as well).
tasks = set()
for line in run(['docker', 'node', 'ps', '-f', 'desired-state=running'] + nodes,
                universal_newlines=True).splitlines():
    columns = line.split()
    # A blank line has no first column to look at; ignore it rather than fail.
    if columns and columns[0] != 'ID':
        tasks.add(columns[0])
# Maps task ID -> dict(task_id, service_id, node_id, addresses={network ID: address}).
taskinfo = {}
if tasks:
    # sorted() only fixes the argument order; the set itself is unordered.
    task_lines = run(
        ['docker', 'inspect', '--format',
         '{{.ID}} {{.ServiceID}} {{.NodeID}} '
         '{{range $net := .NetworksAttachments}} {{$net.Network.ID}} {{index $net.Addresses 0}} {{end}}'] + sorted(tasks),
        universal_newlines=True).splitlines()
else:
    # With no IDs to pass, `docker inspect` exits with a usage error, which
    # check_output would turn into CalledProcessError; skip the call.
    task_lines = []
for line in task_lines:
    fields = line.split()
    task_id, service_id, node_id = fields[:3]
    # The remaining fields alternate: network ID, address, network ID, ...
    attachment_fields = fields[3:]
    addresses = dict(zip(attachment_fields[0::2], attachment_fields[1::2]))
    taskinfo[task_id] = dict(task_id=task_id, service_id=service_id, node_id=node_id, addresses=addresses)
# Get networks
# Only overlay networks are listed (filter driver=overlay).
# universal_newlines=True makes check_output return text instead of bytes on
# Python 3 (the argument is accepted on Python 2.7 as well).
networks = run(['docker', 'network', 'ls', '-q', '-f', 'driver=overlay'],
               universal_newlines=True).split()
# Maps network ID -> dict(name=..., vlan_id=<vxlanid_list driver option>).
netinfo = {}
if networks:
    network_lines = run(
        ['docker', 'network', 'inspect', '--format',
         '{{.Id}} {{.Name}} {{index .Options "com.docker.network.driver.overlay.vxlanid_list"}}'] + networks,
        universal_newlines=True).splitlines()
else:
    # With no IDs to pass, `docker network inspect` exits with a usage error,
    # which check_output would turn into CalledProcessError; skip the call.
    network_lines = []
for line in network_lines:
    net_id, name, vlan_id = line.split()
    netinfo[net_id] = dict(name=name, vlan_id=vlan_id)
# Go over networks and compare with fdb
#print nodeinfo
#print taskinfo
#print netinfo
def addr2mac(addr):
    """Return the 02:42-prefixed MAC string that corresponds to an IPv4 address.

    The result is ``02:42:`` followed by the four address octets in
    two-digit lowercase hex, e.g. ``"10.0.0.5/24"`` -> ``"02:42:0a:00:00:05"``.

    Args:
        addr: Dotted-quad IPv4 address, optionally followed by ``/<prefix>``
            (everything from the first ``/`` on is ignored).

    Returns:
        The MAC address as a colon-separated string.

    Raises:
        ValueError: If ``addr`` is not four dot-separated integers in 0..255.
    """
    host = addr.partition('/')[0]  # Strip network
    octets = host.split('.')
    if len(octets) != 4:
        # Without this check the "%" formatting below fails with an unrelated
        # TypeError about the number of format arguments.
        raise ValueError("not an IPv4 address: %r" % (addr,))
    try:
        values = [int(octet) for octet in octets]
    except ValueError:
        raise ValueError("not an IPv4 address: %r" % (addr,))
    if any(value < 0 or value > 255 for value in values):
        # An out-of-range octet would otherwise produce a malformed MAC
        # (e.g. a three-digit hex group) instead of an error.
        raise ValueError("IPv4 octet out of range in %r" % (addr,))
    return "%02x:%02x:%02x:%02x:%02x:%02x" % tuple([0x02, 0x42] + values)
def check_ns(id, netns):
    """Compare the bridge fdb of one network namespace with the known tasks.

    Runs ``bridge fdb`` inside ``netns`` via ``nsenter`` and, for every entry
    whose MAC starts with ``02:42:``, prints where the entry points (a remote
    address for vxlan devices, otherwise ``local``) followed by the task(s)
    whose address on network ``id`` maps to that MAC.  A mismatch between the
    entry's target and the node the task runs on is flagged on the next line.
    Finally the unmodified ``02:42:`` fdb lines are printed.

    Reads the module-level ``taskinfo``, ``serviceinfo``, ``nodeinfo`` and
    ``self`` tables and calls ``addr2mac``.

    Args:
        id: Network ID used as the key into each task's ``addresses`` dict.
        netns: Path of the network namespace file passed to ``nsenter --net=``.

    Returns:
        None; all results are written to standard output.

    Raises:
        Whatever the underlying calls raise, e.g. ``CalledProcessError`` from
        the ``nsenter`` command, ``IndexError`` for an fdb line with fewer
        fields than expected, or ``KeyError`` for an unknown service/node ID.
    """
    # Local import: the file's import block does not provide sys.
    import sys

    orig_fdb = []
    # universal_newlines=True makes check_output return text instead of bytes
    # on Python 3 (the argument is accepted on Python 2.7 as well).
    fdb_output = run(["nsenter", "--net=" + netns, "bridge", "fdb"],
                     universal_newlines=True)
    for raw_line in sorted(fdb_output.splitlines()):
        if not raw_line.startswith('02:42:'):
            continue
        orig_fdb.append(raw_line)
        fields = raw_line.split()
        mac = fields[0]
        if fields[1] != 'dev':
            continue
        device = fields[2]
        on_vxlan = device.startswith('vxlan')
        if on_vxlan and (fields[3:5] == ['master', 'br0'] or
                         fields[3:7] == ['vlan', '0', 'master', 'br0']):
            # probably ignore these?
            continue
        if on_vxlan:
            # Fifth field of a vxlan entry is taken as the remote target.
            act_target = fields[4]
            prefix = "%s -> %s" % (mac, act_target)
        else:
            act_target = 'local'
            prefix = "%s -> local (%s)" % (mac, device)
        # Start the output line without a newline; the next print(...) call
        # completes it.  This gives the same text on Python 2 and Python 3.
        sys.stdout.write(prefix + " ")
        if act_target == 'br0':  # No idea what this means
            print("???")
            continue
        matched = False
        exp_target = None
        for task in taskinfo.values():
            if id not in task['addresses']:
                continue
            if addr2mac(task['addresses'][id]) != mac:
                continue
            print("= srv %s (%s) -> node %s (%s)" % (
                task['task_id'], serviceinfo[task['service_id']]['name'],
                task['node_id'], nodeinfo[task['node_id']]['hostname']))
            # A task on this node should be reached locally, any other task
            # via its node's address.  If several tasks match, the last wins.
            if task['node_id'] == self:
                exp_target = 'local'
            else:
                exp_target = nodeinfo[task['node_id']]['addr']
            matched = True
        if not matched:
            print("(no service for mac)")
        elif exp_target != act_target:
            if exp_target == 'local' and act_target == nodeinfo[self]['addr']:
                print("^^^ WARN Remote reference to self?")
            else:
                print("^^^ ERROR %s != %s" % (exp_target, act_target))
    # print("") emits an empty line on both Python 2 and Python 3.
    print("")
    print("--- orig fdb")
    print("\n".join(orig_fdb))
    print("")
# Check every known overlay network that has a namespace on this node.
for net_id, net in netinfo.items():
    # Single-argument print(...) works as a statement on Python 2 and as a
    # function call on Python 3; print("") emits an empty line on both.
    print("=== Network %s (%s) vlan %s" % (net_id, net['name'], net['vlan_id']))
    # Namespace files are looked up by the first 10 characters of the network ID.
    candidates = glob.glob("/var/run/docker/netns/*-" + net_id[:10])
    if not candidates:
        print("(Not on this node)")
        print("")
        continue
    netns = candidates[0]
    print("--- check %s" % netns)
    try:
        check_ns(net_id, netns)
    except Exception as e:
        # Report the failure and carry on with the remaining networks.
        print("Exception: %s" % e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment