Skip to content

Instantly share code, notes, and snippets.

@rus-kilian
Created July 7, 2021 20:07
Show Gist options
  • Save rus-kilian/e1e6b7f3febfc75534c9af1b165db915 to your computer and use it in GitHub Desktop.
Save rus-kilian/e1e6b7f3febfc75534c9af1b165db915 to your computer and use it in GitHub Desktop.
Aruba API to prometheus exporter
#!/usr/bin/env python3
import yaml
import os.path
import pprint
import re
import time
import logging
import argparse
import requests
from requests.exceptions import Timeout
import urllib3 # For disabling SSL warnings
import socket
from prometheus_client import Gauge, Summary
from threading import Thread
# systemd socket activation
from prometheus_client import start_http_server
from prometheus_client.exposition import MetricsHandler
from prometheus_client.registry import REGISTRY
# Debian bullseye has _ThreadingSimpleServer renamed to ThreadingWSGIServer
try:
from prometheus_client.exposition import (
ThreadingWSGIServer as _ThreadingSimpleServer,
)
except ImportError:
from prometheus_client.exposition import _ThreadingSimpleServer
# Load the exporter configuration from the first candidate file that exists.
# The system-wide file takes precedence over the per-user one.
config = {}
_config_candidates = (
    "/etc/aruba_exporter.yaml",
    # expanduser() does not raise when $HOME is unset, unlike a direct
    # os.environ["HOME"] lookup.
    os.path.join(os.path.expanduser("~"), ".config.yaml"),
)
for _config_path in _config_candidates:
    if not os.path.isfile(_config_path):
        continue
    with open(_config_path, "r") as stream:
        try:
            # An empty YAML document loads as None; keep `config` a dict so
            # later key lookups fail with a KeyError instead of a TypeError.
            config = yaml.safe_load(stream) or {}
        except yaml.YAMLError as exc:
            print(exc)
            raise SystemExit(1)
    break
else:
    # No candidate file was found at all.
    print("No config.yaml")
    raise SystemExit(1)
# Command line interface of the exporter; defaults are shown in --help.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    "--debug",
    "-d",
    action="store_true",
    default=False,
    dest="debug",
    help="Run debug mode",
)
parser.add_argument(
    "--listen-port",
    default=9425,
    type=int,
    help="The port the exporter will listen on",
)
parser.add_argument(
    "--delay",
    default=120,
    type=int,
    help="The refresh delay the exporter will wait between runs",
)
# Parsed once at import time; the rest of the module reads `args`.
args = parser.parse_args()
pp = pprint.PrettyPrinter(indent=4)

# File descriptor used for the socket inherited via systemd socket activation.
SYSTEMD_FIRST_SOCKET_FD = 3
# Content type of the latest text format
CONTENT_TYPE_LATEST = "text/plain; version=0.0.1; charset=utf-8"

debug = args.debug
logger = logging.getLogger(__name__)

# Console handler; it and the logger are only lowered to DEBUG in debug mode.
ch = logging.StreamHandler()
if debug:
    logger.setLevel(logging.DEBUG)
    ch.setLevel(logging.DEBUG)
ch.setFormatter(
    logging.Formatter(
        "%(asctime)s - %(name)s/%(threadName)s - %(levelname)s - %(message)s"
    )
)
logger.addHandler(ch)

# Maps a target host to the UIDARUBA session token obtained at login.
session_cookies = {}

# NOTE(review): TLS certificate verification is disabled for every API
# request and the resulting urllib3 warnings are silenced.
urllib3.disable_warnings()
http_session = requests.Session()
http_session.verify = False
# --- Exporter self-monitoring -------------------------------------------
Aruba_collect = Summary(
    "aruba_collect",
    "Aruba poller details collecting and processing AP and Client stats",
)
# NOTE(review): only ever incremented, yet declared as a Gauge.
Aruba_API_calls_sent = Gauge(
    "aruba_api_calls_sent",
    "Aruba API calls sent",
    labelnames=["script", "md", "method", "transport"],
)
Aruba_module_collect = Gauge(
    "aruba_module_collect",
    "Aruba poller details per module",
    labelnames=["module"],
)
Aruba_module_collect_AP = Gauge(
    "aruba_module_collect_ap",
    "Aruba poller details per module per AP",
    labelnames=["module", "ap"],
)

# --- Controller (MD) datapath / control plane ---------------------------
Aruba_DP_DMA_Counters = Gauge(
    "aruba_dp_dma_counter",
    "Aruba datapath dma queue counters",
    labelnames=["md", "queue", "processor"],
)
Aruba_CP_BWM_Table = Gauge(
    "aruba_cp_bwm_table",
    "Aruba control plane bandwidth table",
    labelnames=["md", "queue", "status"],
)
Aruba_DP_Frame_Counters = Gauge(
    "aruba_dp_frame_counters",
    "Aruba datapath frame counters",
    labelnames=["md", "slot", "key"],
)

# --- VLAN distribution ---------------------------------------------------
Aruba_Vlan = Gauge(
    "aruba_vlan",
    "Aruba Vlan distribution details for wireless users connected to MD",
    labelnames=["md", "vlan"],
)
Aruba_Vlan_APgroup = Gauge(
    "aruba_vlan_apgroup",
    "Aruba Vlan distribution details for wireless users connected to MD within certain AP group",
    labelnames=["md", "vlan", "group"],
)

# --- Access points -------------------------------------------------------
Aruba_AP_count = Gauge(
    "aruba_ap_up_count",
    "Aruba AP Database status up per MD",
    labelnames=["md"],
)
Aruba_AP_Uptime = Gauge(
    "aruba_ap_uptime",
    "Aruba AP Uptime",
    labelnames=["md", "ap"],
)
Aruba_AP_PoE_degraded = Gauge(
    "aruba_ap_poe_degraded",
    "Aruba AP with PoE handshake issues",
    labelnames=["md"],
)
Aruba_AP_Channel = Gauge(
    "aruba_ap_chan",
    "Aruba AP channel",
    labelnames=["md", "ap", "radio"],
)
Aruba_AP_PoE = Gauge(
    "aruba_ap_poe",
    "Aruba AP PoE handshake",
    labelnames=["ap"],
)
Aruba_AP_bootstraps = Gauge(
    "aruba_ap_bootstraps",
    "Aruba AP bootstraps",
    labelnames=["md", "ap"],
)
Aruba_AP_reboots = Gauge(
    "aruba_ap_reboots",
    "Aruba AP reboots",
    labelnames=["md", "ap"],
)
Aruba_AP_keepalive = Gauge(
    "aruba_ap_keepalive",
    "Aruba AP keepalives",
    labelnames=["md", "ap", "status"],
)
Aruba_AP_power_update = Gauge(
    "aruba_ap_power_update",
    "Aruba AP power updates",
    labelnames=["ap", "status"],
)
Aruba_AP_config = Gauge(
    "aruba_ap_config",
    "Aruba AP config sync",
    labelnames=["md", "ap", "status"],
)
Aruba_AP_crash = Gauge(
    "aruba_ap_crash",
    "Aruba AP crash report",
    labelnames=["md", "ap"],
)
Aruba_AP_health = Gauge(
    "aruba_ap_health",
    "Aruba AP IP health check",
    labelnames=["md", "ap", "status"],
)
Aruba_AP_channel_status = Gauge(
    "aruba_ap_channel",
    "Aruba AP channel status check",
    labelnames=["ap", "channel", "status", "service"],
)
Aruba_AP_radio_status = Gauge(
    "aruba_ap_radio_stats",
    "Aruba AP radio status",
    labelnames=["ap", "radio", "status"],
)

# --- Clients -------------------------------------------------------------
Aruba_clients_connected = Gauge(
    "aruba_clients_connected",
    "Aruba clients connected to AP",
    labelnames=["md", "ap"],
)
Aruba_client_status = Gauge(
    "aruba_client_stats",
    "Aruba client status",
    labelnames=["client", "status"],
)

# Captures the leading run of digits of a value (e.g. "36E" -> "36").
ap_chan_re = re.compile(r"^(\d+)[^\d]?")
class SocketInheritingHTTPServer(_ThreadingSimpleServer):
    """A HttpServer subclass that takes over an inherited socket from systemd.

    Args:
        address_info: ``(host, port)`` tuple handed to the base server.
        handler: request handler class handed to the base server.
        fd: file descriptor of the already-bound listening socket.
        bind_and_activate: when true, activate (listen on) the inherited
            socket; binding is never performed here.
    """

    def __init__(self, address_info, handler, fd, bind_and_activate=True):
        # Never let the base class bind: the inherited socket is ready-bound.
        _ThreadingSimpleServer.__init__(
            self, address_info, handler, bind_and_activate=False
        )
        logger.debug("http server init complete - passing socket")
        # The base constructor already created an (unbound) socket. Close it
        # before replacing it, otherwise its file descriptor is leaked.
        self.socket.close()
        self.socket = socket.fromfd(fd, self.address_family, self.socket_type)
        if bind_and_activate:
            # NOTE: systemd provides ready-bound sockets, so we only need to activate:
            logger.debug("http server activating")
            self.server_activate()
        else:
            logger.debug("http server NOT activated")
def target_login(target):
    """Log in to the API on ``target`` and remember the session token.

    On success the UIDARUBA token is stored in ``session_cookies[target]``.

    Args:
        target: host name or address of the device to log in to.

    Returns:
        The UIDARUBA token string on success, ``False`` on timeout, on an
        HTTP error status, or when the response carries no token.
    """
    logger.debug("Logging in to %s and add to session_cookies" % target)
    login_url = "https://" + target + ":4343/v1/api/login"
    # Initiate login with authentication, and persistently store cookies
    try:
        login_response = http_session.get(
            login_url,
            params={"username": config["login"], "password": config["password"]},
            timeout=(1, 3),
        )
    except Timeout:
        logger.error("Timeout while logging in to %s" % target)
        return False

    # A requests.Response is falsy for 4xx/5xx status codes.
    if not login_response:
        # FIXME: add retry
        logger.error(
            "Login failed with HTTP status code %d" % login_response.status_code
        )
        Aruba_API_calls_sent.labels(
            script="aruba_exporter.py",
            md=target,
            transport="http",
            method="login_failed",
        ).inc()
        return False

    # Login to store UIDARUBA
    try:
        http_session_arubauid = login_response.json()["_global_result"]["UIDARUBA"]
    except (ValueError, KeyError, TypeError):
        # A "successful" reply without a usable token must not abort the
        # whole collection run; treat it like a failed login.
        logger.error(
            "Login to %s returned no UIDARUBA session token (HTTP status code %d)"
            % (target, login_response.status_code)
        )
        Aruba_API_calls_sent.labels(
            script="aruba_exporter.py",
            md=target,
            transport="http",
            method="login_failed",
        ).inc()
        return False

    logger.debug(
        "Logged in sucessfully with HTTP status code %d" % login_response.status_code
    )
    logger.debug("Received UIDARUBA: " + http_session_arubauid)
    session_cookies[target] = http_session_arubauid
    Aruba_API_calls_sent.labels(
        script="aruba_exporter.py", md=target, transport="http", method="login"
    ).inc()
    return http_session_arubauid
def showcli(target, command, retries=3):
    """Run a CLI show command on ``target`` through the API.

    Logs in first when no session token is cached for ``target``. Timeouts
    and empty responses are retried up to ``retries`` times. A 401 answer
    triggers exactly one re-login per call, after which the retry budget
    starts over; a second 401 gives up instead of looping forever.

    Args:
        target: host name or address of the device.
        command: the show command to execute.
        retries: maximum number of attempts.

    Returns:
        The decoded JSON response, or ``None`` on any failure.
    """
    if target not in session_cookies and not target_login(target):
        return None
    show_command_url = "https://" + target + ":4343/v1/configuration/showcommand"

    attempts_left = retries
    reauthenticated = False
    while attempts_left > 0:
        attempts_left -= 1
        try:
            # FIXME: for now we don't track individual commands to spare prometheus series...
            Aruba_API_calls_sent.labels(
                script="aruba_exporter.py",
                md=target,
                transport="http",
                method="showcommand",
            ).inc()
            show_command_response = http_session.get(
                show_command_url,
                params={
                    "json": "1",
                    "command": command,
                    # Re-read on every attempt so a fresh token is picked up
                    # after a re-login.
                    "UIDARUBA": session_cookies[target],
                },
                timeout=(3, 8),
                headers={"Connection": "close"},
            )
        except Timeout:
            Aruba_API_calls_sent.labels(
                script="aruba_exporter.py",
                md=target,
                transport="http",
                method="showcommand_timeout",
            ).inc()
            logger.warning(
                'Timeout while executing "%s" on %s. Retrying...' % (command, target)
            )
            http_session.close()
            continue

        http_session.close()
        status_code = show_command_response.status_code

        if status_code == 200:
            if show_command_response.text == "":
                logger.warning("Empty response received. Retrying.")
                # let's retry
                time.sleep(3)
                continue
            try:
                return show_command_response.json()
            except ValueError:
                Aruba_API_calls_sent.labels(
                    script="aruba_exporter.py",
                    md=target,
                    transport="http",
                    method="showcommand_invalid_json",
                ).inc()
                logger.error("Invalid response received! Not JSON:")
                logger.error(show_command_response.text)
                return None

        if status_code == 401:
            logger.error("Unauthenticated on %s. Retrying." % target)
            Aruba_API_calls_sent.labels(
                script="aruba_exporter.py",
                md=target,
                transport="http",
                method="showcommand_auth_invalid",
            ).inc()
            if reauthenticated:
                # A fresh token was rejected as well; do not loop forever.
                logger.error(
                    "Still unauthenticated on %s after re-login. Giving up." % target
                )
                return None
            if not target_login(target):
                return None
            reauthenticated = True
            attempts_left = retries
            continue

        Aruba_API_calls_sent.labels(
            script="aruba_exporter.py",
            md=target,
            transport="http",
            method="showcommand_error_%d" % status_code,
        ).inc()
        logger.error(
            "Received unhandled error code: %d on %s" % (status_code, target)
        )
        return None

    logger.error('Retries exceeded with Timeout for "%s" on %s' % (command, target))
    return None
def get_controllers(target):
    """Return ``{ip: name}`` for every switch of type "MD" known to ``target``.

    Runs "show switches" and reads the "All Switches" table; entries of any
    other type are skipped. An empty dict is returned when nothing usable
    comes back.
    """
    logger.debug("retrieving controlers managed by %s" % target)
    data = showcli(target, "show switches")
    controllers = {}
    if not data or "All Switches" not in data:
        return controllers
    for switch in data["All Switches"]:
        if switch["Type"] != "MD":
            logger.debug("Ignoring type %s" % switch["Type"])
            continue
        ip, name = switch["IP Address"], switch["Name"]
        logger.debug("Adding %s (%s)" % (ip, name))
        controllers[ip] = name
    return controllers
def uptime2sec(upstring):
    """Convert an uptime string such as ``"1d:2h:3m:4s"`` to seconds.

    Each colon-separated element is a number followed by a one-letter unit
    (``s``, ``m``, ``h`` or ``d``). Elements with an unknown unit contribute
    nothing. Empty elements and surrounding whitespace are tolerated.

    Args:
        upstring: the uptime string to convert.

    Returns:
        The uptime in seconds as an int (0 for an empty string).

    Raises:
        ValueError: if the numeric part of an element is not an integer.
    """
    seconds_per_unit = {"s": 1, "m": 60, "h": 60 * 60, "d": 60 * 60 * 24}
    uptime = 0
    for element in upstring.split(":"):
        element = element.strip()
        if not element:
            # "" or a trailing ":" would otherwise crash in int("").
            continue
        timeint = int(element[:-1])
        uptime += timeint * seconds_per_unit.get(element[-1], 0)
    return uptime
def ap_db(target, group=""):
    """Return the APs with status "up" from the AP database of ``target``.

    Args:
        target: device to query.
        group: optional AP group to restrict the query to.

    Returns:
        A dict keyed by AP name; each value holds ``ap_type``, ``group``,
        ``status``, ``uptime`` (seconds), ``standby``, ``ip_address``,
        ``flags`` and ``switch``. Empty when nothing usable comes back.
    """
    command = "show ap database status up"
    if group != "":
        logger.debug("collecting ap database for group %s on %s" % (group, target))
        command = "show ap database status up group %s" % group
    else:
        logger.debug("collecting ap database on %s" % target)
    data = showcli(target, command)

    aps = {}
    if not data or "AP Database" not in data:
        return aps
    for ap in data["AP Database"]:
        # Status is "<state> <uptime>"; split on any whitespace run so that
        # repeated blanks do not yield empty fields.
        status = ap["Status"].split()
        aps[ap["Name"]] = {
            "ap_type": ap["AP Type"],
            "group": ap["Group"],
            "status": status[0] if status else "",
            # A status without an uptime part counts as 0 seconds instead
            # of raising IndexError.
            "uptime": uptime2sec(status[1]) if len(status) > 1 else 0,
            "standby": ap["Standby IP"],
            "ip_address": ap["IP Address"],
            "flags": ap["Flags"],
            "switch": ap["Switch IP"],
        }
    return aps
def cp_bwcontracts(target):
    """Return ``{contract id (int): contract name}`` from "show cp-bwcontracts".

    An empty dict is returned when the command yields no "CP bw contracts"
    table.
    """
    logger.debug("collecting controlplane bandwidth contracts on %s" % target)
    data = showcli(target, "show cp-bwcontracts")
    if not data or "CP bw contracts" not in data:
        return {}
    return {int(row["Id"]): row["Contract"] for row in data["CP bw contracts"]}
# A separator line: consists solely of "=", "-" and whitespace.
section_re = re.compile(r"^[=\s-]+$")
# One table row: cpu, contract id, rate "pps", policed, credits, bytes/packets.
bwm_cp_re = re.compile(
    r"^(\d+)\s+(\d+)\s+(\d+)\s+pps\s+(\d+)\s+(\d+)\s+(\d+)\/(\d+)\s*"
)


def bwm_cp_table(target):
    """Parse "show datapath cp-bwm table" into per-contract counters.

    Only lines seen while exactly two separator lines have been passed are
    parsed. The cpu column is ignored, so a later row for the same contract
    id replaces an earlier one.

    Returns:
        ``{contract id: {"rate_pps", "policed", "credits", "queued_bytes",
        "queued_packets"}}`` with int values; empty when nothing matches.
    """
    logger.debug("collecting datapath cp-bwm table on %s" % target)
    data = showcli(target, "show datapath cp-bwm table")
    entries = {}
    if not data or "_data" not in data:
        return entries

    field_names = ("rate_pps", "policed", "credits", "queued_bytes", "queued_packets")
    separators_seen = 0
    for line in data["_data"]:
        if section_re.match(line):
            separators_seen += 1
        if separators_seen != 2:
            continue
        row = bwm_cp_re.match(line)
        if row is None:
            continue
        # groups: 1 = cpu (unused), 2 = contract id, 3..7 = the counters
        counters = [int(value) for value in row.groups()[2:]]
        entries[int(row.group(2))] = dict(zip(field_names, counters))
    return entries
# Header line announcing a slot number, e.g. "| Slot | 0 |".
slot_re = re.compile(r"^\|\s*Slot\s*\|\s*(\d+)\s*\|")
# Counter line: "| <description> <value> |".
dp_f_counters = re.compile(r"^.*\|\s+(\S[^\|]+\S)\s+(\d+)\s+\|$")


def dp_frame_counters(target):
    """Parse "show datapath frame counters" into ``{slot: {description: value}}``.

    Counter lines are attributed to the most recently seen slot header;
    lines before the first header are ignored.
    """
    logger.debug("collecting datapath frame counters on %s" % target)
    data = showcli(target, "show datapath frame counters")
    entries = {}
    if not data or "_data" not in data:
        return entries

    current_slot = None
    for line in data["_data"]:
        header = slot_re.match(line)
        if header:
            current_slot = int(header.group(1))
            entries[current_slot] = {}
        if current_slot is None:
            continue
        counter = dp_f_counters.match(line)
        if counter:
            description = counter.group(1)
            entries[current_slot][description] = int(counter.group(2))
    return entries
# One table row: queue number followed by two counters.
dp_dma_re = re.compile(r"^(\d+)\s+(\d+)\s+(\d+)\s*")


def datapath_dma_counters(target):
    """Parse "show datapath debug dma counters" into per-queue counters.

    Only lines seen while exactly two separator lines (``section_re``) have
    been passed are parsed.

    Returns:
        ``{queue: {"cp_full": int, "np_full": int}}``; empty when nothing
        matches.
    """
    logger.debug("collecting datapath debug dma counters on %s" % target)
    data = showcli(target, "show datapath debug dma counters")
    entries = {}
    if not data or "_data" not in data:
        return entries

    separators_seen = 0
    for line in data["_data"]:
        if section_re.match(line):
            separators_seen += 1
        if separators_seen != 2:
            continue
        row = dp_dma_re.match(line)
        if row is None:
            continue
        queue, cp_full, np_full = (int(value) for value in row.groups())
        entries[queue] = {"cp_full": cp_full, "np_full": np_full}
    return entries
def ap_association(target, group=""):
    """Count associations per VLAN id from "show ap association".

    Args:
        target: device to query.
        group: optional AP group to restrict the query to.

    Returns:
        ``{vlan id: number of rows in the Association Table}``; empty when
        the table is missing.
    """
    if group != "":
        logger.debug("collecting ap association for group %s on %s" % (group, target))
        command = "show ap association ap-group %s" % group
    else:
        logger.debug("collecting ap association on %s" % target)
        command = "show ap association"
    data = showcli(target, command)

    associations = {}
    if not data or "Association Table" not in data:
        return associations
    for row in data["Association Table"]:
        vlan = row["vlan-id"]
        associations[vlan] = associations.get(vlan, 0) + 1
    return associations
def ap_client_table(target, ap):
    """Return per-client details from "show ap debug client-table" for one AP.

    Rows whose MAC is ``None`` are skipped.

    Returns:
        ``{mac: {"ACK_SNR", "state", "health", "ps_qlen", "tx_retries"}}``,
        or ``None`` when the table is missing or has an unexpected layout.
    """
    logger.debug("collecting ap debug client table for AP %s on %s" % (ap, target))
    command = "show ap debug client-table ap-name %s" % ap
    data = showcli(target, command)
    if not data or "Client Table" not in data:
        return None

    clients = {}
    try:
        for client in data["Client Table"]:
            mac = client["MAC"]
            if mac is None:
                # don't add empty client
                continue
            clients[mac] = {
                "ACK_SNR": client["Last_ACK_SNR"],
                "state": client["Assoc_State"],
                "health": client["Client health (C/R)"],
                "ps_qlen": client["PS_Qlen"],
                "tx_retries": client["Tx_Retries"],
            }
    except (KeyError, TypeError):
        # Record which AP produced the unexpected layout and why, instead of
        # only dumping the raw data to stdout.
        logger.error(
            "Unexpected client table layout for AP %s on %s" % (ap, target),
            exc_info=True,
        )
        logger.debug(pp.pformat(data))
        return None
    return clients
def ap_poe(target, ap):
    """Return the "Power Supply" value from the AP's power-mgmt statistics.

    Searches every section whose name contains "AP Power Mgmt Status" and
    returns the "Value" of the first row whose "Attr" contains
    "Power Supply"; ``None`` when no such row exists.
    """
    logger.debug("collecting ap PoE stats for AP %s on %s" % (ap, target))
    data = showcli(target, "show ap power-mgmt-statistics ap-name %s" % ap)
    if not data:
        return None
    for section, rows in data.items():
        if "AP Power Mgmt Status" not in section:
            continue
        for row in rows:
            if "Power Supply" in row["Attr"]:
                return row["Value"]
    return None
# Section header 'AP <name> <section title>'; group 1 is the section title.
ap_detail_key = re.compile(r'^\s*AP\s["a-z0-9A-Z,-]+\s+([^\s].*[^\s])+\s*$')
# Section title of a per-radio block; group 1 is the radio number.
ap_radio_oper_info = re.compile(r"Radio (\d) Operating Information")


def ap_detail(target, ap):
    """Extract selected values from "show ap details advanced" for one AP.

    Returns:
        A dict that may contain ``messages`` (per message type the
        "Acknowledged", "New" and "Total" values), ``reboots``,
        ``bootstraps`` and ``channel<N>`` per radio; ``None`` when the
        command returned nothing.
    """
    logger.debug("collecting ap details advanced for AP %s on %s" % (ap, target))
    data = showcli(target, "show ap details advanced ap-name %s" % ap)
    if not data:
        logger.error("No output received!")
        return None

    items = {}
    for section in data:
        header = ap_detail_key.match(section)
        if not header:
            logger.debug('Ignoring key "%s"' % section)
            continue
        title = header.group(1)

        if title == "AP to Switch Message Counts":
            rows = data[section]
            items["messages"] = {}
            for msg in rows:
                items["messages"][msg["Message"]] = {
                    "Acknowledged": msg["Acknowledged"],
                    "New": msg["New"],
                    "Total": msg["Total"],
                }
        elif title == "Operating Information":
            for row in data[section]:
                if row["Item"] == "Reboots":
                    items["reboots"] = row["Value"]
                elif row["Item"] == "Bootstraps":
                    items["bootstraps"] = row["Value"]
        else:
            radio_header = ap_radio_oper_info.match(title)
            if radio_header:
                radio = int(radio_header.group(1))
                for row in data[section]:
                    if row["Item"] == "Channel":
                        items["channel%d" % radio] = row["Value"]
            # any other section is silently skipped
    return items
# "<bootstraps> (<total>)" as found in the "Bootstraps (Total)" column.
bootstraps_re = re.compile(r"^(\d+)\s+\((\d+)\s*\)")


def ap_debug_counters(target, ap):
    """Return selected values of the first "AP Counters" row for one AP.

    Returns:
        A dict with ``configs_ack``, ``configs_sent``, ``crash`` and
        ``reboots``, plus ``bootstraps`` and ``bootstraps_total`` when the
        "Bootstraps (Total)" value has the expected format; ``None`` when
        the table is missing or empty.
    """
    logger.debug("collecting ap debug counters for AP %s on %s" % (ap, target))
    command = "show ap debug counters ap-name %s" % ap
    data = showcli(target, command)
    if not data or "AP Counters" not in data:
        return None

    rows = data["AP Counters"]
    if not rows:
        # An empty table would otherwise raise IndexError on rows[0].
        logger.error("Received empty AP counters for %s on %s" % (ap, target))
        return None

    ret = rows[0]
    items = {
        "configs_ack": ret["Configs Acked"],
        "configs_sent": ret["Configs Sent"],
        "crash": ret["Crash"],
        "reboots": ret["Reboots"],
    }
    b = bootstraps_re.match(ret["Bootstraps (Total)"])
    if b:
        items["bootstraps"] = b.group(1)
        items["bootstraps_total"] = b.group(2)
    return items
# "<pct>% <text><a>/<b><text>"; groups: percentage, a, b.
health_loss = re.compile(r"^([0-9\.]+)%\s+[^0-9]+([0-9]+)\/([0-9]+)[^0-9]*$")


def ap_ip_health(target, ap):
    """Return values of the first "AP Health-Check Status" row for one AP.

    Returns:
        A dict with ``avg_rtt`` and, when the "1 min Loss" value has the
        expected format, ``loss_pct`` and ``loss_pkt``; ``None`` when the
        table is missing or empty.
    """
    logger.debug("collecting ap ip health-check for AP %s on %s" % (ap, target))
    data = showcli(target, "show ap ip health-check ap-name %s" % ap)
    if not data or "AP Health-Check Status" not in data:
        return None

    rows = data["AP Health-Check Status"]
    if rows == []:
        logger.error(
            "Received empty AP health check status for %s on %s" % (ap, target)
        )
        return None

    first = rows[0]
    items = {"avg_rtt": first["1 min Avg RTT"]}
    loss = health_loss.match(first["1 min Loss"])
    if loss:
        items["loss_pct"], items["loss_pkt"] = loss.group(1), loss.group(2)
    return items
def ap_rf_verbose(target, ap):
    """Return channel statistics from "show ap arm rf-summary" for one AP.

    Returns:
        ``None`` when there is no "Channel Summary"; otherwise a dict with
        ``channels`` (per channel: "noise", "mac-err", "phy-err", "retry"),
        ``ht_vht_channels`` (per channel range: "bandwidth" and integer
        "interference") and, when present in the output, ``cur_chan`` and
        ``bcn_fail``.
    """
    logger.debug("collecting ap arm rf-summary for AP %s on %s" % (ap, target))
    command = "show ap arm rf-summary ap-name %s" % ap
    data = showcli(target, command)
    if not data or "Channel Summary" not in data:
        return None

    items = {}
    if "Cur Chan: cca_ibss/cca_obss/cca_intf" in data:
        items["cur_chan"] = data["Cur Chan: cca_ibss/cca_obss/cca_intf"]
    # The key may or may not carry a trailing blank; accept both spellings
    # and read the value under the spelling that was actually found.
    bcn_key = "Bcn fail/Bstuck reset/Scan rej(l)"
    for candidate in (bcn_key, bcn_key + " "):
        if candidate in data:
            items["bcn_fail"] = data[candidate]
            break

    items["channels"] = {}
    for c in data["Channel Summary"]:
        items["channels"][c["channel"]] = {
            "noise": c["noise"],
            "mac-err": c["mac-err"],
            "phy-err": c["phy-err"],
            "retry": c["retry"],
        }
        # not exported: 'cov-idx(Total)', 'intf_idx(Total)', 'util(Qual)'

    items["ht_vht_channels"] = {}
    # Tolerate output without an HT/VHT table.
    for c in data.get("HT/VHT Channel Summary") or []:
        items["ht_vht_channels"][c["Channel range"]] = {
            "bandwidth": c["Bandwidth"],
            "interference": int(c["Total interference index"]),
        }
    return items
def ap_debug_radio_stats(target, ap, radio):
    """Return ``{parameter: value}`` from the "RADIO Stats" table of one radio.

    Returns ``None`` when the command yields nothing and an empty dict when
    the table is missing.
    """
    logger.debug(
        "collecting ap debug radio-stats (radio %d) for AP %s on %s"
        % (radio, ap, target)
    )
    data = showcli(
        target,
        "show ap debug radio-stats ap-name %s radio %d advanced" % (ap, radio),
    )
    if not data:
        return None
    if "RADIO Stats" not in data:
        return {}
    return {row["Parameter"]: row["Value"] for row in data["RADIO Stats"]}
def ap_debug_client_stats(target, client):
    """Return ``{parameter: value}`` from the "Station Stats" table of a client.

    Returns ``None`` when the command yields nothing and an empty dict when
    the table is missing.
    """
    logger.debug(
        "collecting ap debug client-stats for client-mac %s on %s" % (client, target)
    )
    data = showcli(target, "show ap debug client-stats client-mac %s" % client)
    if not data:
        return None
    if "Station Stats" not in data:
        return {}
    return {row["Parameter"]: row["Value"] for row in data["Station Stats"]}
def lookup_user(md, client):
    """Return the distinct AP names the user-table of ``md`` lists for ``client``.

    Names are kept in order of first appearance. Returns ``None`` when the
    "Users" table is missing.
    """
    logger.debug("Searching for %s on %s" % (client, md))
    data = showcli(md, "show user-table mac %s" % client)
    if not data or "Users" not in data:
        return None

    ap_names = []
    for session in data["Users"]:
        ap_name = session["AP name"]
        if ap_name in ap_names:
            continue
        logger.debug("Adding %s to APs user %s is on" % (ap_name, client))
        ap_names.append(ap_name)
    return ap_names
def collect_stats(modname, offset):
    """Record the time elapsed since ``offset`` for module ``modname``.

    Returns the current timestamp so it can serve as the next offset.
    """
    now = time.time()
    elapsed = now - offset
    Aruba_module_collect.labels(module=modname).set(elapsed)
    return now
def collect_stats_per_ap(modname, ap, offset):
    """Record the time elapsed since ``offset`` for module ``modname`` and AP ``ap``.

    Returns the current timestamp so it can serve as the next offset.
    """
    now = time.time()
    elapsed = now - offset
    Aruba_module_collect_AP.labels(module=modname, ap=ap).set(elapsed)
    return now
# MAIN #

# Gauge value exported for each known "Power Supply" string; anything else
# is exported as 0.
_POE_GAUGE_VALUES = {"POE-AF": 1, "POE-AT": 2, "POE-BT": 3, "None": -1}


def _export_md_datapath_stats(md):
    """Export DMA, control-plane bandwidth and frame counters of one MD."""
    logger.debug("Fetching controlplane and bandwidth stats for %s" % md)
    dp_dma_c = datapath_dma_counters(md)
    if dp_dma_c:
        for q in dp_dma_c:
            for p in dp_dma_c[q]:
                logger.debug(
                    "Adding DP DMA counter for %s, queue: %s, processor: %s, value: %d"
                    % (md, q, p, dp_dma_c[q][p])
                )
                Aruba_DP_DMA_Counters.labels(md=md, queue=q, processor=p).set(
                    dp_dma_c[q][p]
                )

    cp_bwcon = cp_bwcontracts(md) or {}
    md_cp_bwm = bwm_cp_table(md)
    if md_cp_bwm:
        for c in md_cp_bwm:
            # Fall back to the numeric id when the contract name is unknown
            # rather than aborting the whole run with a KeyError.
            contract = cp_bwcon.get(c, c)
            for q in md_cp_bwm[c]:
                logger.debug(
                    "Adding CP BWM table entry for %s, class/queue: %s, entry: %s, value: %d"
                    % (md, c, q, md_cp_bwm[c][q])
                )
                Aruba_CP_BWM_Table.labels(md=md, queue=contract, status=q).set(
                    md_cp_bwm[c][q]
                )

    dp_f_c = dp_frame_counters(md)
    if dp_f_c:
        for slot in dp_f_c:
            for (k, v) in dp_f_c[slot].items():
                logger.debug(
                    'Adding datapath frame counter in slot %d, "%s": %d'
                    % (slot, k, v)
                )
                Aruba_DP_Frame_Counters.labels(md=md, slot=slot, key=k).set(v)


def _export_md_client_stats(md):
    """Export VLAN association counts and sampled-client stats of one MD."""
    logger.debug("Fetching vlan associations for %s" % md)
    md_assoc = ap_association(md)
    if md_assoc:
        for vlan, value in md_assoc.items():
            logger.debug("Adding %d to associations for %s on %s" % (value, vlan, md))
            Aruba_Vlan.labels(md=md, vlan=vlan).set(value)

    for c in config.get("aruba_sample_clients") or []:
        # check if this MD actually has this user...
        if not lookup_user(md, c):
            continue
        data = ap_debug_client_stats(md, c)
        if not data:
            continue
        for s in config.get("aruba_client_fields") or []:
            if s in data:
                Aruba_client_status.labels(client=c, status=s).set(data[s])
            else:
                logger.debug('Ignoring missing "%s" in client stats for %s' % (s, c))


def _export_ap_counts(ap_in_db, all_mds):
    """Export the number of APs, and of PoE-degraded APs, per MD."""
    logger.debug("Found total %d APs in database - registered in mm" % len(ap_in_db))
    for ip, md in all_mds.items():
        on_md = [v for v in ap_in_db.values() if v["switch"] == ip]
        logger.debug("Found total %d APs in database connected to %s" % (len(on_md), md))
        Aruba_AP_count.labels(md=md).set(len(on_md))
        ap_poe_degraded = sum(1 for v in on_md if "r" in (v["flags"] or ""))
        # Always set the gauge so it drops back to 0 once no AP is degraded.
        Aruba_AP_PoE_degraded.labels(md=md).set(ap_poe_degraded)


def _export_ap_groups(mm, all_mds):
    """Export uptime and per-group VLAN counts; return ``{ap name: md}``."""
    aps = {}
    for group in config.get("aruba_ap_groups") or []:
        ap_in_group = ap_db(mm, group)
        if not ap_in_group:
            continue
        group_mds = []
        for ap, details in ap_in_group.items():
            md = all_mds.get(details["switch"])
            if md is None:
                logger.warning(
                    "Skipping AP %s: switch %s is not a known MD"
                    % (ap, details["switch"])
                )
                continue
            Aruba_AP_Uptime.labels(md=md, ap=ap).set(details["uptime"])
            aps[ap] = md
            if md not in group_mds:
                group_mds.append(md)
        # Query every MD that hosts APs of this group exactly once.
        for md in group_mds:
            assoc = ap_association(md, group)
            if assoc:
                for k, v in assoc.items():
                    Aruba_Vlan_APgroup.labels(md=md, vlan=k, group=group).set(v)
    return aps


def _export_sampled_client(c, stats):
    """Export the client-table values of one sampled client."""
    for field in ("ACK_SNR", "ps_qlen", "tx_retries"):
        if field in stats:
            Aruba_client_status.labels(client=c, status=field).set(stats[field])
    if "health" in stats:
        health = stats["health"].split("/")
        Aruba_client_status.labels(client=c, status="health1").set(health[0])
        if len(health) > 1:
            Aruba_client_status.labels(client=c, status="health2").set(health[1])


def _export_ap_stats(md, ap):
    """Collect and export every per-AP metric of ``ap`` connected to ``md``."""
    intermediate = time.time()

    poe = ap_poe(md, ap)
    if poe:
        logger.debug('Obtained PoE for AP %s as "%s"' % (ap, poe))
        Aruba_AP_PoE.labels(ap=ap).set(_POE_GAUGE_VALUES.get(poe, 0))
    intermediate = collect_stats_per_ap("get_ab_poe", ap, intermediate)

    # FIXME: select min/max/avg SNR, qlen and txretries?
    client_table = ap_client_table(md, ap)
    if client_table:
        Aruba_clients_connected.labels(md=md, ap=ap).set(len(client_table))
        # XXX: just select a few clients to not clutter prometheus... (and respect privacy)
        sample_clients = config.get("aruba_sample_clients") or []
        for c in client_table:
            if c in sample_clients:
                _export_sampled_client(c, client_table[c])
    else:
        Aruba_clients_connected.labels(md=md, ap=ap).set(0)
    intermediate = collect_stats_per_ap("get_ap_client_table", ap, intermediate)

    ap_chan = []
    ret = ap_detail(md, ap)
    if ret:
        for radio in (0, 1):
            channel = ret.get("channel%d" % radio)
            if channel is None:
                # no channel reported for this radio
                continue
            ap_chan.append(channel)
            Aruba_AP_Channel.labels(md=md, ap=ap, radio=radio).set(channel)
        if "bootstraps" in ret:
            Aruba_AP_bootstraps.labels(md=md, ap=ap).set(ret["bootstraps"])
        if "reboots" in ret:
            Aruba_AP_reboots.labels(md=md, ap=ap).set(ret["reboots"])
        messages = ret.get("messages", {})
        for s, value in messages.get("KEEPALIVE", {}).items():
            Aruba_AP_keepalive.labels(md=md, ap=ap, status=s).set(value)
        for s, value in messages.get("PWR_EVENT_UPDATE", {}).items():
            Aruba_AP_power_update.labels(ap=ap, status=s).set(value)
    intermediate = collect_stats_per_ap("get_ap_detail", ap, intermediate)

    ret = ap_debug_counters(md, ap)
    if ret:
        Aruba_AP_config.labels(md=md, ap=ap, status="ACK").set(ret["configs_ack"])
        Aruba_AP_config.labels(md=md, ap=ap, status="sent").set(ret["configs_sent"])
        # The bootstrap values are only present when their format was parsed.
        for status in ("bootstraps", "bootstraps_total"):
            if status in ret:
                Aruba_AP_config.labels(md=md, ap=ap, status=status).set(ret[status])
        Aruba_AP_crash.labels(md=md, ap=ap).set(0 if ret["crash"] == "N" else 1)
    intermediate = collect_stats_per_ap("get_ap_debug_counters", ap, intermediate)

    ret = ap_ip_health(md, ap)
    if ret:
        for k in ret:
            Aruba_AP_health.labels(md=md, ap=ap, status=k).set(ret[k])
    intermediate = collect_stats_per_ap("get_ap_ip_health", ap, intermediate)

    ret = ap_rf_verbose(md, ap)
    if ret:
        for c in ret["channels"]:
            in_service = "y" if c in ap_chan else "n"
            for s in ret["channels"][c]:
                _chan = ret["channels"][c][s]
                # Keep only the leading digits when the value starts with
                # digits; other values are passed on unchanged.
                _c = ap_chan_re.match(_chan)
                if _c:
                    _chan = _c.group(1)
                Aruba_AP_channel_status.labels(
                    ap=ap, channel=c, status=s, service=in_service
                ).set(_chan)

    for radio in [0, 1]:
        ret = ap_debug_radio_stats(md, ap, radio)
        if ret:
            for s in config.get("aruba_radio_fields") or []:
                if s in ret:
                    Aruba_AP_radio_status.labels(ap=ap, radio=radio, status=s).set(
                        ret[s]
                    )
                else:
                    logger.debug(
                        'Ignoring missing "%s" in AP %s radio stats for radio %d'
                        % (s, ap, radio)
                    )
        # FIXME: if AP555, we might also want to look at radio 2...
        intermediate = collect_stats_per_ap(
            "get_ap_rf_verbose_radio_%d" % radio, ap, intermediate
        )


def check_aruba():
    """Run one full collection cycle and update all exported metrics.

    Reads the controller list from ``config["aruba_mm"]``, then exports
    per-MD datapath and client statistics, AP database counts, per-group
    data and finally per-AP details. The total duration is observed in
    ``Aruba_collect``.
    """
    mm = config["aruba_mm"]
    start_time = time.time()
    all_mds = get_controllers(mm)
    intermediate = collect_stats("get_controllers", start_time)

    # NOTE: the MD *name* (not its IP) is used as API target below.
    for md in all_mds.values():
        _export_md_datapath_stats(md)
    # Measure this stage from the end of the previous one, like all others.
    intermediate = collect_stats("get_dp_cp_bwm_stats", intermediate)

    for md in all_mds.values():
        _export_md_client_stats(md)
    intermediate = collect_stats("ap_associations", intermediate)

    logger.debug("Fetching ap database for counters")
    ap_in_db = ap_db(mm)
    if ap_in_db:
        _export_ap_counts(ap_in_db, all_mds)
    aps = _export_ap_groups(mm, all_mds)
    collect_stats("get_ab_db", intermediate)

    for ap, md in aps.items():
        _export_ap_stats(md, ap)

    # TODO: optionally export "show ap virtual-beacon-report client-mac <mac>"
    # ('Consecutive (Fails/BTM Rej/BTM Timeouts) ') for the sampled clients.
    logger.debug("Time spent: %d" % (time.time() - start_time))
    Aruba_collect.observe(time.time() - start_time)
class ArubaGatherer(Thread):
    """Thread that runs ``check_aruba()`` forever, sleeping between runs."""

    def __init__(self):
        super().__init__(name="ArubaGatherer")

    def run(self):
        """Poll in an endless loop; a failing run is logged and retried later."""
        logger.debug("Starting Aruba data gather thread")
        while True:
            try:
                logger.debug("Running check_aruba in thread")
                check_aruba()
                logger.debug("Done: Running check_aruba in thread")
            except Exception:
                # Deliberately broad: most failures are temporary (e.g.
                # connectivity problems), so log and try again after the delay.
                logger.error("Error getting stats", exc_info=True)
            logger.debug("Sleeping in Aruba thread for %d s" % args.delay)
            time.sleep(args.delay)
if __name__ == "__main__":
    # Collection runs in a background thread while this thread serves HTTP.
    logger.debug("Starting Aruba gatherer thread")
    aruba_gatherer = ArubaGatherer()
    aruba_gatherer.start()
    # ...and now serve the registry contents so that we can consume it..
    if os.environ.get("LISTEN_PID", None) == str(os.getpid()):
        # systemd socket activation will need that httpd is waiting for socket
        # to be passed - while collection still updates in the background
        # inherit the socket
        logger.debug(
            "Starting systemd socket activation http server on %d" % args.listen_port
        )
        CustomMetricsHandler = MetricsHandler.factory(REGISTRY)
        server_args = [("localhost", args.listen_port), CustomMetricsHandler]
        httpd = SocketInheritingHTTPServer(*server_args, fd=SYSTEMD_FIRST_SOCKET_FD)
        # Use the module logger, not logging.info(): the module-level function
        # implicitly configures the root logger, after which every record of
        # `logger` would be emitted twice.
        logger.info(
            "aruba_exporter started for socket activation on fd %s"
            % (SYSTEMD_FIRST_SOCKET_FD,)
        )
        try:
            logger.info(
                "aruba_exporter httpd running on socket fd %s"
                % (SYSTEMD_FIRST_SOCKET_FD,)
            )
            httpd.serve_forever()
        except KeyboardInterrupt:
            httpd.socket.close()
    else:
        # start the server normally
        # Start up the server to expose the metrics.
        logger.debug("Starting http server on %d" % args.listen_port)
        start_http_server(args.listen_port)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment