Collectd Python plugin to get Slurm's core allocation by state
#!/usr/bin/python | |
# vim: set ts=4 sw=4 et | |
""" | |
slurm_core_states.py - A collectd read plugin that dispatches the number of
CPU cores in each state (allocated/idle/other/total), per Slurm partition and
overall, as reported by the Slurm sinfo command.
""" | |
import collectd | |
import signal | |
import subprocess | |
from pyparsing import Word, alphanums, nums | |
__author__ = "Giovanni Torres" | |
def get_cpus_by_state(sinfo="/path/to/sinfo"):
    """Return Slurm core counts by state, per partition and overall.

    Runs ``sinfo -o '%P %C' --noheader`` and parses every output line of the
    form ``<partition> <allocated>/<idle>/<other>/<total>``.  Lines that do
    not have that shape are ignored.

    Args:
        sinfo: Path to the Slurm ``sinfo`` executable.

    Returns:
        A dict mapping ``'<partition>-<state>'`` to an integer core count,
        where ``<state>`` is one of ``allocated``, ``idle``, ``other`` or
        ``total``, plus ``'all-<state>'`` entries holding the sum over all
        parsed partitions.  Returns ``None`` when ``sinfo`` cannot be run or
        prints nothing on stdout.
    """
    states = ('allocated', 'idle', 'other', 'total')
    cores = dict(('all-' + state, 0) for state in states)

    try:
        proc = subprocess.Popen(
            [sinfo, '-o', '%P %C', '--noheader'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Text mode: stdout is a str on both Python 2 and Python 3, so the
            # string handling below never sees bytes.
            universal_newlines=True,
        )
        stdout = proc.communicate()[0]
    except (OSError, ValueError):
        # OSError: sinfo is missing or not executable.
        # ValueError: bad Popen arguments or undecodable output.
        return None

    if not stdout.strip():
        return None

    for line in stdout.splitlines():
        fields = line.split()
        if len(fields) != 2:
            continue
        counts = fields[1].split('/')
        if len(counts) != len(states):
            continue
        try:
            values = [int(count) for count in counts]
        except ValueError:
            continue

        # The partition column may carry a '*' marker; drop it so the metric
        # name is just the partition name.
        queue = fields[0].strip('*')
        for state, value in zip(states, values):
            cores[queue + '-' + state] = value
            cores['all-' + state] += value

    return cores
def restore_sigchld():
    """Reset the SIGCHLD handler to the operating system default.

    NOTE(review): presumably needed so that child processes started through
    ``subprocess`` can be waited on inside the collectd daemon -- confirm
    against collectd's own SIGCHLD handling.
    """
    signum, default_action = signal.SIGCHLD, signal.SIG_DFL
    signal.signal(signum, default_action)
def read_callback(data=None):
    """Dispatch every core counter from get_cpus_by_state() to collectd.

    Each key of the returned dict becomes the ``type_instance`` of one
    ``gauge`` value, dispatched under plugin ``core_states`` and host
    ``slurm`` with a 30 second interval.

    Args:
        data: Unused; accepted so the function matches the signature used
            when it is registered as a read callback.
    """
    core_counts = get_cpus_by_state()
    if not core_counts:
        # Nothing was collected this cycle, so there is nothing to dispatch.
        return

    # One collectd value per '<partition>-<state>' key.
    for state_key, count in core_counts.items():
        gauge = collectd.Values()
        gauge.plugin = 'core_states'
        gauge.host = 'slurm'
        gauge.interval = 30
        gauge.type = 'gauge'
        gauge.type_instance = state_key
        gauge.values = [count]
        gauge.dispatch()
# Hook the plugin into collectd: restore_sigchld is registered as the init
# callback and read_callback as the read callback.
collectd.register_init(restore_sigchld)
collectd.register_read(read_callback)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment