Skip to content

Instantly share code, notes, and snippets.

@giovtorres
Last active July 30, 2020 00:21
Show Gist options
  • Save giovtorres/5125ca7c248d51388e40 to your computer and use it in GitHub Desktop.
Save giovtorres/5125ca7c248d51388e40 to your computer and use it in GitHub Desktop.
Collectd Python plugin to get Slurm's core allocation by state
#!/usr/bin/python
# vim: set ts=4 sw=4 et:
"""
slurm_core_states.py - A read plugin that will dispatch the core cpu states
returned by the Slurm sinfo command.
"""
import collectd
import signal
import subprocess
from pyparsing import Word, alphanums, nums
__author__ = "Giovanni Torres"
def get_cpus_by_state(sinfo="/path/to/sinfo"):
    """Return Slurm core counts by state, per partition and summed overall.

    Runs ``<sinfo> -o '%P %C' --noheader`` and reads every output line of the
    form ``<partition> <allocated>/<idle>/<other>/<total>``.  A ``*`` on the
    partition name (sinfo's marker for the default partition) is stripped.

    Args:
        sinfo: Path of the ``sinfo`` executable to run.

    Returns:
        A dict mapping ``'<partition>-<state>'`` to an int for each state in
        ``allocated``, ``idle``, ``other`` and ``total``, plus ``'all-<state>'``
        entries holding the sum over every reported partition line.  Returns
        None when the command cannot be started, its output cannot be decoded,
        or it prints nothing.
    """
    states = ('allocated', 'idle', 'other', 'total')
    cores = {'all-' + state: 0 for state in states}

    try:
        proc = subprocess.Popen(
            [sinfo, '-o', '%P %C', '--noheader'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Text-mode pipes yield ``str`` on both Python 2 and Python 3, so
            # the parsing below never has to deal with ``bytes``.
            universal_newlines=True,
        )
        stdout = proc.communicate()[0]
    except (OSError, ValueError):
        # Best effort: a missing/non-executable binary or undecodable output
        # means "no data for this read cycle", not a plugin crash.
        return None

    if not stdout.strip():
        return None

    for line in stdout.splitlines():
        fields = line.split()
        if len(fields) != 2:
            continue
        counts = fields[1].split('/')
        if len(counts) != len(states):
            continue
        try:
            counts = [int(count) for count in counts]
        except ValueError:
            # Skip a malformed line instead of discarding every partition.
            continue

        partition = fields[0].strip('*')
        # NOTE(review): 'all-*' adds up every line, so cores of nodes that
        # belong to several partitions are presumably counted once per
        # partition -- confirm this is the intended total.
        for state, count in zip(states, counts):
            cores[partition + '-' + state] = count
            cores['all-' + state] += count

    return cores
def restore_sigchld():
    """Reset the SIGCHLD handler to the system default (``SIG_DFL``).

    Registered below as the collectd init callback.
    NOTE(review): presumably needed so that ``subprocess`` can wait on the
    ``sinfo`` child while running inside collectd -- confirm against the
    collectd-python documentation.
    """
    default_disposition = signal.SIG_DFL
    signal.signal(signal.SIGCHLD, default_disposition)
def read_callback(data=None):
    """Read callback: dispatch one collectd gauge per core-state counter.

    Fetches the counters from ``get_cpus_by_state()`` and sends each
    key/value pair as its own ``collectd.Values`` object, using the key as
    the type instance.  Does nothing when no counters are available.

    Args:
        data: Unused; accepted so the function matches the callback
            signature.
    """
    cores = get_cpus_by_state()
    if not cores:
        # None or an empty dict: nothing to report this cycle.
        return

    # One gauge per '<partition>-<state>' (and 'all-<state>') counter.
    for type_instance, count in cores.items():
        metric = collectd.Values()
        metric.host = 'slurm'
        metric.plugin = 'core_states'
        metric.type = 'gauge'
        metric.type_instance = type_instance
        metric.interval = 30
        metric.values = [count]
        metric.dispatch()
# Hook the plugin into collectd: restore_sigchld is registered as the init
# callback and read_callback as the read callback.
collectd.register_init(restore_sigchld)
collectd.register_read(read_callback)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment