Skip to content

Instantly share code, notes, and snippets.

@giovtorres
Created December 31, 2015 16:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save giovtorres/6581bb529437eed7c64c to your computer and use it in GitHub Desktop.
Save giovtorres/6581bb529437eed7c64c to your computer and use it in GitHub Desktop.
Push slurm cpu and memory cluster utilization to a carbon (graphite) instance for graphing
#!/usr/bin/python
# vim: set ts=4 sw=4 et
"""
send_cluster_util.py - A script that will get CPU and Memory utilization
for a SLURM cluster and submit it directly to carbon's
pickle listener port.
http://graphite.readthedocs.org/en/latest/feeding-carbon.html#the-pickle-protocol
"""
import pickle
import socket
import struct
import sys
import time
import pyslurm
import hostlist
__author__ = "Giovanni Torres"
# Host of the carbon instance that main() connects to.
CARBON_SERVER = "127.0.0.1"
# TCP port of carbon's pickle listener (see the module docstring).
CARBON_PICKLE_PORT = 2004
# Seconds run() sleeps between two consecutive samples.
DELAY = 30
# Create the pyslurm query handles once, at import time; get_cluster_util()
# reuses them on every polling cycle.  If pyslurm refuses to create them the
# script cannot do anything useful, so bail out immediately.
try:
    pyslurmnode = pyslurm.node()
    pyslurmjob = pyslurm.job()
except ValueError as err:
    # Passing the exception to sys.exit() prints it and exits non-zero.
    sys.exit(err)
def get_cluster_util(nodes=None):
    """Aggregate CPU and memory capacity/usage over the cluster's nodes.

    Only nodes whose state string contains ``ALLOCATED``, ``MIXED`` or
    ``IDLE`` (checked in that order) contribute to the totals; nodes in any
    other state are ignored.

    Memory accounting per state:

    * ``ALLOCATED`` - the node's whole ``real_memory`` counts as allocated.
    * ``MIXED``     - the node's ``alloc_memory`` counts as allocated.
    * ``IDLE``      - no memory counts as allocated.

    Args:
        nodes: Optional mapping of node name -> node info dict (the shape
            returned by ``pyslurmnode.get()``).  When omitted, the data is
            fetched from the module-level ``pyslurmnode`` handle.

    Returns:
        A tuple ``(total_cpus_avail, total_cpus_alloc, total_memory_avail,
        total_memory_alloc)``, or ``None`` if querying pyslurm raised
        ``ValueError``.
    """
    if nodes is None:
        try:
            nodes = pyslurmnode.get()
        except ValueError:
            # Callers must be prepared for None: no sample is available.
            return None

    total_cpus_avail = 0
    total_cpus_alloc = 0
    total_memory_avail = 0
    total_memory_alloc = 0

    for nodeinfo in nodes.values():
        # A missing/None field is treated as "" or 0 so that one incomplete
        # node record cannot abort the whole sample with a TypeError.
        state = nodeinfo.get("state") or ""
        real_memory = nodeinfo.get("real_memory") or 0

        if "ALLOCATED" in state:
            memory_in_use = real_memory
        elif "MIXED" in state:
            memory_in_use = nodeinfo.get("alloc_memory") or 0
        elif "IDLE" in state:
            memory_in_use = 0
        else:
            # Any other state is not counted as capacity.
            continue

        total_cpus_avail += nodeinfo.get("cpus") or 0
        total_cpus_alloc += nodeinfo.get("alloc_cpus") or 0
        total_memory_avail += real_memory
        total_memory_alloc += memory_in_use

    return (total_cpus_avail, total_cpus_alloc,
            total_memory_avail, total_memory_alloc)
def _percent(used, total):
    """Return ``used`` as a percentage of ``total``, or 0 when ``total`` is 0."""
    if not total:
        return 0
    return used * 100 / total


def run(sock, delay):
    """Sample cluster utilization forever and push it to carbon.

    Each cycle builds two metrics (CPU and memory utilization percent),
    pickles them with protocol 1, and sends them over ``sock`` prefixed by a
    4-byte big-endian length header, then sleeps.

    Args:
        sock: A connected socket object exposing ``sendall``.
        delay: Seconds to sleep between samples.

    This function never returns normally; it ends only when an exception
    (for example ``KeyboardInterrupt`` or a socket error) propagates out.
    """
    while True:
        now = int(time.time())
        util = get_cluster_util()
        if util is None:
            # get_cluster_util() returns None when SLURM could not be
            # queried; skip this sample instead of crashing on the unpack.
            sys.stderr.write("Could not get cluster utilization; "
                             "skipping this sample\n")
            time.sleep(delay)
            continue

        cpus_avail, cpus_alloc, memory_avail, memory_alloc = util
        metrics = [
            ('cluster.slurm_cpu_util.gauge-percent_cpu_util',
             (now, _percent(cpus_alloc, cpus_avail))),
            ('cluster.slurm_mem_util.gauge-percent_mem_util',
             (now, _percent(memory_alloc, memory_avail))),
        ]

        payload = pickle.dumps(metrics, 1)
        header = struct.pack('!L', len(payload))
        sock.sendall(header + payload)
        time.sleep(delay)
def main():
    """Connect to carbon's pickle listener and run the sampling loop.

    Raises:
        SystemExit: with an error message if the connection to
            ``CARBON_SERVER:CARBON_PICKLE_PORT`` fails, or with status 0
            when the loop is interrupted with CTRL-c.
    """
    sock = socket.socket()
    try:
        try:
            sock.connect((CARBON_SERVER, CARBON_PICKLE_PORT))
        except socket.error:
            # Single literal: a backslash continuation inside the string
            # would embed the next line's indentation in the message.
            raise SystemExit(
                "Couldn't connect to %s on port %d. Is carbon-cache running"
                % (CARBON_SERVER, CARBON_PICKLE_PORT))

        try:
            run(sock, DELAY)
        except KeyboardInterrupt:
            sys.stderr.write("\nExiting on CTRL-c\n")
            sys.exit(0)
    finally:
        # Release the socket on every exit path (connect failure, CTRL-c,
        # or an unexpected error from run()).
        sock.close()
# Start the sender only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment