rmcgibbo/free-slots.py

## free-slots.py
#!/usr/bin/python
from __future__ import print_function, division
import os
import re
import sys
import itertools
from pprint import pprint
import subprocess
from distutils.spawn import find_executable
from collections import defaultdict
from collections import namedtuple

# Path of SLURM's `scontrol` executable as resolved by find_executable
# (distutils returns None when the program is not found on PATH).
SCONTROL_BIN = find_executable('scontrol')
# Hashable record used as the counting key in collect(): a partition label
# together with a node's allocated and total CPU counts.
status = namedtuple('status', ['partition', 'np_alloc', 'np_total'])


def main():
    """Print a summary table of SLURM nodes that still have free CPU slots.

    Nodes are grouped by ``(partition, allocated CPUs, total CPUs)`` as
    returned by ``collect()``.  Groups with no free CPU are omitted and the
    remaining groups are listed from least to most utilized.
    """
    count = collect()
    header = ['Number of nodes', 'Partition', 'Utilization', 'Free slots']
    lines = [header, ['-' * len(title) for title in header]]

    def utilization(k):
        # Guard against a node reporting zero total CPUs, which would
        # otherwise raise ZeroDivisionError while sorting.
        return k.np_alloc / k.np_total if k.np_total else 0.0

    # Filter before sorting so fully allocated groups never reach the key.
    with_free_slots = [k for k in count if k.np_total != k.np_alloc]
    for k in sorted(with_free_slots, key=utilization):
        lines.append([count[k], k.partition,
                      '%s/%s' % (k.np_alloc, k.np_total),
                      k.np_total - k.np_alloc])

    print('Summary of SLURM nodes with free slots\n')
    print(format_table(lines))


def collect():
    """Count nodes by partition and CPU usage.

    Returns
    -------
    count : defaultdict
        Maps ``status(partition, np_alloc, np_total)`` to the number of
        nodes sharing that combination.  Node records without a
        ``NodeHostName``, or whose host is in no partition, are skipped.
    """
    tally = defaultdict(int)
    node_records = scontrol_show('node')
    host_to_partition = nodes_to_partition()

    for record in node_records:
        label = host_to_partition.get(record.get('NodeHostName'))
        if label is None:
            # unknown host (or record without a host name): nothing to count
            continue
        key = status(label, int(record['CPUAlloc']), int(record['CPUTot']))
        tally[key] += 1

    return tally


def nodes_to_partition():
    """Mapping from NodeHostName to PartitionName for each node

    Returns
    -------
    n2p : dict
        Maps each node host name to a comma-joined string naming every
        partition that lists the node, in the order the partitions are
        reported by ``scontrol_show('partition')``.
    """
    n2p = defaultdict(list)
    for partition in scontrol_show('partition'):
        name = partition.get('PartitionName')
        node_spec = partition.get('Nodes')
        if name is None or not node_spec:
            # a record lacking either field cannot contribute to the mapping
            continue
        these_nodes = set()
        for group in _split_hostlist(node_spec):
            these_nodes.update(expand_bracket(group))
        for node in these_nodes:
            n2p[node].append(name)
    return {node: ','.join(names) for node, names in n2p.items()}


def _split_hostlist(spec):
    """Split a host list on the commas that lie outside ``[...]`` groups."""
    groups = []
    depth = 0
    start = 0
    for pos, char in enumerate(spec):
        if char == '[':
            depth += 1
        elif char == ']':
            depth = max(depth - 1, 0)
        elif char == ',' and depth == 0:
            # a top-level comma separates two host expressions; commas
            # inside brackets belong to a single bracket expression
            groups.append(spec[start:pos])
            start = pos + 1
    groups.append(spec[start:])
    return groups


def scontrol_show(entity):
    """Wrapper around the `scontrol show` SLURM utility

    Parameters
    ----------
    entity : {partition, job, node}
        The type of entity to query scontrol for

    Returns
    -------
    vals : list of dicts
        Each element in the list is a dict containing the information about
        one of the requested entities on the system.

    Raises
    ------
    RuntimeError
        If the `scontrol` executable is unavailable, exits with a non-zero
        status, or writes anything to stderr.
    """
    # SCONTROL_BIN is falsy when the executable was not located; passing
    # None to os.path.exists would raise TypeError instead of this error.
    if not SCONTROL_BIN or not os.path.exists(SCONTROL_BIN):
        raise RuntimeError('This script is for SLURM systems only')

    # universal_newlines=True makes both pipes yield text (not bytes) on
    # Python 3, so the string handling below works on either major version.
    comm = subprocess.Popen([SCONTROL_BIN, 'show', entity],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            universal_newlines=True)
    stdout, stderr = comm.communicate()
    if comm.returncode != 0 or stderr:
        raise RuntimeError('`scontrol show %s` failed (exit status %s): %s'
                           % (entity, comm.returncode, stderr.strip()))

    return _parse_scontrol_records(stdout)


def _parse_scontrol_records(text):
    """Parse blank-line separated blocks of ``Key=Value`` tokens into dicts."""
    values = []
    entry = {}
    for line in text.splitlines():
        if not line.strip():
            # a blank line terminates the current record
            if entry:
                values.append(entry)
                entry = {}
            continue
        for item in line.split():
            # split on the first '=' only, so values that themselves
            # contain '=' are kept whole
            key, sep, val = item.partition('=')
            if sep:
                entry[key] = val
            # tokens without '=' carry no key of their own and are skipped
    if entry:
        # final record when the output does not end with a blank line
        values.append(entry)
    return values


def expand_bracket(s):
    """Expand SLURM's bracket notation

    The bracket may hold any number of comma-separated items, each either
    a single index (``7``) or an inclusive range (``1-5``).  When the first
    number of an item starts with ``0`` the generated indices are
    zero-padded to that number's width.  Text following the bracket is
    kept, and further bracket groups in it are expanded as well.

    Parameters
    ----------
    s : str
        A host expression such as ``'sh-1-[1-5,11-12]'``.

    Returns
    -------
    names : list of str
        The expanded names in order; ``[s]`` if `s` holds no well-formed
        bracket expression.

    Example
    -------
    >>> expand_bracket("sh-1-[1-5]")
    ['sh-1-1', 'sh-1-2', 'sh-1-3', 'sh-1-4', 'sh-1-5']
    >>> expand_bracket('sh-1-[1-5,11-12]')
    ['sh-1-1', 'sh-1-2', 'sh-1-3', 'sh-1-4', 'sh-1-5', 'sh-1-11', 'sh-1-12']
    >>> expand_bracket('sh-[08-10]-ib')
    ['sh-08-ib', 'sh-09-ib', 'sh-10-ib']
    """
    m = re.match(r'(.*?)\[([^\[\]]+)\](.*)', s, re.DOTALL)
    if not m:
        return [s]
    prefix, spec, rest = m.groups()

    numbers = []
    # Parse the items one at a time: a single regex with a repeated capture
    # group would only retain the last repetition.
    for item in spec.split(','):
        bounds = re.match(r'(\d+)(?:-(\d+))?$', item)
        if not bounds:
            # malformed bracket content: leave the expression untouched
            return [s]
        first, last = bounds.groups()
        if last is None:
            last = first  # a single index is a one-element range
        # a width of 0 disables padding in the '%0*d' format below
        width = len(first) if first[0] == '0' else 0
        for j in range(int(first), int(last) + 1):
            numbers.append('%0*d' % (width, j))

    # whatever follows the bracket may itself contain bracket groups
    tails = expand_bracket(rest)
    return [prefix + number + tail for number in numbers for tail in tails]


def format_table(rows):
    """Render `rows` as left-aligned text columns.

    Every column is padded to the length of its longest ``str()``-ed cell
    plus two, and columns are separated by a single space.  Returns the
    rendered rows joined by newlines.
    """
    col_widths = []
    for column in zip(*rows):
        widest = max(len(str(cell)) for cell in column)
        col_widths.append(widest + 2)

    # one '%-<width>s' conversion per column
    template = ' '.join('%%-%ds' % width for width in col_widths)
    return '\n'.join(template % tuple(row) for row in rows)


# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

## output-example
$ free-slots
Summary of SLURM nodes with free slots

Number of nodes   Partition   Utilization   Free slots
---------------   ---------   -----------   ----------
4                 gpu         0/16          16
2                 normal      0/16          16
2                 dev         0/16          16
1                 gpu         12/16         4
	#!/usr/bin/python
	from __future__ import print_function, division
	import os
	import re
	import sys
	import itertools
	from pprint import pprint
	import subprocess
	from distutils.spawn import find_executable
	from collections import defaultdict
	from collections import namedtuple

	# Path of SLURM's `scontrol` executable as resolved by find_executable
	# (distutils returns None when the program is not found on PATH).
	SCONTROL_BIN = find_executable('scontrol')
	# Hashable record used as the counting key in collect(): a partition label
	# together with a node's allocated and total CPU counts.
	status = namedtuple('status', ['partition', 'np_alloc', 'np_total'])


	def main():
	    """Print a summary table of SLURM nodes that still have free CPU slots.

	    Nodes are grouped by ``(partition, allocated CPUs, total CPUs)`` as
	    returned by ``collect()``.  Groups with no free CPU are omitted and the
	    remaining groups are listed from least to most utilized.
	    """
	    count = collect()
	    header = ['Number of nodes', 'Partition', 'Utilization', 'Free slots']
	    lines = [header, ['-' * len(title) for title in header]]

	    def utilization(k):
	        # Guard against a node reporting zero total CPUs, which would
	        # otherwise raise ZeroDivisionError while sorting.
	        return k.np_alloc / k.np_total if k.np_total else 0.0

	    # Filter before sorting so fully allocated groups never reach the key.
	    with_free_slots = [k for k in count if k.np_total != k.np_alloc]
	    for k in sorted(with_free_slots, key=utilization):
	        lines.append([count[k], k.partition,
	                      '%s/%s' % (k.np_alloc, k.np_total),
	                      k.np_total - k.np_alloc])

	    print('Summary of SLURM nodes with free slots\n')
	    print(format_table(lines))


	def collect():
	    """Count nodes by partition and CPU usage.

	    Returns
	    -------
	    count : defaultdict
	        Maps ``status(partition, np_alloc, np_total)`` to the number of
	        nodes sharing that combination.  Node records without a
	        ``NodeHostName``, or whose host is in no partition, are skipped.
	    """
	    count = defaultdict(int)
	    nodes = scontrol_show('node')
	    n2p = nodes_to_partition()

	    for node in nodes:
	        try:
	            partition = n2p[node['NodeHostName']]
	        except KeyError:
	            # unknown host (or record without a host name): nothing to count
	            continue
	        count[status(partition, int(node['CPUAlloc']), int(node['CPUTot']))] += 1

	    return count


	def nodes_to_partition():
	    """Mapping from NodeHostName to PartitionName for each node

	    Returns
	    -------
	    n2p : dict
	        Maps each node host name to a comma-joined string naming every
	        partition that lists the node, in the order the partitions are
	        reported by ``scontrol_show('partition')``.
	    """
	    n2p = defaultdict(list)
	    for partition in scontrol_show('partition'):
	        name = partition.get('PartitionName')
	        node_spec = partition.get('Nodes')
	        if name is None or not node_spec:
	            # a record lacking either field cannot contribute to the mapping
	            continue
	        these_nodes = set()
	        for group in _split_hostlist(node_spec):
	            these_nodes.update(expand_bracket(group))
	        for node in these_nodes:
	            n2p[node].append(name)
	    return {node: ','.join(names) for node, names in n2p.items()}


	def _split_hostlist(spec):
	    """Split a host list on the commas that lie outside ``[...]`` groups."""
	    groups = []
	    depth = 0
	    start = 0
	    for pos, char in enumerate(spec):
	        if char == '[':
	            depth += 1
	        elif char == ']':
	            depth = max(depth - 1, 0)
	        elif char == ',' and depth == 0:
	            # a top-level comma separates two host expressions; commas
	            # inside brackets belong to a single bracket expression
	            groups.append(spec[start:pos])
	            start = pos + 1
	    groups.append(spec[start:])
	    return groups


	def scontrol_show(entity):
	    """Wrapper around the `scontrol show` SLURM utility

	    Parameters
	    ----------
	    entity : {partition, job, node}
	        The type of entity to query scontrol for

	    Returns
	    -------
	    vals : list of dicts
	        Each element in the list is a dict containing the information about
	        one of the requested entities on the system.

	    Raises
	    ------
	    RuntimeError
	        If the `scontrol` executable is unavailable, exits with a non-zero
	        status, or writes anything to stderr.
	    """
	    # SCONTROL_BIN is falsy when the executable was not located; passing
	    # None to os.path.exists would raise TypeError instead of this error.
	    if not SCONTROL_BIN or not os.path.exists(SCONTROL_BIN):
	        raise RuntimeError('This script is for SLURM systems only')

	    # universal_newlines=True makes both pipes yield text (not bytes) on
	    # Python 3, so the string handling below works on either major version.
	    comm = subprocess.Popen([SCONTROL_BIN, 'show', entity],
	                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
	                            universal_newlines=True)
	    stdout, stderr = comm.communicate()
	    if comm.returncode != 0 or stderr:
	        raise RuntimeError('`scontrol show %s` failed (exit status %s): %s'
	                           % (entity, comm.returncode, stderr.strip()))

	    return _parse_scontrol_records(stdout)


	def _parse_scontrol_records(text):
	    """Parse blank-line separated blocks of ``Key=Value`` tokens into dicts."""
	    values = []
	    entry = {}
	    for line in text.splitlines():
	        if not line.strip():
	            # a blank line terminates the current record
	            if entry:
	                values.append(entry)
	                entry = {}
	            continue
	        for item in line.split():
	            # split on the first '=' only, so values that themselves
	            # contain '=' are kept whole
	            key, sep, val = item.partition('=')
	            if sep:
	                entry[key] = val
	            # tokens without '=' carry no key of their own and are skipped
	    if entry:
	        # final record when the output does not end with a blank line
	        values.append(entry)
	    return values


	def expand_bracket(s):
	    """Expand SLURM's bracket notation

	    The bracket may hold any number of comma-separated items, each either
	    a single index (``7``) or an inclusive range (``1-5``).  When the first
	    number of an item starts with ``0`` the generated indices are
	    zero-padded to that number's width.  Text following the bracket is
	    kept, and further bracket groups in it are expanded as well.

	    Parameters
	    ----------
	    s : str
	        A host expression such as ``'sh-1-[1-5,11-12]'``.

	    Returns
	    -------
	    names : list of str
	        The expanded names in order; ``[s]`` if `s` holds no well-formed
	        bracket expression.

	    Example
	    -------
	    >>> expand_bracket("sh-1-[1-5]")
	    ['sh-1-1', 'sh-1-2', 'sh-1-3', 'sh-1-4', 'sh-1-5']
	    >>> expand_bracket('sh-1-[1-5,11-12]')
	    ['sh-1-1', 'sh-1-2', 'sh-1-3', 'sh-1-4', 'sh-1-5', 'sh-1-11', 'sh-1-12']
	    >>> expand_bracket('sh-[08-10]-ib')
	    ['sh-08-ib', 'sh-09-ib', 'sh-10-ib']
	    """
	    m = re.match(r'(.*?)\[([^\[\]]+)\](.*)', s, re.DOTALL)
	    if not m:
	        return [s]
	    prefix, spec, rest = m.groups()

	    numbers = []
	    # Parse the items one at a time: a single regex with a repeated capture
	    # group would only retain the last repetition.
	    for item in spec.split(','):
	        bounds = re.match(r'(\d+)(?:-(\d+))?$', item)
	        if not bounds:
	            # malformed bracket content: leave the expression untouched
	            return [s]
	        first, last = bounds.groups()
	        if last is None:
	            last = first  # a single index is a one-element range
	        # a width of 0 disables padding in the '%0*d' format below
	        width = len(first) if first[0] == '0' else 0
	        for j in range(int(first), int(last) + 1):
	            numbers.append('%0*d' % (width, j))

	    # whatever follows the bracket may itself contain bracket groups
	    tails = expand_bracket(rest)
	    return [prefix + number + tail for number in numbers for tail in tails]


	def format_table(rows):
	    """Render `rows` as left-aligned text columns.

	    Every column is padded to the length of its longest ``str()``-ed cell
	    plus two, and columns are separated by a single space.  Returns the
	    rendered rows joined by newlines.
	    """
	    cols = zip(*rows)
	    col_widths = [max(len(str(value)) + 2 for value in col) for col in cols]
	    # one '%-<width>s' conversion per column
	    template = ' '.join(['%%-%ds' % width for width in col_widths])
	    return '\n'.join(template % tuple(row) for row in rows)


	# Run the report only when the file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()
	$ free-slots
	Summary of SLURM nodes with free slots

	Number of nodes   Partition   Utilization   Free slots
	---------------   ---------   -----------   ----------
	4                 gpu         0/16          16
	2                 normal      0/16          16
	2                 dev         0/16          16
	1                 gpu         12/16         4