Skip to content

Instantly share code, notes, and snippets.

@mgxd
Created July 31, 2017 20:56
Show Gist options
  • Save mgxd/6d3f5fe2da663b9126ef75a84a62aa1e to your computer and use it in GitHub Desktop.
Save mgxd/6d3f5fe2da663b9126ef75a84a62aa1e to your computer and use it in GitHub Desktop.
# A little helper to show /om usage information for user + group,
# as well as some SLURM usage
# For easiest use - add function to .bashrc that sources this with a simple input
import subprocess
from getpass import getuser
from collections import Counter
import re
# Make sure OM/SLURM is working: run the three status commands, retrying the
# whole set up to MAX_TRIES times before giving up.
MAX_TRIES = 10
LAB = 'gablab'
_current_user = getuser()  # looked up once; reused for the per-user quota query
for _attempt in range(1, MAX_TRIES + 1):
    try:
        # One comma-separated row per job: user, state, min memory,
        # min CPUs, QOS, group (see the squeue -o format string below).
        sq = subprocess.check_output(
            ['squeue', '-o', '%u,%t,%m,%c,%q,%g']).decode("utf-8")  # bytes -> text
        # Quota report for the lab's group on /om.
        lq = subprocess.check_output(
            ['lfs', 'quota', '-g', LAB, '/om']).decode()
        # Quota report for the current user on /om.
        mq = subprocess.check_output(
            ['lfs', 'quota', '-u', _current_user, '/om']).decode()
        break
    except (subprocess.CalledProcessError, OSError):
        # Only command failures are retried (non-zero exit status, or the
        # executable could not be started). KeyboardInterrupt and SystemExit
        # are deliberately NOT caught, so Ctrl-C still stops the script.
        if _attempt == MAX_TRIES:
            # Raised inside the handler so the traceback also shows the
            # underlying command failure.
            raise RuntimeError('Connection to SLURM failed.')
# Parse the squeue listing into one list of fields per job.
# The first output line is the column header, so it is dropped.
#
# queue row layout (one entry per field of the squeue -o format string):
#
#  [0]    [1]      [2]          [3]         [4]                        [5]
#  USER | STATUS | MIN_MEMORY | MIN_CORES | QUALITY OF SERVICE (QOS) | GROUP
_me = getuser()  # looked up once instead of once per queue row
queue = [
    fields
    for fields in (line.split(',') for line in sq.splitlines()[1:])
    # Skip rows with fewer than six fields; indexing [4]/[5] on them below
    # would raise IndexError.
    if len(fields) >= 6
]
running = [x for x in queue if x[1] == 'R']    # state code 'R'
pending = [x for x in queue if x[1] == 'PD']   # state code 'PD'
myjobs = [x for x in queue if x[0] == _me]     # jobs owned by the current user
labq = [x for x in queue if x[5] == LAB]       # jobs whose group column is the lab
labqos = [x for x in queue if x[4] == LAB]     # jobs whose QOS column is the lab
# to maybe add later
# most = Counter([x[0] for x in queue]).most_common(1)[0] # who is being a hog?
# usage by lab
# labuse = Counter(x[5] for x in queue).most_common(4)
# check past jobs to see if completed (from start of day) - can expand to 24 hours with python time + --starttime
# sacct --format=JOBID,JOBNAME,State
def _quota_fields(output):
    """Return the value columns of the first data row of `lfs quota` output.

    The first two lines of *output* are skipped and the third line is split
    on whitespace.

    Resulting layout:

        [0] filesystem | [1] kbytes used | [2] kbytes quota | [3] kbytes limit
        [4] number of files | [5] file quota | [6] file limit

    Raises:
        ValueError: if *output* has fewer than three lines.
    """
    rows = output.splitlines()[2:]
    if not rows:
        raise ValueError(
            'Unexpected `lfs quota` output: {!r}'.format(output))
    fields = rows[0].split()
    if len(fields) == 9:
        # Assumes the standard nine-column row
        # (filesystem, kbytes, quota, limit, grace, files, quota, limit, grace)
        # -- TODO confirm against the installed Lustre version.
        # The two grace columns are dropped by POSITION: dropping them only
        # when they read '-' would shift every later index as soon as a
        # grace period is active.
        return fields[:4] + fields[5:8]
    # Unknown layout: fall back to discarding '-' placeholders by value.
    return [field for field in fields if field != '-']


labquota = _quota_fields(lq)  # group quota row for the lab
myquota = _quota_fields(mq)   # quota row for the current user
def to_int(_str):
    """Return the numeric characters of ``_str``, in order, as one string.

    Despite the name, the result is a ``str`` (empty when ``_str`` holds no
    numeric characters); callers convert it with ``int()`` themselves.
    """
    return ''.join(filter(str.isnumeric, _str))
# Turn the quota fields into numbers.
#
# Any numeric field may carry a non-digit marker such as a trailing '*'
# (presumably the over-quota flag -- verify against `lfs quota` docs), and
# int() raises ValueError on it. Every field used below is therefore passed
# through to_int() first, not just a hand-picked subset.
(_lab_kb_used, _lab_kb_quota,
 _lab_files, _lab_file_quota) = (int(to_int(labquota[i])) for i in (1, 2, 4, 5))
_my_kb_used, _my_files = (int(to_int(myquota[i])) for i in (1, 4))

# Remaining group space: quota minus usage, kbytes scaled down to GB.
# NOTE(review): a quota of 0 yields a negative "remaining" value here --
# confirm whether 0 means "no limit" on this filesystem.
lab_gb = (_lab_kb_quota - _lab_kb_used) / 1000000
# Remaining group file count: file quota minus files in use.
lab_fls = _lab_file_quota - _lab_files
# Personal usage is reported as-is (no quota subtraction).
my_gb = _my_kb_used / 1000000
my_fls = _my_files
# Reporting: greeting, SLURM queue summary, personal usage, then group quotas.
print("Hi {}, here is your OM overview".format(getuser()))
print("-----------------------------------")

# --- SLURM queue summary ---
print("SLURM")
_queue_line = '\tThere are {} total jobs on the queue: {} running and {} pending.'
print(_queue_line.format(len(queue), len(running), len(pending)))

if myjobs:
    # Split the user's own jobs by state code.
    myjobs_r = list(filter(lambda job: job[1] == 'R', myjobs))
    myjobs_p = list(filter(lambda job: job[1] == 'PD', myjobs))
    _mine_line = '\tOf your personal {} job(s): {} running and {} pending.'
    print(_mine_line.format(len(myjobs), len(myjobs_r), len(myjobs_p)))
else:
    print("\tYou don't have any current jobs.")

if labqos:
    print('\tThere are {} job(s) using {} QOS.\n'.format(len(labqos), LAB))
else:
    print('\tThere are no jobs using {} QOS.\n'.format(LAB))

# --- Personal usage ---
print("Personal usage")
print('\tYou are using up {:,} GB and {:,} inodes on /om.\n'.format(my_gb, my_fls))

# --- Group quotas ---
print("Group quotas")
_lab_name = LAB.capitalize()

# A non-positive remainder means the group is at or over its quota; the
# overshoot is printed as a positive amount.
if lab_gb > 0:
    print('\t{} has {:,} GB space remaining.'.format(_lab_name, lab_gb))
else:
    print('\tWARNING: {} is past capacity by {:,} GB!'.format(_lab_name, -lab_gb))

if lab_fls > 0:
    print('\t{} can support {:,} more files.'.format(_lab_name, lab_fls))
else:
    print('\tWARNING: {} has passed capacity by {:,} files!'.format(_lab_name, -lab_fls))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment