Last active
August 22, 2017 06:47
-
-
Save ScottWales/336e1e1994f314e826cc49ad974b17d0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import division | |
import os | |
import math | |
def write_model_ranks(rankfile, nranks, start_rank, start_host, slots_per_host, hostnames, omp=1, sockets_per_host=2):
    """
    Write the rankfile entries for a single model.

    One line is written per MPI rank, for example

        rank 0=+n0 slot=0:0-1

    which names the host, socket and core range the rank is assigned to. See
    https://www.open-mpi.org/faq/?category=tuning#using-paffinity-v1.3

    Ranks are packed in order: each one takes ``omp`` consecutive cores,
    filling the sockets of a host before moving to the next entry of
    ``hostnames``.

    Args:
        rankfile: Writable text file object that receives the entries.
        nranks: Number of MPI ranks belonging to this model.
        start_rank: Global rank number given to the model's first rank.
        start_host: Index into ``hostnames`` of the model's first host.
        slots_per_host: Number of cores used on each host.
        hostnames: Sequence of host names, indexed by host number.
        omp: Number of cores reserved for each rank.
        sockets_per_host: Number of sockets the cores of one host are split
            evenly between. Defaults to 2, the value that used to be
            hard-coded here.

    Raises:
        IndexError: If the ranks need more hosts than ``hostnames`` holds.
    """
    # True division on purpose (the module imports it from __future__): a
    # host with fewer cores than sockets gives a fractional value here
    # instead of a zero divisor further down.
    slots_per_socket = slots_per_host / sockets_per_host

    for rank in range(nranks):
        # Index of the first core of this rank, counted across all hosts
        first_slot = rank * omp
        hostname = hostnames[int(math.floor(first_slot / slots_per_host) + start_host)]
        socket = math.floor(first_slot / slots_per_socket) % sockets_per_host
        first_core = first_slot % slots_per_socket
        last_core = first_core + omp - 1

        if last_core >= slots_per_socket:
            # The rank does not fit in what is left of this socket, so it
            # continues from core 0 of the following socket.
            # NOTE(review): assumes socket + 1 exists on the same host, which
            # holds when omp divides slots_per_host evenly - confirm callers.
            rankfile.write("rank %d=%s slot=%d:%d-%d,%d:%d-%d\n" % (
                rank + start_rank,
                hostname,
                socket,
                first_core,
                slots_per_socket - 1,
                socket + 1,
                0,
                last_core % slots_per_socket))
        else:
            rankfile.write("rank %d=%s slot=%d:%d-%d\n" % (
                rank + start_rank,
                hostname,
                socket,
                first_core,
                last_core))
def coupled_rankfile(rankfile, um_decomp, mom_decomp, ice_decomp, hostnames):
    """
    Write the rankfile for a coupled run of the UM, MOM and CICE models.

    The models are laid out one after another. Each model starts on a fresh
    host, so no host is shared between two models, and global rank numbers
    run UM first, then MOM, then CICE.

    Args:
        rankfile: Writable text file object, passed on to write_model_ranks.
        um_decomp: Dict with the keys 'x', 'y', 'ios', 'omp' and 'slots'.
        mom_decomp: Dict with the keys 'x', 'y' and 'slots'.
        ice_decomp: Dict with the keys 'n' and 'slots'.
        hostnames: Sequence of host names, indexed by host number.

    Raises:
        AssertionError: If um_decomp['omp'] does not divide um_decomp['slots'],
            or if the hosts needed exceed the PBS_NCPUS environment variable.
        KeyError: If PBS_NCPUS is not set in the environment.
    """
    # Total number of MPI ranks for each process
    um_nrank = um_decomp['x'] * um_decomp['y'] + um_decomp['ios']
    mom_nrank = mom_decomp['x'] * mom_decomp['y']
    cice_nrank = ice_decomp['n']

    # Assume that the number of threads evenly divides into a single host
    assert um_decomp['slots'] % um_decomp['omp'] == 0

    # Whole hosts taken by each model. Only UM ranks occupy more than one
    # core each, so only the UM count is scaled by 'omp'.
    um_hosts = math.ceil(um_nrank * um_decomp['omp'] / um_decomp['slots'])
    mom_hosts = math.ceil(mom_nrank / mom_decomp['slots'])
    cice_hosts = math.ceil(cice_nrank / ice_decomp['slots'])

    # Assume each model gets dedicated nodes
    # NOTE(review): this compares a host count with PBS_NCPUS, which by its
    # name looks like a CPU count - confirm this is the intended limit.
    assert um_hosts + mom_hosts + cice_hosts <= int(os.environ['PBS_NCPUS'])

    # UM processes can get multiple cores, based on OMP_NUM_THREADS
    write_model_ranks(rankfile,
                      nranks=um_nrank,
                      start_rank=0,
                      start_host=0,
                      slots_per_host=um_decomp['slots'],
                      omp=um_decomp['omp'],
                      hostnames=hostnames)

    # Start on a clean node
    mom_host_start = um_hosts
    write_model_ranks(rankfile,
                      nranks=mom_nrank,
                      start_rank=um_nrank,
                      start_host=mom_host_start,
                      slots_per_host=mom_decomp['slots'],
                      hostnames=hostnames)

    cice_host_start = mom_host_start + mom_hosts
    write_model_ranks(rankfile,
                      nranks=cice_nrank,
                      start_rank=um_nrank + mom_nrank,
                      start_host=cice_host_start,
                      slots_per_host=ice_decomp['slots'],
                      hostnames=hostnames)
def main():
    """
    Build the coupled-model rankfile from the job's environment.

    The decompositions are read from the environment variables NSLOTS,
    UM_ATM_NPROCX, UM_ATM_NPROCY, FLUME_IOS_NPROC, OMP_NUM_THREADS, UM_NSLOTS
    (optional, defaults to NSLOTS), OCN_NPROCX, OCN_NPROCY and ICE_NPROCS.
    The host names are read from the file named by PBS_NODEFILE, and the
    result is written to 'rankfile' in the directory CYLC_TASK_WORK_DIR.

    Raises:
        KeyError: If a required environment variable is not set.
        ValueError: If a numeric environment variable is not an integer.
    """
    # Some information about processor layout
    slots_per_host = int(os.environ['NSLOTS'])

    # Get the decomposition from the environment variables defined in suite.rc
    um_decomp = {
        'x': int(os.environ['UM_ATM_NPROCX']),
        'y': int(os.environ['UM_ATM_NPROCY']),
        'ios': int(os.environ['FLUME_IOS_NPROC']),
        'omp': int(os.environ['OMP_NUM_THREADS']),
        'slots': int(os.environ.get('UM_NSLOTS', slots_per_host))}
    print(um_decomp)

    mom_decomp = {
        'x': int(os.environ['OCN_NPROCX']),
        'y': int(os.environ['OCN_NPROCY']),
        'slots': slots_per_host}
    print(mom_decomp)

    ice_decomp = {
        'n': int(os.environ['ICE_NPROCS']),
        'slots': slots_per_host}
    print(ice_decomp)

    # Get a sorted list of unique hosts. Strip each line before removing
    # duplicates so that a final line without a newline is not counted as a
    # second host, and ignore blank lines.
    with open(os.environ['PBS_NODEFILE']) as nodefile:
        pbs_hosts = sorted(set(line.strip() for line in nodefile if line.strip()))
    print(pbs_hosts)

    with open(os.path.join(os.environ['CYLC_TASK_WORK_DIR'], 'rankfile'), 'w') as rankfile:
        coupled_rankfile(rankfile, um_decomp, mom_decomp, ice_decomp, pbs_hosts)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from create_rankfile import * | |
from six import StringIO | |
import os | |
# Host names shared by the tests in this module
hosts = ['alpha', 'beta', 'gamma']


def test_single():
    """One single-core rank lands on socket 0, core 0 of the first host."""
    layout = dict(nranks=1, start_rank=0, start_host=0, slots_per_host=1, omp=1)
    with StringIO() as buf:
        write_model_ranks(buf, hostnames=hosts, **layout)
        written = buf.getvalue()
    assert written == "rank 0=alpha slot=0:0-0\n"
def test_single_omp():
    """One rank with two threads is given cores 0-1 of socket 0."""
    layout = dict(nranks=1, start_rank=0, start_host=0, slots_per_host=4, omp=2)
    with StringIO() as buf:
        write_model_ranks(buf, hostnames=hosts, **layout)
        written = buf.getvalue()
    assert written == "rank 0=alpha slot=0:0-1\n"
def test_multi():
    """Eight single-core ranks on four-slot hosts fill two hosts, two per socket."""
    expected = (
        "rank 0=alpha slot=0:0-0\n"
        "rank 1=alpha slot=0:1-1\n"
        "rank 2=alpha slot=1:0-0\n"
        "rank 3=alpha slot=1:1-1\n"
        "rank 4=beta slot=0:0-0\n"
        "rank 5=beta slot=0:1-1\n"
        "rank 6=beta slot=1:0-0\n"
        "rank 7=beta slot=1:1-1\n"
    )
    layout = dict(nranks=8, start_rank=0, start_host=0, slots_per_host=4, omp=1)
    with StringIO() as buf:
        write_model_ranks(buf, hostnames=hosts, **layout)
        written = buf.getvalue()
    assert written == expected
def test_multi_omp():
    """Four two-thread ranks on four-slot hosts take one socket each."""
    expected = (
        "rank 0=alpha slot=0:0-1\n"
        "rank 1=alpha slot=1:0-1\n"
        "rank 2=beta slot=0:0-1\n"
        "rank 3=beta slot=1:0-1\n"
    )
    layout = dict(nranks=4, start_rank=0, start_host=0, slots_per_host=4, omp=2)
    with StringIO() as buf:
        write_model_ranks(buf, hostnames=hosts, **layout)
        written = buf.getvalue()
    assert written == expected
def test_cross_slot():
    """With three cores per socket, the middle two-thread rank spans both sockets."""
    expected = (
        "rank 0=alpha slot=0:0-1\n"
        "rank 1=alpha slot=0:2-2,1:0-0\n"
        "rank 2=alpha slot=1:1-2\n"
    )
    layout = dict(nranks=3, start_rank=0, start_host=0, slots_per_host=6, omp=2)
    with StringIO() as buf:
        write_model_ranks(buf, hostnames=hosts, **layout)
        written = buf.getvalue()
    assert written == expected
def test_coupled():
    """Each model of a small coupled layout starts on its own host."""
    um = {'x': 2, 'y': 2, 'ios': 0, 'omp': 2, 'slots': 8}
    mom = {'x': 2, 'y': 3, 'slots': 10}
    ice = {'n': 4, 'slots': 10}

    expected = (
        # Four two-thread ranks of the first model on alpha
        "rank 0=alpha slot=0:0-1\n"
        "rank 1=alpha slot=0:2-3\n"
        "rank 2=alpha slot=1:0-1\n"
        "rank 3=alpha slot=1:2-3\n"
        # Six single-core ranks of the second model on beta
        "rank 4=beta slot=0:0-0\n"
        "rank 5=beta slot=0:1-1\n"
        "rank 6=beta slot=0:2-2\n"
        "rank 7=beta slot=0:3-3\n"
        "rank 8=beta slot=0:4-4\n"
        "rank 9=beta slot=1:0-0\n"
        # Four single-core ranks of the third model on gamma
        "rank 10=gamma slot=0:0-0\n"
        "rank 11=gamma slot=0:1-1\n"
        "rank 12=gamma slot=0:2-2\n"
        "rank 13=gamma slot=0:3-3\n"
    )

    # coupled_rankfile reads its capacity limit from the environment
    os.environ['PBS_NCPUS'] = '30'
    with StringIO() as buf:
        coupled_rankfile(buf,
                         um_decomp=um,
                         mom_decomp=mom,
                         ice_decomp=ice,
                         hostnames=hosts)
        written = buf.getvalue()
    assert written == expected
def node_gen(n):
    """Yield the node names 'r0000', 'r0001', ... for indices 0 up to n - 1."""
    index = 0
    while True:
        if not index < n:
            return
        yield 'r%04d' % index
        index += 1
def test_coupled_large():
    """
    Run coupled_rankfile on a 53-host layout with 28 slots per host.

    This test used to reuse the name of the small coupled test defined
    earlier in the module, which replaced that test so it never ran, and it
    made no assertions. It now has its own name and checks the number of
    entries written plus the first and last entry.
    """
    os.environ['PBS_NCPUS'] = '1484'
    hosts = [
        'r3859', 'r3913', 'r3914', 'r3915', 'r3916', 'r3917', 'r3918', 'r3919',
        'r3920', 'r3921', 'r3922', 'r3927', 'r3928', 'r3929', 'r3930', 'r3931',
        'r3932', 'r3933', 'r3934', 'r3935', 'r4061', 'r4064', 'r4065', 'r4066',
        'r4067', 'r4068', 'r4069', 'r4070', 'r4073', 'r4076', 'r4082', 'r4083',
        'r4084', 'r4081', 'r4085', 'r4086', 'r4087', 'r4090', 'r4091', 'r4092',
        'r4093', 'r4094', 'r4095', 'r4166', 'r4247', 'r4248', 'r4250', 'r4257',
        'r4258', 'r4435', 'r4440', 'r4472', 'r4485',
    ]
    with StringIO() as f:
        coupled_rankfile(f,
                         um_decomp={
                             'x': 28,
                             'y': 24,
                             'ios': 0,
                             'omp': 2,
                             'slots': 28,
                         },
                         mom_decomp={
                             'x': 8,
                             'y': 12,
                             'slots': 28,
                         },
                         ice_decomp={
                             'n': 16,
                             'slots': 28,
                         },
                         hostnames=hosts,
                         )
        lines = f.getvalue().splitlines()

    # One entry per rank: 28*24 for the UM, 8*12 for MOM and 16 for CICE
    assert len(lines) == 28 * 24 + 8 * 12 + 16
    # The first UM rank takes two cores on the first host
    assert lines[0] == "rank 0=r3859 slot=0:0-1"
    # The last CICE rank is the 16th rank on the last host (14 cores per socket)
    assert lines[-1] == "rank 783=r4485 slot=1:1-1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment