-
-
Save eschnett/7aab9bd9f00cddd89b2bfac87ff1a1d2 to your computer and use it in GitHub Desktop.
Running CarpetX on Crusher with HIP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Option list for the Einstein Toolkit: CarpetX on Crusher with HIP

# The "weird" options here should probably be made the default in the
# ET instead of being set here.

# Whenever this version string changes, the application is configured
# and rebuilt from scratch
VERSION = crusher-hip-2023-05-15

# EXTERNAL_* toolchain: Cray compiler wrappers with plain optimisation flags
EXTERNAL_CC = cc
EXTERNAL_CXX = CC
EXTERNAL_F90 = ftn
EXTERNAL_LD = CC
EXTERNAL_CFLAGS = -g -O3
EXTERNAL_CXXFLAGS = -g -O3
EXTERNAL_F90FLAGS = -g -O3
EXTERNAL_LDFLAGS = -g -O3

# Main toolchain: C++ is compiled and linked with hipcc for gfx90a GPUs,
# C and Fortran go through the Cray wrappers.
# NOTE(review): recent ROCm releases deprecate --amdgpu-target in favour of
# --offload-arch; bump VERSION above if the flag is switched.
CPP = cpp
CC = cc
CXX = hipcc --amdgpu-target=gfx90a
FPP = cpp
F90 = ftn
# LD = CC
LD = hipcc --amdgpu-target=gfx90a

CPPFLAGS = -DSIMD_CPU
CFLAGS = -g -std=gnu11
CXXFLAGS = -g -std=c++17 -D__cpp_lib_filesystem=1L
FPPFLAGS = -traditional
F90FLAGS = -g
LDFLAGS = -fgpu-rdc --hip-link

# Extra link libraries taken from the CCE 15.0.0 installation
LIBDIRS = /opt/cray/pe/cce/15.0.0/cce/x86_64/lib
LIBS = stdc++fs f u

C_LINE_DIRECTIVES = yes
F_LINE_DIRECTIVES = yes

DISABLE_REAL16 = yes

# Debug settings (disabled)
DEBUG = no
CPP_DEBUG_FLAGS = -DCARPET_DEBUG
C_DEBUG_FLAGS =
CXX_DEBUG_FLAGS =
FPP_DEBUG_FLAGS = -DCARPET_DEBUG
F90_DEBUG_FLAGS =

# Optimisation settings (enabled)
OPTIMISE = yes
# -ffp-contract=fast -fno-math-errno -fno-rounding-math -funsafe-math-optimizations
C_OPTIMISE_FLAGS = -O3
CXX_OPTIMISE_FLAGS = -O3
F90_OPTIMISE_FLAGS = -O3

OPENMP = no

# Warning settings (enabled)
WARN = yes
CPP_WARN_FLAGS = -Wall
C_WARN_FLAGS =
CXX_WARN_FLAGS =
FPP_WARN_FLAGS = -Wall
F90_WARN_FLAGS =

VECTORISE = no

# Cray-compatible libraries:
# (entries set to BUILD / NO_BUILD are not taken from a system path)
ADIOS2_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/adios2-2.8.3-hz47nph7zsdem2px2w353r6z7gyhoft4
AMREX_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/amrex-22.11-ceb5rs43slzmbjmn6o2lv3xbfsvcfot4
AMREX_INC_DIRS = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/amrex-22.11-ceb5rs43slzmbjmn6o2lv3xbfsvcfot4/include
AMREX_LIB_DIRS =
AMREX_LIBS = -file=/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/amrex-22.11-ceb5rs43slzmbjmn6o2lv3xbfsvcfot4/lib/libamrex.a
BOOST_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/boost-1.79.0-sauuo5yew5byx2bmtf2g4i3wn6un6ipu
FFTW3_DIR = /opt/cray/pe/fftw/3.3.10.3/x86_trento
GSL_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/gsl-2.7.1-dundwxsuuvyeyq5jqrkchdxqrr6mccyo
HDF5_DIR = /opt/cray/pe/hdf5-parallel/1.12.2.1/crayclang/14.0
HWLOC_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/hwloc-2.5.0-4p6jkgf5ez6wr27pytkzyptppzpugu3e
LIBJPEG_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/libjpeg-turbo-2.1.0-med5q3cu3wumshqtclzz276yxtqidgvt
LORENE_DIR = BUILD
MPI_DIR = /opt/cray/pe/mpich/8.1.23/ofi/crayclang/10.0
MPI_LIB_DIRS = /opt/cray/pe/mpich/8.1.23/ofi/crayclang/10.0/lib /opt/cray/pe/mpich/8.1.23/gtl/lib
MPI_LIBS = mpi mpi_gtl_hsa
NSIMD_DIR = BUILD
NSIMD_SIMD = AVX2
OPENBLAS_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/openblas-0.3.17-54x7v5e4i6yxqs6j5nebrbztpy4lftj4
OPENPMD_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/openpmd-api-0.14.4-6e2k3qe63hutydezgbiydg5ev3ft2ri6
PTHREADS_DIR = NO_BUILD
SILO_DIR = BUILD
SSHT_DIR = BUILD
YAML_CPP_DIR = BUILD
ZLIB_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/zlib-1.2.11-zuyclcfig4tizmb2bm2h4roqsp3rwn2y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[crusher-hip]

# last-tested-on: ???
# last-tested-by: ???

# Machine description
nickname = crusher-hip
name = Crusher
location = Oak Ridge National Laboratory
description = HPC system at ORNL
status = experimental

# Access to this machine
hostname = crusher.olcf.ornl.gov
# ORNL does not offer a passwordless login and disables SSH multiplexing.
# However by setting a long ControlPersist value one can re-use the single
# connection they do allow to avoid having to enter the password multiple times.
sshopts = -oControlMaster=auto -oControlPath=~/.ssh/simfactory-crusher-%C -oControlPersist=3600
# Dots are escaped so that they only match a literal "." in the host name.
aliaspattern = ^login\d\.crusher\.olcf\.ornl\.gov$

# Modules and environment variables set up before building and running
envsetup = <<EOT
module load craype-accel-amd-gfx90a
module load rocm/5.3.0
export MPICH_GPU_SUPPORT_ENABLED=1
export PE_MPICH_GTL_DIR_amd_gfx90a="-L/opt/cray/pe/mpich/8.1.23/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
module load adios2/2.8.3
module load amrex/22.11
module load boost/1.79.0-cxx17
module load cray-fftw/3.3.10.3
module load cray-hdf5-parallel/1.12.2.1
module load gsl/2.7.1
module load hwloc/2.5.0
module load libjpeg-turbo/2.1.0
module load openblas/0.3.17
module load openpmd-api/0.14.4
module load zlib/1.2.11
EOT

# Source tree management
sourcebasedir = /ccs/home/@USER@/crusher
optionlist = crusher-hip.cfg
submitscript = crusher-hip.sub
runscript = crusher-hip.run
make = make -j @MAKEJOBS@
makejobs = 8
disabled-thorns = <<EOT
AsterX/Con2PrimFactory
AsterX/AsterSeeds
AsterX/AsterX
CarpetXold/HydroInitial
CapyrX/MultiPatch
CapyrX/TestMultiPatch
# LORENE builds slowly, add this later
ExternalLibraries/LORENE
EinsteinInitialData/Meudon_Bin_BH
EinsteinInitialData/Meudon_Bin_NS
EinsteinInitialData/Meudon_Mag_NS
ExternalLibraries/NSIMD
# Don't know where this is installed on Crusher
ExternalLibraries/PETSc
CarpetX/PDESolvers
CarpetX/Poisson2
# Cray pointers might be broken
CactusTest/TestFortranCrayPointers
# Function is too large
CarpetXNRPy/Z4cNRPy
EOT
enabled-thorns = <<EOT
EOT

# Simulation management
basedir = /lustre/orion/@ALLOCATION@/scratch/@USER@/simulations
quota = 10 # don't use all disk space
cpu = AMD EPYC 7A53 64-Core Processor
cpufreq = 2.75
flop/cycle = 16
ppn = 64
spn = 8
mpn = 8
max-num-threads = 128
num-threads = 8
memory = 524288
max-num-smt = 2
num-smt = 1
nodes = 160
min-ppn = 64
allocation = NO_ALLOCATION
queue = batch
maxwalltime = 8:00:00 # also depends on number of nodes

# Slurm job control
submit = sbatch @SCRIPTFILE@ && sleep 1
getstatus = squeue -j @JOB_ID@
stop = scancel @JOB_ID@
submitpattern = Submitted batch job ([0-9]+)
statuspattern = '@JOB_ID@ '
queuedpattern = ' PD '
runningpattern = ' (CF|CG|R|TO) '
holdingpattern = ' S '

exechost = localhost
# NOTE(review): crusher-hip.sub sends the batch log to @RUNDIR@/stdlog.txt and
# crusher-hip.run redirects the executable's output to stdout.txt/stderr.txt;
# confirm that the @SIMULATION_NAME@.out/.err files read below actually exist.
stdout = cat @SIMULATION_NAME@.out
stderr = cat @SIMULATION_NAME@.err
stdout-follow = tail -n 100 -f @SIMULATION_NAME@.out @SIMULATION_NAME@.err
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash

# SimFactory run script for CarpetX on Crusher with HIP.
# Tokens of the form @NAME@ and @( ... )@ are SimFactory template
# placeholders; they are left untouched here.

echo "Preparing:"
# Abort on errors, unset variables and failing pipeline stages; trace commands.
set -euxo pipefail

cd "@RUNDIR@-active"

echo 'Job setup:'
echo ' Allocated:'
echo ' Nodes: @NODES@'
echo ' Cores per node: @PPN@'
echo ' Running:'
echo ' MPI processes: @NUM_PROCS@'
echo ' OpenMP threads per process: @NUM_THREADS@'
echo ' MPI processes per node: @NODE_PROCS@'
echo ' OpenMP threads per core: @NUM_SMT@'
echo ' OpenMP threads per node: @(@PPN_USED@ * @NUM_SMT@)@'

echo "Checking:"
date
env
hostname
pwd
module list
scontrol show hostnames
# Record the allocated host names for later inspection.
hostfile="SIMFACTORY/NODES"
scontrol show hostnames >"${hostfile}"
# Best effort only: a missing or failing ifconfig must not abort the job.
/sbin/ifconfig || true
# VIEW="@SOURCEDIR@/view"

echo "Environment:"
export 'SIMULATION_ID=@SIMULATION_ID@'
export 'CACTUS_MAX_MEMORY=@(@MEMORY@ * 1024)@' # Byte
export 'CACTUS_NUM_PROCS=@NUM_PROCS@'
export 'CACTUS_NUM_THREADS=@NUM_THREADS@'
export 'CACTUS_SET_THREAD_BINDINGS=1'
export 'GLIBCXX_FORCE_NEW=1'
export 'GMON_OUT_PREFIX=gmon.out'
export 'OMP_DISPLAY_ENV=FALSE' # false, true
export 'OMP_NUM_THREADS=@NUM_THREADS@'
export 'OMP_PLACES=cores' # threads, cores, sockets
export 'OMP_PROC_BIND=FALSE' # false, true, master, close, spread
export 'OMP_STACKSIZE=8192' # kByte
# Snapshot of the environment as of this point (later exports are not included).
env | sort >'SIMFACTORY/ENVIRONMENT'
export MPICH_GPU_SUPPORT_ENABLED=1

echo "Starting:"
date
# Assign first, export second, so that a failing `date` is not masked by `export`.
CACTUS_STARTTIME=$(date +%s)
export CACTUS_STARTTIME
# RePrimAnd uses `libboost_atomic` but its build script does not set the respective run path
# NOTE(review): this path lives in one user's home directory; other users need their own copy.
# The ${VAR:+...} form keeps `set -u` from aborting when LD_LIBRARY_PATH is unset
# and avoids a leading ':' (which would add the current directory to the search path).
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/ccs/home/schnett/crusher/boost-amd-1.82.0/lib"
# One GPU is requested per MPI process; output goes to files in the run directory.
time \
  srun \
    --ntasks=@NUM_PROCS@ \
    --ntasks-per-node=@NODE_PROCS@ \
    --gpus=@NUM_PROCS@ \
    --gpus-per-node=@NODE_PROCS@ \
    --gpu-bind=closest \
    "@EXECUTABLE@" \
    -L 3 \
    "@PARFILE@" \
    >stdout.txt \
    2>stderr.txt

echo "Stopping:"
date
echo "Done."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
# SimFactory submit script for CarpetX on Crusher with HIP (Slurm).
# Tokens of the form @NAME@ and @( ... )@ are SimFactory template placeholders.
#SBATCH --account=@ALLOCATION@
#SBATCH --job-name=@SHORT_SIMULATION_NAME@
#SBATCH --mail-type=ALL
#SBATCH --mail-user=@EMAIL@
#SBATCH --output=@RUNDIR@/stdlog.txt
#SBATCH --time=@WALLTIME@
#SBATCH --partition=@QUEUE@
#SBATCH --nodes=@NODES@
#SBATCH --gpus-per-node=@NODE_PROCS@
#SBATCH --gpu-bind=closest
# Jobs with this option will not start
##SBATCH --tasks-per-node=@PPN_USED@
#SBATCH --cpus-per-task=1
# Only adds a dependency when this job is chained behind another one.
#SBATCH @("@CHAINED_JOB_ID@" != "" ? "--dependency=afterany:@CHAINED_JOB_ID@" : "")@
#SBATCH --export=ALL

# Stop here rather than launching SimFactory from the wrong directory.
cd "@SOURCEDIR@" || exit 1
@SIMFACTORY@ run @SIMULATION_NAME@ --basedir=@BASEDIR@ --machine=@MACHINE@ --restart-id=@RESTART_ID@ @FROM_RESTART_COMMAND@
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment