Skip to content

Instantly share code, notes, and snippets.

@eschnett
Created June 15, 2023 00:44
Show Gist options
  • Save eschnett/7aab9bd9f00cddd89b2bfac87ff1a1d2 to your computer and use it in GitHub Desktop.
Running CarpetX on Crusher with HIP
# Option list for the Einstein Toolkit
# The "weird" options here should probably be made the default in the
# ET instead of being set here.
# Whenever this version string changes, the application is configured
# and rebuilt from scratch
VERSION = crusher-hip-2023-05-15
# Compilers for building external libraries: the Cray compiler wrappers
# (cc/CC/ftn), not hipcc, so the libraries are plain CPU builds.
EXTERNAL_CC = cc
EXTERNAL_CXX = CC
EXTERNAL_F90 = ftn
EXTERNAL_LD = CC
EXTERNAL_CFLAGS = -g -O3
EXTERNAL_CXXFLAGS = -g -O3
EXTERNAL_F90FLAGS = -g -O3
EXTERNAL_LDFLAGS = -g -O3
# Compilers for Cactus itself: C++ and the link step go through hipcc so
# device code targets the MI250X (gfx90a) GPUs.
CPP = cpp
CC = cc
CXX = hipcc --amdgpu-target=gfx90a
FPP = cpp
F90 = ftn
# LD = CC
LD = hipcc --amdgpu-target=gfx90a
CPPFLAGS = -DSIMD_CPU
CFLAGS = -g -std=gnu11
# -D__cpp_lib_filesystem=1L: force-advertise std::filesystem support
# (paired with the stdc++fs entry in LIBS below)
CXXFLAGS = -g -std=c++17 -D__cpp_lib_filesystem=1L
FPPFLAGS = -traditional
F90FLAGS = -g
# -fgpu-rdc/--hip-link: relocatable device code, linked by hipcc
LDFLAGS = -fgpu-rdc --hip-link
# Cray CCE runtime libraries needed when hipcc (not CC) drives the link
LIBDIRS = /opt/cray/pe/cce/15.0.0/cce/x86_64/lib
LIBS = stdc++fs f u
C_LINE_DIRECTIVES = yes
F_LINE_DIRECTIVES = yes
DISABLE_REAL16 = yes
DEBUG = no
CPP_DEBUG_FLAGS = -DCARPET_DEBUG
C_DEBUG_FLAGS =
CXX_DEBUG_FLAGS =
FPP_DEBUG_FLAGS = -DCARPET_DEBUG
F90_DEBUG_FLAGS =
OPTIMISE = yes
# -ffp-contract=fast -fno-math-errno -fno-rounding-math -funsafe-math-optimizations
C_OPTIMISE_FLAGS = -O3
CXX_OPTIMISE_FLAGS = -O3
F90_OPTIMISE_FLAGS = -O3
OPENMP = no
WARN = yes
CPP_WARN_FLAGS = -Wall
C_WARN_FLAGS =
CXX_WARN_FLAGS =
FPP_WARN_FLAGS = -Wall
F90_WARN_FLAGS =
VECTORISE = no
# Cray-compatible libraries:
# Paths below point at the pre-installed Spack environments on Crusher;
# they must match the "module load" lines in the machine entry's envsetup.
ADIOS2_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/adios2-2.8.3-hz47nph7zsdem2px2w353r6z7gyhoft4
AMREX_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/amrex-22.11-ceb5rs43slzmbjmn6o2lv3xbfsvcfot4
AMREX_INC_DIRS = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/amrex-22.11-ceb5rs43slzmbjmn6o2lv3xbfsvcfot4/include
AMREX_LIB_DIRS =
AMREX_LIBS = -file=/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/amrex-22.11-ceb5rs43slzmbjmn6o2lv3xbfsvcfot4/lib/libamrex.a
BOOST_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/boost-1.79.0-sauuo5yew5byx2bmtf2g4i3wn6un6ipu
FFTW3_DIR = /opt/cray/pe/fftw/3.3.10.3/x86_trento
GSL_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/gsl-2.7.1-dundwxsuuvyeyq5jqrkchdxqrr6mccyo
HDF5_DIR = /opt/cray/pe/hdf5-parallel/1.12.2.1/crayclang/14.0
HWLOC_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/hwloc-2.5.0-4p6jkgf5ez6wr27pytkzyptppzpugu3e
LIBJPEG_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/libjpeg-turbo-2.1.0-med5q3cu3wumshqtclzz276yxtqidgvt
# BUILD: Cactus builds this library itself from its ExternalLibraries thorn
LORENE_DIR = BUILD
MPI_DIR = /opt/cray/pe/mpich/8.1.23/ofi/crayclang/10.0
# gtl = GPU Transport Layer, required for GPU-aware MPI on the HSA/ROCm stack
MPI_LIB_DIRS = /opt/cray/pe/mpich/8.1.23/ofi/crayclang/10.0/lib /opt/cray/pe/mpich/8.1.23/gtl/lib
MPI_LIBS = mpi mpi_gtl_hsa
NSIMD_DIR = BUILD
NSIMD_SIMD = AVX2
OPENBLAS_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/openblas-0.3.17-54x7v5e4i6yxqs6j5nebrbztpy4lftj4
OPENPMD_DIR = /sw/crusher/spack-envs/base/opt/cray-sles15-zen3/cce-15.0.0/openpmd-api-0.14.4-6e2k3qe63hutydezgbiydg5ev3ft2ri6
# NO_BUILD: assume pthreads is provided by the toolchain, do not build it
PTHREADS_DIR = NO_BUILD
SILO_DIR = BUILD
SSHT_DIR = BUILD
YAML_CPP_DIR = BUILD
ZLIB_DIR = /sw/crusher/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/zlib-1.2.11-zuyclcfig4tizmb2bm2h4roqsp3rwn2y
# Simfactory machine-database entry for OLCF Crusher (HIP/GPU build).
[crusher-hip]
# last-tested-on: ???
# last-tested-by: ???
# Machine description
nickname = crusher-hip
name = Crusher
location = Oak Ridge National Laboratory
description = HPC system at ORNL
status = experimental
# Access to this machine
hostname = crusher.olcf.ornl.gov
# ORNL does not offer a passwordless login and disables SSH multiplexing.
# However by setting a long ControlPersist value one can re-use the single
# connection they do allow to avoid having to enter the password multiple times.
# NOTE(review): the ControlPath still says "summit" (apparently copied from
# the Summit entry); harmless, but rename if both machines are used at once.
sshopts = -oControlMaster=auto -oControlPath=~/.ssh/simfactory-summit-%C -oControlPersist=3600
# Dots are escaped so the pattern matches only the literal host name;
# an unescaped "." would match any character.
aliaspattern = ^login\d\.crusher\.olcf\.ornl\.gov$
# Environment set up before configuring/building; the module versions here
# must stay in sync with the library paths in the option list.
envsetup = <<EOT
module load craype-accel-amd-gfx90a
module load rocm/5.3.0
export MPICH_GPU_SUPPORT_ENABLED=1
export PE_MPICH_GTL_DIR_amd_gfx90a="-L/opt/cray/pe/mpich/8.1.23/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
module load adios2/2.8.3
module load amrex/22.11
module load boost/1.79.0-cxx17
module load cray-fftw/3.3.10.3
module load cray-hdf5-parallel/1.12.2.1
module load gsl/2.7.1
module load hwloc/2.5.0
module load libjpeg-turbo/2.1.0
module load openblas/0.3.17
module load openpmd-api/0.14.4
module load zlib/1.2.11
EOT
# Source tree management
sourcebasedir = /ccs/home/@USER@/crusher
optionlist = crusher-hip.cfg
submitscript = crusher-hip.sub
runscript = crusher-hip.run
make = make -j @MAKEJOBS@
makejobs = 8
# Thorns that do not (yet) build or run on this machine
disabled-thorns = <<EOT
AsterX/Con2PrimFactory
AsterX/AsterSeeds
AsterX/AsterX
CarpetXold/HydroInitial
CapyrX/MultiPatch
CapyrX/TestMultiPatch
# LORENE builds slowly, add this later
ExternalLibraries/LORENE
EinsteinInitialData/Meudon_Bin_BH
EinsteinInitialData/Meudon_Bin_NS
EinsteinInitialData/Meudon_Mag_NS
ExternalLibraries/NSIMD
# Don't know where this is installed on Crusher
ExternalLibraries/PETSc
CarpetX/PDESolvers
CarpetX/Poisson2
# Cray pointers might be broken
CactusTest/TestFortranCrayPointers
# Function is too large
CarpetXNRPy/Z4cNRPy
EOT
enabled-thorns = <<EOT
EOT
# Simulation management
basedir = /lustre/orion/@ALLOCATION@/scratch/@USER@/simulations
quota = 10 # don't use all disk space
# Hardware: 64-core Trento CPU, 8 NUMA domains, 4x MI250X (8 GCDs) per node
cpu = AMD EPYC 7A53 64-Core Processor
cpufreq = 2.75
flop/cycle = 16
ppn = 64
spn = 8
mpn = 8
max-num-threads = 128
num-threads = 8
# memory is per node, in MByte
memory = 524288
max-num-smt = 2
num-smt = 1
nodes = 160
min-ppn = 64
allocation = NO_ALLOCATION
# Slurm batch system settings
queue = batch
maxwalltime = 8:00:00 # also depends on number of nodes
submit = sbatch @SCRIPTFILE@ && sleep 1
getstatus = squeue -j @JOB_ID@
stop = scancel @JOB_ID@
submitpattern = Submitted batch job ([0-9]+)
statuspattern = '@JOB_ID@ '
queuedpattern = ' PD '
runningpattern = ' (CF|CG|R|TO) '
holdingpattern = ' S '
exechost = localhost
# TODO: make this work using bpeek
stdout = cat @SIMULATION_NAME@.out
stderr = cat @SIMULATION_NAME@.err
stdout-follow = tail -n 100 -f @SIMULATION_NAME@.out @SIMULATION_NAME@.err
#! /bin/bash
# Simfactory run script (crusher-hip.run): executed on the compute allocation
# to launch Cactus under srun. @VAR@ placeholders are substituted by
# Simfactory before the script runs.
echo "Preparing:"
# Fail fast on errors, unset variables, and pipeline failures; echo commands
set -euxo pipefail
cd @RUNDIR@-active
echo 'Job setup:'
echo '   Allocated:'
echo '      Nodes:                      @NODES@'
echo '      Cores per node:             @PPN@'
echo '   Running:'
echo '      MPI processes:              @NUM_PROCS@'
echo '      OpenMP threads per process: @NUM_THREADS@'
echo '      MPI processes per node:     @NODE_PROCS@'
echo '      OpenMP threads per core:    @NUM_SMT@'
echo '      OpenMP threads per node:    @(@PPN_USED@ * @NUM_SMT@)@'
echo "Checking:"
# Record the job environment for later debugging
date
env
hostname
pwd
module list
scontrol show hostnames
# Save the list of allocated nodes alongside the simulation
hostfile="SIMFACTORY/NODES"
scontrol show hostnames >"${hostfile}"
# ifconfig may be absent or unprivileged; ignore failure
/sbin/ifconfig || true
# VIEW="@SOURCEDIR@/view"
echo "Environment:"
export 'SIMULATION_ID=@SIMULATION_ID@'
export 'CACTUS_MAX_MEMORY=@(@MEMORY@ * 1024)@' # Byte
export 'CACTUS_NUM_PROCS=@NUM_PROCS@'
export 'CACTUS_NUM_THREADS=@NUM_THREADS@'
export 'CACTUS_SET_THREAD_BINDINGS=1'
export 'GLIBCXX_FORCE_NEW=1'
export 'GMON_OUT_PREFIX=gmon.out'
export 'OMP_DISPLAY_ENV=FALSE' # false, true
export 'OMP_NUM_THREADS=@NUM_THREADS@'
export 'OMP_PLACES=cores' # threads, cores, sockets
export 'OMP_PROC_BIND=FALSE' # false, true, master, close, spread
export 'OMP_STACKSIZE=8192' # kByte
# Snapshot the full environment for reproducibility
env | sort >'SIMFACTORY/ENVIRONMENT'
# Enable GPU-aware MPI (Cray MPICH)
export MPICH_GPU_SUPPORT_ENABLED=1
echo "Starting:"
date
export CACTUS_STARTTIME=$(date +%s)
# RePrimAnd uses `libboost_atomic` but its build script does not set the respective run path
# NOTE(review): hard-coded user home directory; other users need their own
# Boost build at this path or must adjust it.
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/ccs/home/schnett/crusher/boost-amd-1.82.0/lib"
# One GPU (GCD) per MPI rank; --gpu-bind=closest pairs each rank with the
# GCD nearest to its cores.
time \
srun \
    --ntasks=@NUM_PROCS@ \
    --ntasks-per-node=@NODE_PROCS@ \
    --gpus=@NUM_PROCS@ \
    --gpus-per-node=@NODE_PROCS@ \
    --gpu-bind=closest \
    "@EXECUTABLE@" \
    -L 3 \
    "@PARFILE@" \
    >stdout.txt \
    2>stderr.txt
echo "Stopping:"
date
echo "Done."
#! /bin/bash
# Simfactory submit script (crusher-hip.sub): submitted via sbatch; the body
# re-invokes Simfactory, which then executes the run script on the allocation.
# @VAR@ placeholders are substituted by Simfactory before submission.
#SBATCH --account=@ALLOCATION@
#SBATCH --job-name=@SHORT_SIMULATION_NAME@
#SBATCH --mail-type=ALL
#SBATCH --mail-user=@EMAIL@
#SBATCH --output=@RUNDIR@/stdlog.txt
#SBATCH --time=@WALLTIME@
#SBATCH --partition=@QUEUE@
#SBATCH --nodes=@NODES@
#SBATCH --gpus-per-node=@NODE_PROCS@
#SBATCH --gpu-bind=closest
# Jobs with this option will not start
##SBATCH --tasks-per-node=@PPN_USED@
#SBATCH --cpus-per-task=1
# Chain restarts: start only after the previous job in the chain finishes
#SBATCH @("@CHAINED_JOB_ID@" != "" ? "--dependency=afterany:@CHAINED_JOB_ID@" : "")@
#SBATCH --export=ALL
cd @SOURCEDIR@
@SIMFACTORY@ run @SIMULATION_NAME@ --basedir=@BASEDIR@ --machine=@MACHINE@ --restart-id=@RESTART_ID@ @FROM_RESTART_COMMAND@
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment