Last active
January 28, 2020 15:34
-
-
Save christophernhill/bff35521a2fa0c499578c98751be1b3c to your computer and use it in GitHub Desktop.
Satori IAP 2020 energy profiling code fragments from Florin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#BSUB -L /bin/bash | |
## | |
## Begin LSF directives (change only the number of required processes/GPUs and the job name as needed)
## - "-n 4" for a single AC922, "-n 8" for 2x AC922s, "-n 16" for 4x AC922s, etc.
## | |
#BSUB -J "energy-ai" | |
#BSUB -o "energy-ai_o.%J" | |
#BSUB -e "energy-ai_e.%J" | |
#BSUB -n 16 | |
#BSUB -R "span[ptile=4]" | |
#BSUB -gpu "num=4" | |
#BSUB -q "normal" | |
##BSUB -x | |
# | |
# Load the perftools environment module
#
module load perftools
#
# Set up the user environment (Python, WMLCE conda virtual environment, etc.).
# The job stops here if conda cannot be initialised or the environment cannot
# be activated, instead of going on to run the benchmark with the wrong Python.
#
HOME2=/nobackup/users/florin
PYTHON_VIRTUAL_ENVIRONMENT=wmlce-1.6.2
CONDA_ROOT="${HOME2}/anaconda3"
if ! source "${CONDA_ROOT}/etc/profile.d/conda.sh"; then
  echo "ERROR: cannot source ${CONDA_ROOT}/etc/profile.d/conda.sh" >&2
  exit 1
fi
if ! conda activate "${PYTHON_VIRTUAL_ENVIRONMENT}"; then
  echo "ERROR: cannot activate conda environment ${PYTHON_VIRTUAL_ENVIRONMENT}" >&2
  exit 1
fi
export EGO_TOP=/opt/ibm/spectrumcomputing
# Generate setup.sh, the per-node preparation script that is launched through
# mpirun. On local rank 0 it starts the background power monitor (recording the
# monitor's PID as a file name under /tmp/<LSB_JOBID>), sets the SMT and GPU
# modes, and deletes any existing scratch directory.
# The here-document delimiter is quoted, so the text is written out literally
# and every variable is expanded when setup.sh runs, not when it is generated.
cat > setup.sh << 'EoF_s'
#! /bin/sh
##
# A missing OMPI_COMM_WORLD_LOCAL_RANK is treated as "not local rank 0".
if [ "${OMPI_COMM_WORLD_LOCAL_RANK:--1}" -eq 0 ]
then
  # A non-zero PID_RIPC inherited from the environment suppresses the monitor.
  PID_RIPC=${PID_RIPC:-0}
  if [ "${PID_RIPC}" -eq 0 ]; then
    if [ -n "${LSB_JOBID:-}" ]; then
      ./read_inst_power_cons.sh > "energy-consumption.out.${LSB_JOBID}_${OMPI_COMM_WORLD_RANK}" 2>&1 &
      PID_RIPC=$!
      # The empty marker file is named after the monitor PID so that reset.sh
      # can find the process to terminate.
      mkdir -p "/tmp/${LSB_JOBID}"
      touch "/tmp/${LSB_JOBID}/${PID_RIPC}"
    else
      # Without a job id the marker would land directly in /tmp and the
      # monitor could not be located again, so it is not started.
      echo "setup.sh: LSB_JOBID is not set; power monitor not started" >&2
    fi
  fi
  sudo satori-ppc64_cpu --smt=2                    # Set the SMT mode to 2
  sudo satori-ppc64_cpu --smt                      # Verify the SMT mode
  sudo satori-nvidia-smi -rac                      # For POWER9+V100
  sudo satori-nvidia-smi --compute-mode=DEFAULT    # Set the compute mode to DEFAULT
  /bin/rm -rf "/tmp/data.${USER}"                  # Delete the scratch directory
fi
EoF_s
chmod +x setup.sh
# | |
# Generate launch.sh, the wrapper that ddlrun starts in front of the real
# command. It sets CUDA_VISIBLE_DEVICES to 0,1,2,3 and then replaces itself
# with the wrapped command.
# "$@" forwards every argument as its own word, so arguments that contain
# spaces or glob characters reach the command unchanged (a bare $* would
# re-split and re-glob them).
#
cat > launch.sh << 'EoF_s'
#! /bin/sh
export CUDA_VISIBLE_DEVICES=0,1,2,3
exec "$@"
EoF_s
chmod +x launch.sh
# Run the generated setup.sh through mpirun before the benchmark starts
mpirun --tag-output ./setup.sh
#
# Run the training/inference job
# (change only the script name and the options after the python command)
#
ddlrun -v ./launch.sh \
  python "${HOME2}/hpms/tf_cnn_benchmarks/tf_cnn_benchmarks.py" \
    --model resnet50 \
    --batch_size 128 \
    --variable_update=horovod \
    --num_batches=1000 \
    --use_fp16
# Generate reset.sh, the per-node teardown script that is launched through
# mpirun. On local rank 0 it terminates the power monitor(s) whose PIDs were
# recorded as file names under /tmp/<LSB_JOBID>, removes that directory, and
# puts the SMT and GPU settings back.
# The here-document delimiter is quoted, so the text is written out literally
# and every variable is expanded when reset.sh runs, not when it is generated.
cat > reset.sh << 'EoF_r'
#! /bin/sh
module load perftools
# A missing OMPI_COMM_WORLD_LOCAL_RANK is treated as "not local rank 0".
if [ "${OMPI_COMM_WORLD_LOCAL_RANK:--1}" -eq 0 ]
then
  if [ -n "${LSB_JOBID:-}" ]; then
    # Walk the marker files with a glob instead of parsing ls, so that zero,
    # one or several recorded PIDs are all handled.
    for PID_FILE in "/tmp/${LSB_JOBID}"/*
    do
      [ -e "${PID_FILE}" ] || continue      # glob matched nothing
      PID_RIPC=${PID_FILE##*/}
      case "${PID_RIPC}" in
        ''|*[!0-9]*) continue ;;            # not a PID marker
      esac
      if [ "${PID_RIPC}" -ne 0 ]; then
        kill -TERM "${PID_RIPC}"
      fi
    done
    /bin/rm -rf "/tmp/${LSB_JOBID}"
  else
    # An empty job id would turn the removal below into "rm -rf /tmp/".
    echo "reset.sh: LSB_JOBID is not set; skipping power monitor cleanup" >&2
  fi
  sudo satori-ppc64_cpu --smt=on                            # SMT mode back to default
  sudo satori-nvidia-smi -rac                               # Clocks back to default
  sudo satori-nvidia-smi --compute-mode=EXCLUSIVE_PROCESS   # Compute mode back to p10 default
fi
EoF_r
chmod +x reset.sh
# Run the generated reset.sh through mpirun
mpirun --tag-output ./reset.sh
# Clean up: remove the helper scripts one by one
for helper_script in setup.sh reset.sh launch.sh
do
  /bin/rm -f "${helper_script}"
done
#
# EoF
#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh
# Wrapper placed in front of the real command: sets CUDA_VISIBLE_DEVICES to
# 0,1,2,3 and then replaces this shell with the command given as arguments.
export CUDA_VISIBLE_DEVICES=0,1,2,3
# "$@" forwards every argument as its own word; a bare $* would re-split
# arguments on whitespace and expand glob characters.
exec "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: run setup.sh through mpirun
mpirun --tag-output ./setup.sh
# Step 2: run the benchmark through ddlrun and the launch.sh wrapper
# (HOME2 is not set in this fragment; it must already be defined by the caller)
ddlrun -v ./launch.sh \
  python "${HOME2}/hpms/tf_cnn_benchmarks/tf_cnn_benchmarks.py" \
    --model resnet50 \
    --batch_size 128 \
    --variable_update=horovod \
    --num_batches=1000 \
    --use_fp16
# Step 3: run reset.sh through mpirun
mpirun --tag-output ./reset.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh
# Power sampler: every 3 seconds, print one record made of the current UTC
# epoch time followed by the "Instantaneous power reading:" line reported by
# "satori-ipmitool dcmi power reading". Runs until it is killed.
module load perftools
while :
do
  SAMPLE_TIME=$(date -u +"%s")
  POWER_LINE=$(sudo satori-ipmitool dcmi power reading | grep "Instantaneous power reading:")
  # printf instead of "echo -n" (not portable under /bin/sh); the record is
  # always newline-terminated, even when no reading was returned, so
  # successive samples never run together on one line.
  printf '%s: %s\n' "${SAMPLE_TIME}" "${POWER_LINE}"
  sleep 3
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh
# Per-node teardown. On local rank 0: terminate the power monitor(s) whose PIDs
# were recorded as file names under /tmp/<LSB_JOBID>, remove that directory,
# and put the SMT and GPU settings back.
module load perftools
# A missing OMPI_COMM_WORLD_LOCAL_RANK is treated as "not local rank 0".
if [ "${OMPI_COMM_WORLD_LOCAL_RANK:--1}" -eq 0 ]
then
  if [ -n "${LSB_JOBID:-}" ]; then
    # Walk the marker files with a glob instead of parsing ls, so that zero,
    # one or several recorded PIDs are all handled.
    for PID_FILE in "/tmp/${LSB_JOBID}"/*
    do
      [ -e "${PID_FILE}" ] || continue      # glob matched nothing
      PID_RIPC=${PID_FILE##*/}
      case "${PID_RIPC}" in
        ''|*[!0-9]*) continue ;;            # not a PID marker
      esac
      if [ "${PID_RIPC}" -ne 0 ]; then
        kill -TERM "${PID_RIPC}"
      fi
    done
    /bin/rm -rf "/tmp/${LSB_JOBID}"
  else
    # An empty job id would turn the removal below into "rm -rf /tmp/".
    echo "reset.sh: LSB_JOBID is not set; skipping power monitor cleanup" >&2
  fi
  sudo satori-ppc64_cpu --smt=on                            # SMT mode back to default
  sudo satori-nvidia-smi -rac                               # Clocks back to default
  sudo satori-nvidia-smi --compute-mode=EXCLUSIVE_PROCESS   # Compute mode back to p10 default
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh
##
# Per-node preparation. On local rank 0: start the background power monitor
# (recording its PID as a file name under /tmp/<LSB_JOBID>), set the SMT and
# GPU modes, and delete any existing scratch directory.
# A missing OMPI_COMM_WORLD_LOCAL_RANK is treated as "not local rank 0".
if [ "${OMPI_COMM_WORLD_LOCAL_RANK:--1}" -eq 0 ]
then
  # A non-zero PID_RIPC inherited from the environment suppresses the monitor.
  PID_RIPC=${PID_RIPC:-0}
  if [ "${PID_RIPC}" -eq 0 ]; then
    if [ -n "${LSB_JOBID:-}" ]; then
      ./read_inst_power_cons.sh > "energy-consumption.out.${LSB_JOBID}_${OMPI_COMM_WORLD_RANK}" 2>&1 &
      PID_RIPC=$!
      # The empty marker file is named after the monitor PID so that reset.sh
      # can find the process to terminate.
      mkdir -p "/tmp/${LSB_JOBID}"
      touch "/tmp/${LSB_JOBID}/${PID_RIPC}"
    else
      # Without a job id the marker would land directly in /tmp and the
      # monitor could not be located again, so it is not started.
      echo "setup.sh: LSB_JOBID is not set; power monitor not started" >&2
    fi
  fi
  sudo satori-ppc64_cpu --smt=2                    # Set the SMT mode to 2
  sudo satori-ppc64_cpu --smt                      # Verify the SMT mode
  sudo satori-nvidia-smi -rac                      # For POWER9+V100
  sudo satori-nvidia-smi --compute-mode=DEFAULT    # Set the compute mode to DEFAULT
  /bin/rm -rf "/tmp/data.${USER}"                  # Delete the scratch directory
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment