Skip to content

Instantly share code, notes, and snippets.

@mwrnd
Created October 23, 2019 20:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mwrnd/32296d1d4624f91c3d7aa1190b566b9b to your computer and use it in GitHub Desktop.
Save mwrnd/32296d1d4624f91c3d7aa1190b566b9b to your computer and use it in GitHub Desktop.
Generate permutations of command-line calls to tensorflow/benchmark's tf_cnn_benchmarks.py script.
"""
Purpose: Generate permutations of command-line calls to
tensorflow/benchmark's tf_cnn_benchmarks.py script.
Usage: 00) sudo pip install future
0) place this file in /benchmarks/scripts/tf_cnn_benchmarks
1) python cmds.py > benchmarkcommands
2) bash -x <benchmarkcommands 2>&1 | tee benchmarklog
Notes: - XLA and ROCM are options specific to AMD's ROCm
- the associated parse.py will parse log output into summary form
"""
import future
from models import model_config
batch_sizes = ["16 ", "32 ", "64 ", "128", "256"]
additional_arguments = ["", " --use_fp16"]
imgnet_models = sorted(model_config._model_name_to_imagenet_model.keys())
cifar_models = sorted(model_config._model_name_to_cifar_model.keys())
def print_cmdline(d, b, m, s, a):
# build up the command-line call to tf_cnn_benchmarks.py,
# run through timeout to deal with crashes
cmdline="echo ======"+d+"dev;env | grep \"TF_\";timeout -k 17.0m 15.0m " + \
"python tf_cnn_benchmarks.py"
if d == "CPU":
cmdline=cmdline+" --device=CPU"
else:
# --compute_lr_on_cpu speeds up runs but is not strictly GPU-focused
cmdline=cmdline+" --device=GPU --num_gpus=1"
cmdline = cmdline + " --batch_size="+b+" --num_batches=40 --data_name="+s+ \
" --model=" + m + a
print(cmdline)
def model_batchsize_permuations(device, modelname, models, batchsizes):
for m in models:
for b in batchsizes:
for a in additional_arguments:
print_cmdline(device, b, m, modelname, a)
def all_GPU_benchmarks():
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
model_batchsize_permuations("GPU", "coco", ["ssd3000", "trivial"], batch_sizes)
model_batchsize_permuations("GPU", "imagenet", ["nasnetlarge"], ["8 "])
model_batchsize_permuations("GPU", "librispeech", ["deepspeech2"], ["16 "])
print("timedatectl")
print("python --version")
print("python -c \'import future; import tensorflow; " \
"print(\"tensorflow version: {0}\".format(tensorflow.__version__))\'")
print("dkms status | grep amd")
print("dmesg | grep kfd")
print("rocm_bandwidth_test")
print("python all_reduce_benchmark.py --variable_update=replicated")
print("rocm-smi")
### GPU and CPU Benchmarks with no flags
print("unset TF_XLA_FLAGS")
print("unset TF_ROCM_FUSION_ENABLE")
all_GPU_benchmarks()
model_batchsize_permuations("CPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("CPU", "cifar10", cifar_models, batch_sizes)
### GPU Benchmarks with ROCm Fusion enabled
#print("unset TF_XLA_FLAGS")
#print("export TF_ROCM_FUSION_ENABLE=1")
#all_GPU_benchmarks()
### GPU Benchmarks with XLA enabled
#print("export TF_XLA_FLAGS=--tf_xla_cpu_global_jit")
#print("unset TF_ROCM_FUSION_ENABLE")
#all_GPU_benchmarks()
### GPU Benchmarks with XLA and ROCm Fusion enabled
print("export TF_XLA_FLAGS=--tf_xla_cpu_global_jit")
print("export TF_ROCM_FUSION_ENABLE=1")
all_GPU_benchmarks()
print("timedatectl")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment