Skip to content

Instantly share code, notes, and snippets.

@mwrnd
Created October 16, 2019 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mwrnd/0dc9bc4857edc2293d2503f22c023783 to your computer and use it in GitHub Desktop.
Save mwrnd/0dc9bc4857edc2293d2503f22c023783 to your computer and use it in GitHub Desktop.
Generate permutations of command-line calls to tensorflow/benchmark's tf_cnn_benchmarks.py script.
"""
Purpose: Generate permutations of command-line calls to
tensorflow/benchmark's tf_cnn_benchmarks.py script.
Usage: 0) place this file in /benchmarks/scripts/tf_cnn_benchmarks
1) python cmds.py > benchmarkcommands
2) bash -x <benchmarkcommands 2>&1 | tee benchmarklog
Notes: - TF_XLA_FLAGS toggles TensorFlow's XLA JIT (general TF feature);
TF_ROCM_FUSION_ENABLE is specific to AMD's ROCm builds of TensorFlow
- the associated parse.py will parse log output into summary form
"""
# model_config is project-local (lives in tensorflow/benchmarks'
# tf_cnn_benchmarks package); this script must sit next to it (see docstring).
from models import model_config

# Batch sizes to sweep, kept as strings for direct command-line interpolation.
# (Fix: "32" and "64" previously carried trailing spaces, which leaked an
# extra space into every generated "--batch_size=..." flag.)
batch_sizes = ["16", "32", "64", "128", "256"]

# Sorted model names for each dataset, read from the benchmark suite's
# private registries. NOTE(review): these are underscore-private attributes --
# confirm they still exist in the installed tf_cnn_benchmarks version.
imgnet_models = sorted(model_config._model_name_to_imagenet_model.keys())
cifar_models = sorted(model_config._model_name_to_cifar_model.keys())
def print_cmdline(d, b, m, s):
    """Print one shell line that runs a single benchmark configuration.

    Args:
        d: device, "CPU" or "GPU".
        b: batch size, already a string (e.g. "64").
        m: model name passed to --model (e.g. "resnet50").
        s: dataset name passed to --data_name ("imagenet" or "cifar10").
    """
    # Echo a parseable marker plus the current TF_* environment so the log
    # can be summarized later (the gist mentions an associated parse.py),
    # then invoke the benchmark script itself.
    cmdline = "echo ======" + d + "dev;env | grep \"TF_\";python tf_cnn_benchmarks.py"
    if d == "CPU":
        cmdline = cmdline + " --device=CPU"
    else:
        # --compute_lr_on_cpu speeds up runs but is not strictly GPU-focused
        cmdline = cmdline + " --device=GPU --num_gpus=1"
    # Fix: was a Python 2 "print" statement (syntax error under Python 3).
    # A single-argument print(...) behaves identically on Python 2 and 3.
    print(cmdline + " --batch_size=" + b + " --num_batches=40 --model=" + m +
          " --data_name=" + s)
def model_batchsize_permuations(device, modelname, models, batchsizes):
    """Emit one benchmark command for every (model, batch size) pairing.

    Args:
        device: "CPU" or "GPU", forwarded to print_cmdline.
        modelname: dataset name forwarded as --data_name.
        models: iterable of model-name strings.
        batchsizes: iterable of batch-size strings.
    """
    # Materialize the cross product first, then emit one line per pair.
    pairs = [(model, size) for model in models for size in batchsizes]
    for model, size in pairs:
        print_cmdline(device, size, model, modelname)
print "timedatectl"
print "unset TF_XLA_FLAGS"
print "unset TF_ROCM_FUSION_ENABLE"
### CPU Benchmarks
model_batchsize_permuations("CPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("CPU", "cifar10", cifar_models, batch_sizes)
### GPU Benchmarks
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
### GPU Benchmarks ROCm Fusion enabled
print "unset TF_XLA_FLAGS"
print "export TF_ROCM_FUSION_ENABLE=1"
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
### CPU and GPU Benchmarks with XLA and ROCm Fusion
print "export TF_XLA_FLAGS=--tf_xla_cpu_global_jit"
print "export TF_ROCM_FUSION_ENABLE=1"
model_batchsize_permuations("CPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("CPU", "cifar10", cifar_models, batch_sizes)
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
print "timedatectl"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment