Created
October 23, 2019 20:23
-
-
Save mwrnd/32296d1d4624f91c3d7aa1190b566b9b to your computer and use it in GitHub Desktop.
Generate permutations of command-line calls to tensorflow/benchmark's tf_cnn_benchmarks.py script.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Purpose: Generate permutations of command-line calls to | |
tensorflow/benchmark's tf_cnn_benchmarks.py script. | |
Usage: 00) sudo pip install future | |
0) place this file in /benchmarks/scripts/tf_cnn_benchmarks | |
1) python cmds.py > benchmarkcommands | |
2) bash -x <benchmarkcommands 2>&1 | tee benchmarklog | |
Notes: - XLA and ROCM are options specific to AMD's ROCm | |
- the associated parse.py will parse log output into summary form | |
""" | |
import future | |
from models import model_config | |
batch_sizes = ["16 ", "32 ", "64 ", "128", "256"] | |
additional_arguments = ["", " --use_fp16"] | |
imgnet_models = sorted(model_config._model_name_to_imagenet_model.keys()) | |
cifar_models = sorted(model_config._model_name_to_cifar_model.keys()) | |
def print_cmdline(d, b, m, s, a):
    """Print one shell command line that runs tf_cnn_benchmarks.py.

    d -- device, "CPU" or anything else for GPU
    b -- batch size as a string (may carry trailing padding spaces)
    m -- model name, e.g. "resnet50"
    s -- dataset name passed to --data_name, e.g. "imagenet"
    a -- extra arguments, either "" or " --use_fp16"
    """
    # Echo a marker plus the TF_* environment so the log records which flag
    # combination was active, then run the benchmark under timeout(1) so a
    # hung or crashed run cannot stall the whole sweep (SIGTERM after 15
    # minutes, SIGKILL 17 minutes after that).
    cmdline = ("echo ======" + d + "dev;env | grep \"TF_\";"
               "timeout -k 17.0m 15.0m python tf_cnn_benchmarks.py")
    if d == "CPU":
        cmdline = cmdline + " --device=CPU"
    else:
        # --compute_lr_on_cpu speeds up runs but is not strictly GPU-focused
        cmdline = cmdline + " --device=GPU --num_gpus=1"
    cmdline = (cmdline + " --batch_size=" + b + " --num_batches=40"
               " --data_name=" + s + " --model=" + m + a)
    print(cmdline)
def model_batchsize_permuations(device, modelname, models, batchsizes):
    """Emit one benchmark command per (model, batch size, extra-arg) combo.

    device     -- "CPU" or "GPU", forwarded to print_cmdline
    modelname  -- dataset name forwarded to --data_name
    models     -- iterable of model-name strings
    batchsizes -- iterable of batch-size strings
    """
    # NOTE(review): "permuations" is a typo for "permutations"; the name is
    # kept because the call sites below depend on it.
    for model in models:
        for batch in batchsizes:
            for extra in additional_arguments:
                print_cmdline(device, batch, model, modelname, extra)
def all_GPU_benchmarks():
    """Emit the full GPU benchmark sweep (called once per TF_* flag setup)."""
    model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
    model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
    model_batchsize_permuations("GPU", "coco", ["ssd3000", "trivial"], batch_sizes)
    # nasnetlarge and deepspeech2 are run only at small, fixed batch sizes
    # (presumably to fit in GPU memory -- TODO confirm).
    model_batchsize_permuations("GPU", "imagenet", ["nasnetlarge"], ["8 "])
    model_batchsize_permuations("GPU", "librispeech", ["deepspeech2"], ["16 "])
print("timedatectl") | |
print("python --version") | |
print("python -c \'import future; import tensorflow; " \ | |
"print(\"tensorflow version: {0}\".format(tensorflow.__version__))\'") | |
print("dkms status | grep amd") | |
print("dmesg | grep kfd") | |
print("rocm_bandwidth_test") | |
print("python all_reduce_benchmark.py --variable_update=replicated") | |
print("rocm-smi") | |
### GPU and CPU Benchmarks with no flags | |
print("unset TF_XLA_FLAGS") | |
print("unset TF_ROCM_FUSION_ENABLE") | |
all_GPU_benchmarks() | |
model_batchsize_permuations("CPU", "imagenet", imgnet_models, batch_sizes) | |
model_batchsize_permuations("CPU", "cifar10", cifar_models, batch_sizes) | |
### GPU Benchmarks with ROCm Fusion enabled | |
#print("unset TF_XLA_FLAGS") | |
#print("export TF_ROCM_FUSION_ENABLE=1") | |
#all_GPU_benchmarks() | |
### GPU Benchmarks with XLA enabled | |
#print("export TF_XLA_FLAGS=--tf_xla_cpu_global_jit") | |
#print("unset TF_ROCM_FUSION_ENABLE") | |
#all_GPU_benchmarks() | |
### GPU Benchmarks with XLA and ROCm Fusion enabled | |
print("export TF_XLA_FLAGS=--tf_xla_cpu_global_jit") | |
print("export TF_ROCM_FUSION_ENABLE=1") | |
all_GPU_benchmarks() | |
print("timedatectl") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment