Skip to content

Instantly share code, notes, and snippets.

@mwrnd
Created October 16, 2019 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mwrnd/0dc9bc4857edc2293d2503f22c023783 to your computer and use it in GitHub Desktop.
Save mwrnd/0dc9bc4857edc2293d2503f22c023783 to your computer and use it in GitHub Desktop.
Generate permutations of command-line calls to tensorflow/benchmark's tf_cnn_benchmarks.py script.
"""
Purpose: Generate permutations of command-line calls to
tensorflow/benchmark's tf_cnn_benchmarks.py script.
Usage: 0) place this file in /benchmarks/scripts/tf_cnn_benchmarks
1) python cmds.py > benchmarkcommands
2) bash -x <benchmarkcommands 2>&1 | tee benchmarklog
Notes: - TF_XLA_FLAGS toggles TensorFlow's XLA JIT (general TF feature);
TF_ROCM_FUSION_ENABLE is specific to AMD's ROCm builds of TensorFlow
- the associated parse.py will parse log output into summary form
"""
# model_config is project-local (lives in tensorflow/benchmarks'
# tf_cnn_benchmarks package); this script must sit next to it (see docstring).
from models import model_config

# Batch sizes to sweep, kept as strings for direct command-line interpolation.
# (Fix: "32" and "64" previously carried trailing spaces, which leaked an
# extra space into every generated "--batch_size=..." flag.)
batch_sizes = ["16", "32", "64", "128", "256"]

# Sorted model names for each dataset, read from the benchmark suite's
# private registries. NOTE(review): these are underscore-private attributes --
# confirm they still exist in the installed tf_cnn_benchmarks version.
imgnet_models = sorted(model_config._model_name_to_imagenet_model.keys())
cifar_models = sorted(model_config._model_name_to_cifar_model.keys())
def print_cmdline(d, b, m, s):
    """Print one shell line that runs a single benchmark configuration.

    Args:
        d: device, "CPU" or "GPU".
        b: batch size, already a string (e.g. "64").
        m: model name passed to --model (e.g. "resnet50").
        s: dataset name passed to --data_name ("imagenet" or "cifar10").
    """
    # Echo a parseable marker plus the current TF_* environment so the log
    # can be summarized later (the gist mentions an associated parse.py),
    # then invoke the benchmark script itself.
    cmdline = "echo ======" + d + "dev;env | grep \"TF_\";python tf_cnn_benchmarks.py"
    if d == "CPU":
        cmdline = cmdline + " --device=CPU"
    else:
        # --compute_lr_on_cpu speeds up runs but is not strictly GPU-focused
        cmdline = cmdline + " --device=GPU --num_gpus=1"
    # Fix: was a Python 2 "print" statement (syntax error under Python 3).
    # A single-argument print(...) behaves identically on Python 2 and 3.
    print(cmdline + " --batch_size=" + b + " --num_batches=40 --model=" + m +
          " --data_name=" + s)
def model_batchsize_permuations(device, modelname, models, batchsizes):
    """Emit one benchmark command for every (model, batch size) pairing.

    Args:
        device: "CPU" or "GPU", forwarded to print_cmdline.
        modelname: dataset name forwarded as --data_name.
        models: iterable of model-name strings.
        batchsizes: iterable of batch-size strings.
    """
    # Materialize the cross product first, then emit one line per pair.
    pairs = [(model, size) for model in models for size in batchsizes]
    for model, size in pairs:
        print_cmdline(device, size, model, modelname)
print "timedatectl"
print "unset TF_XLA_FLAGS"
print "unset TF_ROCM_FUSION_ENABLE"
### CPU Benchmarks
model_batchsize_permuations("CPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("CPU", "cifar10", cifar_models, batch_sizes)
### GPU Benchmarks
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
### GPU Benchmarks ROCm Fusion enabled
print "unset TF_XLA_FLAGS"
print "export TF_ROCM_FUSION_ENABLE=1"
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
### CPU and GPU Benchmarks with XLA and ROCm Fusion
print "export TF_XLA_FLAGS=--tf_xla_cpu_global_jit"
print "export TF_ROCM_FUSION_ENABLE=1"
model_batchsize_permuations("CPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("CPU", "cifar10", cifar_models, batch_sizes)
model_batchsize_permuations("GPU", "imagenet", imgnet_models, batch_sizes)
model_batchsize_permuations("GPU", "cifar10", cifar_models, batch_sizes)
print "timedatectl"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment