Created
October 16, 2019 14:42
-
-
Save mwrnd/7931accea091b6dcf5f95348025c525e to your computer and use it in GitHub Desktop.
Process the output logs of multiple calls to tensorflow/benchmark's tf_cnn_benchmarks.py script into an easy-to-read summary.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# Copyright 2019 Matthew Wielgus. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================ | |
Purpose: Process the output logs of multiple calls to tensorflow/benchmark's | |
tf_cnn_benchmarks.py script into an easy-to-read summary. | |
Usage: | |
0) Linux system with tensorflow and tensorflow/benchmarks installed | |
Versions used for development and testing: | |
tensorflow/benchmarks version: abb1aec2f2db4ba73fac2e1359227aef59b10258 | |
https://codeload.github.com/tensorflow/benchmarks/zip/abb1aec2f2db4ba73fac2e1359227aef59b10258 | |
tensorflow version: tensorflow-rocm-1.14.2 | |
1) Generate command-line calls with cmds.py: | |
python cmds.py > benchmarkcommands | |
2) Run the commands and output results to a file: | |
bash -x <benchmarkcommands 2>&1 | tee benchmarklog | |
3) Parse the results with this script to generate summary: | |
python parse.py benchmarklog 2>&1 | tee benchmarkresults | |
Notes: - tested on CPU and GPU target devices | |
- cat benchmarklog | grep "PUdev\|TF_\|images\/sec\|--model" | less | |
""" | |
import future | |
import fileinput | |
import re | |
tabSizeInSpaces = 8
# Marker appended to a batch-size column when MIOpen Compute Unit
# optimizations appear to have been used for the run.
miopenCUtext = "CU"


# find all tf_cnn_benchmarks.py results in a log file
def get_results_from_logfile(fileIterator):
    """Parse a tf_cnn_benchmarks.py log and summarize each completed run.

    Args:
        fileIterator: iterable of log lines (a file object, a list of
            strings, or fileinput.input()).

    Returns:
        list of (targetdevice, dataset, modelname, batchsize+flags,
        total_images_per_sec) tuples of strings, one per completed run.
    """
    results = []
    # Per-run parse state; reset after every completed (or restarted) run.
    flagXLA = ""                # "X" once the XLA service started and stayed up
    flagROCm = ""               # "R" once ROCm Fusion was reported enabled
    miopenwarn = miopenCUtext   # cleared when MIOpen has no CU tuning data
    modelname = ""
    dataset = ""
    batchsize = ""
    targetdevice = ""
    totalimgsec = ""
    # Compile the log-line markers once, outside the line loop.
    xlae = re.compile(r'XLA service')
    xlad = re.compile(r'If you want XLA:CPU')
    rocm = re.compile(r'ROCm Fusion is enabled')
    mipn = re.compile(r'MIOpen\(HIP\): Warning \[FindRecordUnsafe\] File is unrea')
    modl = re.compile(r'Model: ')
    dats = re.compile(r'Dataset:')
    batc = re.compile(r'Batch size:')
    devs = re.compile(r'Devices: \[\'')
    cpud = re.compile(r'CPU')
    gpud = re.compile(r'GPU')
    totl = re.compile(r'total images/sec:')
    for line in fileIterator:
        if xlae.search(line):
            flagXLA = "X"
        if xlad.search(line):
            # XLA Service can start, but then not run; drop the flag.
            flagXLA = ""
        if rocm.search(line):
            flagROCm = "R"
        if mipn.search(line):
            # No Compute Unit Optimizations for current GPU
            miopenwarn = ""
        if modl.search(line):
            # if you get Model twice then this run failed, restart parsing
            if modelname != "":
                flagXLA = ""
                flagROCm = ""
                miopenwarn = miopenCUtext
                dataset = ""
                batchsize = ""
                targetdevice = ""
                totalimgsec = ""
            modelname = line.split()[1]
        if dats.search(line):
            dataset = line.split()[1]
        if batc.search(line):
            # use three numerals for benchmark size so columns sort lexically
            batchsize = '{:03d}'.format(int(line.split()[2]))
        if devs.search(line):
            if cpud.search(line):
                targetdevice = "CPU"
            elif gpud.search(line):
                targetdevice = "GPU"
            else:
                targetdevice = "UNKNOWN"
        if totl.search(line):
            # Keep roughly three significant figures in the summary value.
            rate = float(line.split()[2])
            if rate < 1:
                totalimgsec = '{:1.2f}'.format(rate)
            elif rate < 10:
                totalimgsec = '{:2.1f}'.format(rate)
            elif rate < 100:
                totalimgsec = '{:3.1f}'.format(rate)
            else:
                totalimgsec = '{:.0f}'.format(rate)
        # have all the required data, must have encountered a completed run
        # BUGFIX: the original compared the batchsize string against the
        # int 0, which is always True in Python 3; compare against "".
        if (modelname != "") and (dataset != "") and (batchsize != "") and \
           (targetdevice != "") and (totalimgsec != ""):
            # BUGFix: trivial & CPU/mobilenet/imagenet combinations are
            # incorrectly processed as they do not use any MIOpen Compute
            # Optimizations but parsing logs is easier if you assume they
            # do, correct here.
            # TODO: Find some related invariant in the logs that can be used
            if (modelname == 'trivial') or \
               ((modelname == 'mobilenet') and (dataset == 'imagenet') and
                (targetdevice == 'CPU')) or \
               ((modelname == 'official_ncf') and (dataset == 'imagenet')):
                bxr = '{0}{1}{2}'.format(batchsize, flagXLA, flagROCm)
            else:
                bxr = '{0}{1}{2}{3}'.format(batchsize, flagXLA, flagROCm,
                                            miopenwarn)
            results.append((targetdevice, dataset, modelname, bxr,
                            totalimgsec))
            # reset parsing state for the next run in the log
            flagXLA = ""
            flagROCm = ""
            miopenwarn = miopenCUtext
            modelname = ""
            dataset = ""
            batchsize = ""
            targetdevice = ""
            totalimgsec = ""
    # end for line in fileIterator
    return results
#end get_results_from_logfile(fileIterator)
#end get_results_from_logfile(fileIterator) | |
# find the unique keys in results | |
def find_unique_keys_in_results(results):
    """Extract the sorted unique key values from parsed benchmark results.

    Args:
        results: list of (device, dataset, model, batchsize+flags,
            images_per_sec) string tuples, as produced by
            get_results_from_logfile().

    Returns:
        (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
        where the first four are sorted lists of unique strings and
        batchsizeBaselineKeys is a sorted list of ints, one per batch-size
        column that carries no flag suffix (the baseline runs).
    """
    # find all possible devices / data sets / model names / batch-size+flag
    # combinations (idiomatic set comprehensions replace the index loops)
    devices = sorted({r[0] for r in results})
    datasets = sorted({r[1] for r in results})
    modelnames = sorted({r[2] for r in results})
    batchsizes = sorted({r[3] for r in results})
    # find baseline batch size values
    # baseline values are runs with no flags enabled: a flag suffix
    # (X/R/CU) makes int() fail, excluding that column from the baselines
    baselines = set()
    for b in batchsizes:
        try:
            baselines.add(int(b))
        except ValueError:
            continue
    batchsizeBaselineKeys = sorted(baselines)
    return (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
# end find_unique_keys_in_results(results)
# print results with model names as rows and batch size+flags as columns | |
# | |
# model batchsize=16 32 64 128 256 | |
# trivial 3619 6778 11854 19279 26723 | |
# alexnet 193 258 311 340 355 | |
# googlenet 121 136 143 145 132 | |
# inception3 22.3 23.0 23.7 na na | |
# inception4 10.7 10.9 na na na | |
# lenet5 3706 6212 10154 14438 17627 | |
# ... | |
def print_tabular_results(results, devices, datasets, modelnames, \
                          batchsizes, batchsizeBaselineKeys):
    """Print one tab-separated table per device/dataset pair.

    Rows are model names, columns are batch-size+flag combinations, and
    cells hold the images/sec figure ("na" when no matching run exists).
    Column alignment comes from expandtabs(tabSizeInSpaces).
    """
    seperator = '\t'
    for v in devices:
        print("")
        print(v)
        for d in datasets:
            print("")
            print(d)
            # Legend row listing every batch-size+flag column.
            header = seperator.join(["model batchsize"] + list(batchsizes))
            print(header.expandtabs(tabSizeInSpaces))
            for m in modelnames:
                row = m
                for b in batchsizes:
                    # First matching result wins; "na" if none matches.
                    cell = "na"
                    for entry in results:
                        if entry[0] == v and entry[1] == d and \
                           entry[2] == m and entry[3] == b:
                            cell = entry[4]
                            break
                    row = row + seperator + cell
                print(row.expandtabs(tabSizeInSpaces))
# end print_tabular_results(results, ...
# end print_tabular_results(results, ... | |
# print results with model names as rows and batch size+flags as columns | |
# but with flagged results normalized relative to standard/baseline results | |
# | |
# the results are comma-seperated in CSV-compatible output that can be | |
# imported into a spreadsheet for further processing | |
# | |
# imagenet | |
# model, 016, 016CU, 016XRCU, 32, 032CU, 032XRCU, 64, 064CU | |
# alexnet, 0, 4.15, 5.18, 0, 8.53, 4.65, 0, 10.29 | |
# googlenet, 0, -1.65, 0, 0, 0, 0, 0, 0.7 | |
# inception3,0, 0.45, 4.93, 0, 2.17, 4.78, 0, 1.67 | |
# ... | |
def print_relative_tabular_results(results, devices, datasets, modelnames, \
                                   batchsizes, batchsizeBaselineKeys):
    """Print CSV tables of percentage gain versus the baseline runs.

    Rows are model names, columns are batch-size+flag combinations; each
    cell is the percent speedup of that run relative to the most recently
    seen flag-free (baseline) batch-size column for the same model, or
    "na" when no matching run exists. Output is CSV-compatible so it can
    be imported into a spreadsheet.
    """
    seperator = ','
    for v in devices:
        print("")
        print(v)
        for d in datasets:
            print("")
            print(d)
            # Legend row listing every batch-size+flag column.
            print(seperator.join(["model batchsize"] + list(batchsizes)))
            for m in modelnames:
                row = m
                # Default baseline until a flag-free column is encountered.
                baseline = 1.0
                for b in batchsizes:
                    found = 0
                    for entry in results:
                        if entry[0] != v or entry[1] != d or \
                           entry[2] != m or entry[3] != b:
                            continue
                        # A pure-numeric column is a baseline: remember its
                        # rate for the flagged columns that follow it.
                        try:
                            if int(b) in batchsizeBaselineKeys:
                                baseline = float(entry[4])
                        except ValueError:
                            pass
                        found = entry[4]
                        prcntGain = ((float(found) / baseline) * 100) - 100
                        row = row + seperator + '{:2.2f}'.format(prcntGain)
                        break
                    if found == 0:
                        row = row + seperator + "na"
                print(row)
#end print_relative_tabular_results(results, ...
# output formatted values for each device-type/model/dataset
# Reads the benchmark log from the files named on the command line (or from
# stdin) via fileinput, parses it into per-run result tuples, then prints a
# legend followed by the absolute and baseline-relative summary tables.
fileIterator = iter(fileinput.input())
results = get_results_from_logfile(fileIterator)
(devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys) = \
    find_unique_keys_in_results(results)
print("Legend:")
print(" X means XLA was enabled")
print(" export TF_XLA_FLAGS=--tf_xla_cpu_global_jit")
print(" R means ROCm Fusion was enabled")
print(" export TF_ROCM_FUSION_ENABLE=1")
# BUGFIX: corrected the "optimations" typo in the legend text below.
print(" CU means MIOpen Compute Unit optimizations")
print(" were used with this unsupported video card")
print(" na means the benchmark did not run or batch size was too large")
print_tabular_results(results, devices, datasets, modelnames, \
                      batchsizes, batchsizeBaselineKeys)
print("\n\n\n\n")
print_relative_tabular_results(results, devices, datasets, modelnames, \
                               batchsizes, batchsizeBaselineKeys)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment