Last active
April 12, 2020 06:35
-
-
Save mwrnd/3c929a85a2a43632288b564bc5f9c62a to your computer and use it in GitHub Desktop.
Process the output logs of multiple calls to tensorflow/benchmark's tf_cnn_benchmarks.py v2.1 script into an easy-to-read summary.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# Copyright 2020 Matthew Wielgus. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================ | |
Purpose: Process the output logs of multiple calls to tensorflow/benchmark's | |
tf_cnn_benchmarks.py script into an easy-to-read summary. | |
Usage: | |
0) Linux system with tensorflow and tensorflow/benchmarks installed | |
Version used for development and testing: | |
tensorflow versions: tensorflow-rocm-2.1.1 | |
tensorflow/benchmarks version: | |
https://codeload.github.com/tensorflow/benchmarks/zip/cnn_tf_v2.1_compatible | |
1) Generate command-line calls with cmds.py: | |
https://gist.github.com/mwrnd/52799f24fcad44d24f759076c323d203 | |
python3 cmds.py > benchmarkcommands | |
2) Run the commands and output results to a file: | |
bash -x <benchmarkcommands 2>&1 | tee benchmarklog | |
3) Parse the results with this script to generate summary: | |
python3 parse.py benchmarklog 2>&1 | tee benchmarkresults | |
Notes: - tested on local CPU and GPU devices | |
- cat benchmarklog | grep "PUdev\|TF_\|images\/sec\|--model" | less | |
""" | |
import future | |
import fileinput | |
import re | |
# Tab stop width for expandtabs() when pretty-printing the tab-separated
# tables (only used by the commented-out expandtabs() calls below).
tabSizeInSpaces = 10
# find all tf_cnn_benchmarks.py results in a log file
def get_results_from_logfile(fileIterator):
    """Scan benchmark log lines and collect one entry per completed run.

    Args:
        fileIterator: iterable of text lines (file object, list of str, ...)

    Returns:
        list of (targetdevice, dataset, modelname, batchsize+flags,
        totalimgsec, linenum) tuples, one per completed benchmark run.
    """
    results = []
    # per-run parser state; cleared whenever a run completes or restarts
    flagXLA = ""
    flagROCm = ""
    miopenwarn = ""
    fp16used = ""
    modelname = ""
    dataset = ""
    batchsize = ""
    targetdevice = ""
    totalimgsec = ""
    # markers of interest in tf_cnn_benchmarks.py output
    xlae = re.compile(r'TF_XLA_FLAGS=--tf_xla_cpu_global_jit')
    xlad = re.compile(r'TODO: XLA Failure Search String')
    rocm = re.compile(r'TF_ROCM_FUSION_ENABLE=1')
    mipn = re.compile(r'MIOpen\(HIP\): Warning \[FindRecordUnsafe\] File is unr')
    fp16 = re.compile(r'--use_fp16')
    modl = re.compile(r'Model: ')
    dats = re.compile(r'Dataset:')
    batc = re.compile(r'Batch size:')
    devs = re.compile(r'Devices: \[\'')
    cpud = re.compile(r'CPU')
    gpud = re.compile(r'GPU')
    totl = re.compile(r'total images/sec:')
    linenum = 0
    for line in fileIterator:
        linenum = linenum + 1
        if xlae.search(line):
            flagXLA = "X"
        # XLA Service can start, but then not run
        if xlad.search(line):
            flagXLA = ""
        if rocm.search(line):
            flagROCm = "R"
        if mipn.search(line):
            # No Compute Unit Optimizations for current GPU
            miopenwarn = "C"
        if modl.search(line):
            # if you get Model twice then this run failed, restart parsing
            if modelname != "":
                flagXLA = ""
                flagROCm = ""
                miopenwarn = ""
                fp16used = ""
                dataset = ""
                batchsize = ""
                targetdevice = ""
                totalimgsec = ""
            modelname = str.split(line)[1]
        if fp16.search(line):
            fp16used = "F"
        if dats.search(line):
            dataset = str.split(line)[1]
        if batc.search(line):
            # use three numerals for benchmark size, e.g. "032"
            batchsize = '{:03d}'.format(int(str.split(line)[2]))
        if devs.search(line):
            if cpud.search(line):
                targetdevice = "CPU"
            elif gpud.search(line):
                targetdevice = "GPU"
            else:
                targetdevice = "UNKNOWN"
        if totl.search(line):
            # BUGFIX: use a dedicated name for the throughput; the original
            # reused 'n', shadowing the MIOpen regex match above
            imgsec = float(str.split(line)[2])
            # keep roughly three significant digits of throughput
            if imgsec < 1:
                totalimgsec = '{:1.2f}'.format(imgsec)
            elif imgsec < 10:
                totalimgsec = '{:2.1f}'.format(imgsec)
            elif imgsec < 100:
                totalimgsec = '{:3.1f}'.format(imgsec)
            else:
                totalimgsec = '{:.0f}'.format(imgsec)
        # have all the required data, must have encountered a completed run
        # BUGFIX: original tested (batchsize != 0); batchsize is a string so
        # the comparison was always True and a run missing its batch size
        # could be recorded with an empty size field
        if (modelname != "") and (dataset != "") and (batchsize != "") and \
           (targetdevice != "") and (totalimgsec != ""):
            # BUGFix: trivial & CPU/mobilenet/imagenet combinations are
            # incorrectly processed as they do not use any MIOpen Compute
            # Optimizations but parsing logs is easier if you assume they
            # do, correct here
            # TODO: Find some related invariant in the logs that can be used
            if (modelname == 'trivial') or \
               ((modelname == 'official_ncf') and (dataset == 'imagenet')) or \
               ((modelname == 'mobilenet') and (dataset == 'imagenet') and
                (targetdevice == 'CPU')):
                bxr = '{0}{1}{2}{3}{4}'.format(batchsize, flagXLA, flagROCm,
                                               "", fp16used)
            else:
                bxr = '{0}{1}{2}{3}{4}'.format(batchsize, flagXLA, flagROCm,
                                               miopenwarn, fp16used)
            # might be easier to exclude trivial and ncf models
            #if (modelname != 'trivial') and (modelname != 'official_ncf'):
            results.append((targetdevice, dataset, modelname,
                            bxr, totalimgsec, linenum))
            # reset parsing state for the next run
            flagXLA = ""
            flagROCm = ""
            miopenwarn = ""
            fp16used = ""
            modelname = ""
            dataset = ""
            batchsize = ""
            targetdevice = ""
            totalimgsec = ""
    # end for line in fileIterator
    return results
#end get_results_from_logfile(fileIterator)
# find the unique keys in results
def find_unique_keys_in_results(results):
    """Collect the distinct values of each result field.

    Args:
        results: list of (device, dataset, modelname, batchsize+flags,
            totalimgsec, linenum) tuples from get_results_from_logfile

    Returns:
        (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
        where each element is a sorted list of the unique values seen.
        batchsizeBaselineKeys holds the batch sizes (as ints) whose string
        carried no flag suffix, i.e. baseline runs with no flags enabled.
    """
    # comprehensions replace the original index-based range(len(...)) loops
    devices = sorted(set(r[0] for r in results))
    datasets = sorted(set(r[1] for r in results))
    modelnames = sorted(set(r[2] for r in results))
    batchcombos = [r[3] for r in results]
    batchsizes = sorted(set(batchcombos))
    # baseline values are runs with no flags enabled, so the whole
    # batch-size string is a plain integer like "032"
    baseline = []
    for combo in batchcombos:
        try:
            baseline.append(int(combo))
        except ValueError:
            # flagged combos like "032XF" are not baselines; skip them
            continue
    batchsizeBaselineKeys = sorted(set(baseline))
    return (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
# end find_unique_keys_in_results(results)
# print results with model names as rows and batch size+flags as columns
#
# example:
#
# imagenet
# model batchsize=016   016F    016XRF  032     032F    032XRF
# alexnet       223     237     239     318     325     327
# googlenet     135     138     138     155     160     161
# inception3    25.7    29.7    30.2    26.6    31.3    32.0
# ...
def print_tabular_results(results, devices, datasets, modelnames,
                          batchsizes, batchsizeBaselineKeys):
    """Print one tab-separated table per (device, dataset) pair: model
    names as rows, batch-size+flag combinations as columns, with "na"
    marking combinations that produced no result."""
    sep = '\t'
    for device in devices:
        print("")
        print(device)
        for dataset in datasets:
            print("")
            print(dataset)
            # legend row listing every batch size + flag combination
            header = "model batchsize="
            for combo in batchsizes:
                header = header + sep + combo
            print(header)
            # one row per model, one cell per batch size + flag combination
            for model in modelnames:
                cells = [model]
                for combo in batchsizes:
                    # first matching result wins; "na" when nothing matches
                    hit = next((r[4] for r in results
                                if (r[0], r[1], r[2], r[3]) ==
                                   (device, dataset, model, combo)), None)
                    cells.append(hit if hit is not None else "na")
                print(sep.join(cells))
# end print_tabular_results(results, ...
# print results with model names as rows and batch size+flags as columns
# but with flagged results normalized relative to standard/baseline results
#
# the results are comma-separated in CSV-compatible output that can be
# imported into a spreadsheet for further processing
#
#  imagenet
#  model batchsize,     016,    016F,   016XRF
#  alexnet,     0.00,   6.28,   7.17
#  googlenet,   0.00,   2.22,   2.22
#  inception3,  0.00,   15.56,  17.51
#  inception4,  0.00,   15.57,  17.21
#  lenet5,      0.00,   4.69,   6.30
#  mobilenet,   0.00,   -18.89, -18.15
#  ...
def print_relative_tabular_results(results, devices, datasets, modelnames, \
                                   batchsizes, batchsizeBaselineKeys):
    # Print, per (device, dataset) pair, a CSV table where each cell is a
    # model's throughput expressed as percentage gain over the baseline
    # (unflagged) run of the same batch size; the baseline column itself
    # prints 0.00 and missing runs print "na".
    seperator = ','
    for v in devices:
        print("")
        print(v)
        # for each data set:
        for d in datasets:
            print("")
            print(d)
            # print each batch size value in the legend
            s = "model batchsize="
            for e in batchsizes:
                s = s + seperator + e
            #print(s.expandtabs(tabSizeInSpaces))
            print(s)
            # for each model ...
            for m in modelnames:
                rslt = m
                # NOTE(review): baseline carries over between columns and
                # starts at 1.0; a flagged column encountered before its
                # baseline column is normalized against the previous
                # baseline. This relies on batchsizes' sort order placing
                # each plain "NNN" entry before its flagged variants --
                # verify against find_unique_keys_in_results output.
                baseline = 1.0
                # find its results for each batch size
                for b in batchsizes:
                    foundrslt = 0
                    for i in range(len(results)):
                        if (results[i][0] == v) and (results[i][1] == d) and \
                           (results[i][2] == m) and (results[i][3] == b) and \
                           (foundrslt == 0):
                            # output results relative to baseline batch sizes
                            try:
                                # plain-integer combo means an unflagged run;
                                # remember it as the new baseline throughput
                                n = int(b)
                                if n in batchsizeBaselineKeys:
                                    baseline = float(results[i][4])
                            except Exception:
                                pass
                            foundrslt = results[i][4]
                            # percentage gain relative to the baseline run
                            prcntGain = ((float(foundrslt) / baseline) * 100) - 100
                            prcntGainStr = '{:2.2f}'.format(prcntGain)
                            rslt = rslt + seperator + prcntGainStr
                    if foundrslt == 0:
                        # no result for this combination
                        rslt = rslt + seperator + "na"
                #print(rslt.expandtabs(tabSizeInSpaces))
                print(rslt)
#end print_relative_tabular_results(results, ...
# output formatted values for each device-type/model/dataset
# fileinput.input() reads the file(s) named on the command line (or stdin),
# e.g.: python3 parse.py benchmarklog
fileIterator = iter(fileinput.input())
results = get_results_from_logfile(fileIterator)
# debug aid: dump every parsed result tuple
"""
for r in sorted(results):
    s = ""
    for i in range(len(results[0])):
        s = s + '{0}-'.format(r[i])
    print(s)
"""
(devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys) = \
    find_unique_keys_in_results(results)
print("Legend:")
print(" X means XLA was enabled")
print(" export TF_XLA_FLAGS=--tf_xla_cpu_global_jit")
print(" R means ROCm Fusion was enabled")
print(" export TF_ROCM_FUSION_ENABLE=1")
print(" F means 16-Bit Floating Point was used (--use_fp16)")
# FIX: corrected "optimations" typo in the user-facing legend text
print(" C means MIOpen Compute Unit optimizations do not exist for this GPU")
print(" na means the benchmark did not run or batch size was too large")
# absolute throughput tables (tab-separated)
print_tabular_results(results, devices, datasets, modelnames,
                      batchsizes, batchsizeBaselineKeys)
print("\n\n\n\n")
# percentage-gain-vs-baseline tables (CSV)
print_relative_tabular_results(results, devices, datasets, modelnames,
                               batchsizes, batchsizeBaselineKeys)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment