Last active
April 12, 2020 06:35
-
-
Save mwrnd/3c929a85a2a43632288b564bc5f9c62a to your computer and use it in GitHub Desktop.
Process the output logs of multiple calls to tensorflow/benchmark's tf_cnn_benchmarks.py v2.1 script into an easy-to-read summary.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# Copyright 2020 Matthew Wielgus. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================ | |
Purpose: Process the output logs of multiple calls to tensorflow/benchmark's | |
tf_cnn_benchmarks.py script into an easy-to-read summary. | |
Usage: | |
0) Linux system with tensorflow and tensorflow/benchmarks installed | |
Version used for development and testing: | |
tensorflow versions: tensorflow-rocm-2.1.1 | |
tensorflow/benchmarks version: | |
https://codeload.github.com/tensorflow/benchmarks/zip/cnn_tf_v2.1_compatible | |
1) Generate command-line calls with cmds.py: | |
https://gist.github.com/mwrnd/52799f24fcad44d24f759076c323d203 | |
python3 cmds.py > benchmarkcommands | |
2) Run the commands and output results to a file: | |
bash -x <benchmarkcommands 2>&1 | tee benchmarklog | |
3) Parse the results with this script to generate summary: | |
python3 parse.py benchmarklog 2>&1 | tee benchmarkresults | |
Notes: - tested on local CPU and GPU devices | |
- cat benchmarklog | grep "PUdev\|TF_\|images\/sec\|--model" | less | |
""" | |
import future | |
import fileinput | |
import re | |
# Tab stop width for expandtabs() when pretty-printing the tab-separated
# tables (only used by the commented-out expandtabs() calls below).
tabSizeInSpaces = 10
# find all tf_cnn_benchmarks.py results in a log file
def get_results_from_logfile(fileIterator):
    """Scan benchmark log lines and collect one entry per completed run.

    Args:
        fileIterator: iterable of text lines (file object, list of str, ...)

    Returns:
        list of (targetdevice, dataset, modelname, batchsize+flags,
        totalimgsec, linenum) tuples, one per completed benchmark run.
    """
    results = []
    # per-run parser state; cleared whenever a run completes or restarts
    flagXLA = ""
    flagROCm = ""
    miopenwarn = ""
    fp16used = ""
    modelname = ""
    dataset = ""
    batchsize = ""
    targetdevice = ""
    totalimgsec = ""
    # markers of interest in tf_cnn_benchmarks.py output
    xlae = re.compile(r'TF_XLA_FLAGS=--tf_xla_cpu_global_jit')
    xlad = re.compile(r'TODO: XLA Failure Search String')
    rocm = re.compile(r'TF_ROCM_FUSION_ENABLE=1')
    mipn = re.compile(r'MIOpen\(HIP\): Warning \[FindRecordUnsafe\] File is unr')
    fp16 = re.compile(r'--use_fp16')
    modl = re.compile(r'Model: ')
    dats = re.compile(r'Dataset:')
    batc = re.compile(r'Batch size:')
    devs = re.compile(r'Devices: \[\'')
    cpud = re.compile(r'CPU')
    gpud = re.compile(r'GPU')
    totl = re.compile(r'total images/sec:')
    linenum = 0
    for line in fileIterator:
        linenum = linenum + 1
        if xlae.search(line):
            flagXLA = "X"
        # XLA Service can start, but then not run
        if xlad.search(line):
            flagXLA = ""
        if rocm.search(line):
            flagROCm = "R"
        if mipn.search(line):
            # No Compute Unit Optimizations for current GPU
            miopenwarn = "C"
        if modl.search(line):
            # if you get Model twice then this run failed, restart parsing
            if modelname != "":
                flagXLA = ""
                flagROCm = ""
                miopenwarn = ""
                fp16used = ""
                dataset = ""
                batchsize = ""
                targetdevice = ""
                totalimgsec = ""
            modelname = str.split(line)[1]
        if fp16.search(line):
            fp16used = "F"
        if dats.search(line):
            dataset = str.split(line)[1]
        if batc.search(line):
            # use three numerals for benchmark size, e.g. "032"
            batchsize = '{:03d}'.format(int(str.split(line)[2]))
        if devs.search(line):
            if cpud.search(line):
                targetdevice = "CPU"
            elif gpud.search(line):
                targetdevice = "GPU"
            else:
                targetdevice = "UNKNOWN"
        if totl.search(line):
            # BUGFIX: use a dedicated name for the throughput; the original
            # reused 'n', shadowing the MIOpen regex match above
            imgsec = float(str.split(line)[2])
            # keep roughly three significant digits of throughput
            if imgsec < 1:
                totalimgsec = '{:1.2f}'.format(imgsec)
            elif imgsec < 10:
                totalimgsec = '{:2.1f}'.format(imgsec)
            elif imgsec < 100:
                totalimgsec = '{:3.1f}'.format(imgsec)
            else:
                totalimgsec = '{:.0f}'.format(imgsec)
        # have all the required data, must have encountered a completed run
        # BUGFIX: original tested (batchsize != 0); batchsize is a string so
        # the comparison was always True and a run missing its batch size
        # could be recorded with an empty size field
        if (modelname != "") and (dataset != "") and (batchsize != "") and \
           (targetdevice != "") and (totalimgsec != ""):
            # BUGFix: trivial & CPU/mobilenet/imagenet combinations are
            # incorrectly processed as they do not use any MIOpen Compute
            # Optimizations but parsing logs is easier if you assume they
            # do, correct here
            # TODO: Find some related invariant in the logs that can be used
            if (modelname == 'trivial') or \
               ((modelname == 'official_ncf') and (dataset == 'imagenet')) or \
               ((modelname == 'mobilenet') and (dataset == 'imagenet') and
                (targetdevice == 'CPU')):
                bxr = '{0}{1}{2}{3}{4}'.format(batchsize, flagXLA, flagROCm,
                                               "", fp16used)
            else:
                bxr = '{0}{1}{2}{3}{4}'.format(batchsize, flagXLA, flagROCm,
                                               miopenwarn, fp16used)
            # might be easier to exclude trivial and ncf models
            #if (modelname != 'trivial') and (modelname != 'official_ncf'):
            results.append((targetdevice, dataset, modelname,
                            bxr, totalimgsec, linenum))
            # reset parsing state for the next run
            flagXLA = ""
            flagROCm = ""
            miopenwarn = ""
            fp16used = ""
            modelname = ""
            dataset = ""
            batchsize = ""
            targetdevice = ""
            totalimgsec = ""
    # end for line in fileIterator
    return results
#end get_results_from_logfile(fileIterator)
# find the unique keys in results
def find_unique_keys_in_results(results):
    """Collect the distinct values of each result field.

    Args:
        results: list of (device, dataset, modelname, batchsize+flags,
            totalimgsec, linenum) tuples from get_results_from_logfile

    Returns:
        (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
        where each element is a sorted list of the unique values seen.
        batchsizeBaselineKeys holds the batch sizes (as ints) whose string
        carried no flag suffix, i.e. baseline runs with no flags enabled.
    """
    # comprehensions replace the original index-based range(len(...)) loops
    devices = sorted(set(r[0] for r in results))
    datasets = sorted(set(r[1] for r in results))
    modelnames = sorted(set(r[2] for r in results))
    batchcombos = [r[3] for r in results]
    batchsizes = sorted(set(batchcombos))
    # baseline values are runs with no flags enabled, so the whole
    # batch-size string is a plain integer like "032"
    baseline = []
    for combo in batchcombos:
        try:
            baseline.append(int(combo))
        except ValueError:
            # flagged combos like "032XF" are not baselines; skip them
            continue
    batchsizeBaselineKeys = sorted(set(baseline))
    return (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
# end find_unique_keys_in_results(results)
# print results with model names as rows and batch size+flags as columns
#
# example:
#
# imagenet
# model batchsize=016   016F    016XRF  032     032F    032XRF
# alexnet       223     237     239     318     325     327
# googlenet     135     138     138     155     160     161
# inception3    25.7    29.7    30.2    26.6    31.3    32.0
# ...
def print_tabular_results(results, devices, datasets, modelnames,
                          batchsizes, batchsizeBaselineKeys):
    """Print one tab-separated table per (device, dataset) pair: model
    names as rows, batch-size+flag combinations as columns, with "na"
    marking combinations that produced no result."""
    sep = '\t'
    for device in devices:
        print("")
        print(device)
        for dataset in datasets:
            print("")
            print(dataset)
            # legend row listing every batch size + flag combination
            header = "model batchsize="
            for combo in batchsizes:
                header = header + sep + combo
            print(header)
            # one row per model, one cell per batch size + flag combination
            for model in modelnames:
                cells = [model]
                for combo in batchsizes:
                    # first matching result wins; "na" when nothing matches
                    hit = next((r[4] for r in results
                                if (r[0], r[1], r[2], r[3]) ==
                                   (device, dataset, model, combo)), None)
                    cells.append(hit if hit is not None else "na")
                print(sep.join(cells))
# end print_tabular_results(results, ...
# print results with model names as rows and batch size+flags as columns
# but with flagged results normalized relative to standard/baseline results
#
# the results are comma-separated in CSV-compatible output that can be
# imported into a spreadsheet for further processing
#
#  imagenet
#  model batchsize,     016,    016F,   016XRF
#  alexnet,     0.00,   6.28,   7.17
#  googlenet,   0.00,   2.22,   2.22
#  inception3,  0.00,   15.56,  17.51
#  inception4,  0.00,   15.57,  17.21
#  lenet5,      0.00,   4.69,   6.30
#  mobilenet,   0.00,   -18.89, -18.15
#  ...
def print_relative_tabular_results(results, devices, datasets, modelnames, \
                                   batchsizes, batchsizeBaselineKeys):
    # Print, per (device, dataset) pair, a CSV table where each cell is a
    # model's throughput expressed as percentage gain over the baseline
    # (unflagged) run of the same batch size; the baseline column itself
    # prints 0.00 and missing runs print "na".
    seperator = ','
    for v in devices:
        print("")
        print(v)
        # for each data set:
        for d in datasets:
            print("")
            print(d)
            # print each batch size value in the legend
            s = "model batchsize="
            for e in batchsizes:
                s = s + seperator + e
            #print(s.expandtabs(tabSizeInSpaces))
            print(s)
            # for each model ...
            for m in modelnames:
                rslt = m
                # NOTE(review): baseline carries over between columns and
                # starts at 1.0; a flagged column encountered before its
                # baseline column is normalized against the previous
                # baseline. This relies on batchsizes' sort order placing
                # each plain "NNN" entry before its flagged variants --
                # verify against find_unique_keys_in_results output.
                baseline = 1.0
                # find its results for each batch size
                for b in batchsizes:
                    foundrslt = 0
                    for i in range(len(results)):
                        if (results[i][0] == v) and (results[i][1] == d) and \
                           (results[i][2] == m) and (results[i][3] == b) and \
                           (foundrslt == 0):
                            # output results relative to baseline batch sizes
                            try:
                                # plain-integer combo means an unflagged run;
                                # remember it as the new baseline throughput
                                n = int(b)
                                if n in batchsizeBaselineKeys:
                                    baseline = float(results[i][4])
                            except Exception:
                                pass
                            foundrslt = results[i][4]
                            # percentage gain relative to the baseline run
                            prcntGain = ((float(foundrslt) / baseline) * 100) - 100
                            prcntGainStr = '{:2.2f}'.format(prcntGain)
                            rslt = rslt + seperator + prcntGainStr
                    if foundrslt == 0:
                        # no result for this combination
                        rslt = rslt + seperator + "na"
                #print(rslt.expandtabs(tabSizeInSpaces))
                print(rslt)
#end print_relative_tabular_results(results, ...
# output formatted values for each device-type/model/dataset
# fileinput.input() reads the file(s) named on the command line (or stdin),
# e.g.: python3 parse.py benchmarklog
fileIterator = iter(fileinput.input())
results = get_results_from_logfile(fileIterator)
# debug aid: dump every parsed result tuple
"""
for r in sorted(results):
    s = ""
    for i in range(len(results[0])):
        s = s + '{0}-'.format(r[i])
    print(s)
"""
(devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys) = \
    find_unique_keys_in_results(results)
print("Legend:")
print(" X means XLA was enabled")
print(" export TF_XLA_FLAGS=--tf_xla_cpu_global_jit")
print(" R means ROCm Fusion was enabled")
print(" export TF_ROCM_FUSION_ENABLE=1")
print(" F means 16-Bit Floating Point was used (--use_fp16)")
# FIX: corrected "optimations" typo in the user-facing legend text
print(" C means MIOpen Compute Unit optimizations do not exist for this GPU")
print(" na means the benchmark did not run or batch size was too large")
# absolute throughput tables (tab-separated)
print_tabular_results(results, devices, datasets, modelnames,
                      batchsizes, batchsizeBaselineKeys)
print("\n\n\n\n")
# percentage-gain-vs-baseline tables (CSV)
print_relative_tabular_results(results, devices, datasets, modelnames,
                               batchsizes, batchsizeBaselineKeys)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment