Created
October 16, 2019 14:42
-
-
Save mwrnd/7931accea091b6dcf5f95348025c525e to your computer and use it in GitHub Desktop.
Process the output logs of multiple calls to tensorflow/benchmark's tf_cnn_benchmarks.py script into an easy-to-read summary.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# Copyright 2019 Matthew Wielgus. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================ | |
Purpose: Process the output logs of multiple calls to tensorflow/benchmark's | |
tf_cnn_benchmarks.py script into an easy-to-read summary. | |
Usage: | |
0) Linux system with tensorflow and tensorflow/benchmarks installed | |
Versions used for development and testing: | |
tensorflow/benchmarks version: abb1aec2f2db4ba73fac2e1359227aef59b10258 | |
https://codeload.github.com/tensorflow/benchmarks/zip/abb1aec2f2db4ba73fac2e1359227aef59b10258 | |
tensorflow version: tensorflow-rocm-1.14.2 | |
1) Generate command-line calls with cmds.py: | |
python cmds.py > benchmarkcommands | |
2) Run the commands and output results to a file: | |
bash -x <benchmarkcommands 2>&1 | tee benchmarklog | |
3) Parse the results with this script to generate summary: | |
python parse.py benchmarklog 2>&1 | tee benchmarkresults | |
Notes: - tested on CPU and GPU target devices | |
- cat benchmarklog | grep "PUdev\|TF_\|images\/sec\|--model" | less | |
""" | |
import future | |
import fileinput | |
import re | |
tabSizeInSpaces = 8
# Marker appended to a batch-size column when MIOpen Compute Unit
# optimizations appear to have been used for the run.
miopenCUtext = "CU"


# find all tf_cnn_benchmarks.py results in a log file
def get_results_from_logfile(fileIterator):
    """Parse a tf_cnn_benchmarks.py log and summarize each completed run.

    Args:
        fileIterator: iterable of log lines (a file object, a list of
            strings, or fileinput.input()).

    Returns:
        list of (targetdevice, dataset, modelname, batchsize+flags,
        total_images_per_sec) tuples of strings, one per completed run.
    """
    results = []
    # Per-run parse state; reset after every completed (or restarted) run.
    flagXLA = ""                # "X" once the XLA service started and stayed up
    flagROCm = ""               # "R" once ROCm Fusion was reported enabled
    miopenwarn = miopenCUtext   # cleared when MIOpen has no CU tuning data
    modelname = ""
    dataset = ""
    batchsize = ""
    targetdevice = ""
    totalimgsec = ""
    # Compile the log-line markers once, outside the line loop.
    xlae = re.compile(r'XLA service')
    xlad = re.compile(r'If you want XLA:CPU')
    rocm = re.compile(r'ROCm Fusion is enabled')
    mipn = re.compile(r'MIOpen\(HIP\): Warning \[FindRecordUnsafe\] File is unrea')
    modl = re.compile(r'Model: ')
    dats = re.compile(r'Dataset:')
    batc = re.compile(r'Batch size:')
    devs = re.compile(r'Devices: \[\'')
    cpud = re.compile(r'CPU')
    gpud = re.compile(r'GPU')
    totl = re.compile(r'total images/sec:')
    for line in fileIterator:
        if xlae.search(line):
            flagXLA = "X"
        if xlad.search(line):
            # XLA Service can start, but then not run; drop the flag.
            flagXLA = ""
        if rocm.search(line):
            flagROCm = "R"
        if mipn.search(line):
            # No Compute Unit Optimizations for current GPU
            miopenwarn = ""
        if modl.search(line):
            # if you get Model twice then this run failed, restart parsing
            if modelname != "":
                flagXLA = ""
                flagROCm = ""
                miopenwarn = miopenCUtext
                dataset = ""
                batchsize = ""
                targetdevice = ""
                totalimgsec = ""
            modelname = line.split()[1]
        if dats.search(line):
            dataset = line.split()[1]
        if batc.search(line):
            # use three numerals for benchmark size so columns sort lexically
            batchsize = '{:03d}'.format(int(line.split()[2]))
        if devs.search(line):
            if cpud.search(line):
                targetdevice = "CPU"
            elif gpud.search(line):
                targetdevice = "GPU"
            else:
                targetdevice = "UNKNOWN"
        if totl.search(line):
            # Keep roughly three significant figures in the summary value.
            rate = float(line.split()[2])
            if rate < 1:
                totalimgsec = '{:1.2f}'.format(rate)
            elif rate < 10:
                totalimgsec = '{:2.1f}'.format(rate)
            elif rate < 100:
                totalimgsec = '{:3.1f}'.format(rate)
            else:
                totalimgsec = '{:.0f}'.format(rate)
        # have all the required data, must have encountered a completed run
        # BUGFIX: the original compared the batchsize string against the
        # int 0, which is always True in Python 3; compare against "".
        if (modelname != "") and (dataset != "") and (batchsize != "") and \
           (targetdevice != "") and (totalimgsec != ""):
            # BUGFix: trivial & CPU/mobilenet/imagenet combinations are
            # incorrectly processed as they do not use any MIOpen Compute
            # Optimizations but parsing logs is easier if you assume they
            # do, correct here.
            # TODO: Find some related invariant in the logs that can be used
            if (modelname == 'trivial') or \
               ((modelname == 'mobilenet') and (dataset == 'imagenet') and
                (targetdevice == 'CPU')) or \
               ((modelname == 'official_ncf') and (dataset == 'imagenet')):
                bxr = '{0}{1}{2}'.format(batchsize, flagXLA, flagROCm)
            else:
                bxr = '{0}{1}{2}{3}'.format(batchsize, flagXLA, flagROCm,
                                            miopenwarn)
            results.append((targetdevice, dataset, modelname, bxr,
                            totalimgsec))
            # reset parsing state for the next run in the log
            flagXLA = ""
            flagROCm = ""
            miopenwarn = miopenCUtext
            modelname = ""
            dataset = ""
            batchsize = ""
            targetdevice = ""
            totalimgsec = ""
    # end for line in fileIterator
    return results
#end get_results_from_logfile(fileIterator)
#end get_results_from_logfile(fileIterator) | |
# find the unique keys in results | |
def find_unique_keys_in_results(results):
    """Extract the sorted unique key values from parsed benchmark results.

    Args:
        results: list of (device, dataset, model, batchsize+flags,
            images_per_sec) string tuples, as produced by
            get_results_from_logfile().

    Returns:
        (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
        where the first four are sorted lists of unique strings and
        batchsizeBaselineKeys is a sorted list of ints, one per batch-size
        column that carries no flag suffix (the baseline runs).
    """
    # find all possible devices / data sets / model names / batch-size+flag
    # combinations (idiomatic set comprehensions replace the index loops)
    devices = sorted({r[0] for r in results})
    datasets = sorted({r[1] for r in results})
    modelnames = sorted({r[2] for r in results})
    batchsizes = sorted({r[3] for r in results})
    # find baseline batch size values
    # baseline values are runs with no flags enabled: a flag suffix
    # (X/R/CU) makes int() fail, excluding that column from the baselines
    baselines = set()
    for b in batchsizes:
        try:
            baselines.add(int(b))
        except ValueError:
            continue
    batchsizeBaselineKeys = sorted(baselines)
    return (devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys)
# end find_unique_keys_in_results(results)
# print results with model names as rows and batch size+flags as columns | |
# | |
# model batchsize=16 32 64 128 256 | |
# trivial 3619 6778 11854 19279 26723 | |
# alexnet 193 258 311 340 355 | |
# googlenet 121 136 143 145 132 | |
# inception3 22.3 23.0 23.7 na na | |
# inception4 10.7 10.9 na na na | |
# lenet5 3706 6212 10154 14438 17627 | |
# ... | |
def print_tabular_results(results, devices, datasets, modelnames, \
                          batchsizes, batchsizeBaselineKeys):
    """Print one tab-separated table per device/dataset pair.

    Rows are model names, columns are batch-size+flag combinations, and
    cells hold the images/sec figure ("na" when no matching run exists).
    Column alignment comes from expandtabs(tabSizeInSpaces).
    """
    seperator = '\t'
    for v in devices:
        print("")
        print(v)
        for d in datasets:
            print("")
            print(d)
            # Legend row listing every batch-size+flag column.
            header = seperator.join(["model batchsize"] + list(batchsizes))
            print(header.expandtabs(tabSizeInSpaces))
            for m in modelnames:
                row = m
                for b in batchsizes:
                    # First matching result wins; "na" if none matches.
                    cell = "na"
                    for entry in results:
                        if entry[0] == v and entry[1] == d and \
                           entry[2] == m and entry[3] == b:
                            cell = entry[4]
                            break
                    row = row + seperator + cell
                print(row.expandtabs(tabSizeInSpaces))
# end print_tabular_results(results, ...
# end print_tabular_results(results, ... | |
# print results with model names as rows and batch size+flags as columns | |
# but with flagged results normalized relative to standard/baseline results | |
# | |
# the results are comma-seperated in CSV-compatible output that can be | |
# imported into a spreadsheet for further processing | |
# | |
# imagenet | |
# model, 016, 016CU, 016XRCU, 32, 032CU, 032XRCU, 64, 064CU | |
# alexnet, 0, 4.15, 5.18, 0, 8.53, 4.65, 0, 10.29 | |
# googlenet, 0, -1.65, 0, 0, 0, 0, 0, 0.7 | |
# inception3,0, 0.45, 4.93, 0, 2.17, 4.78, 0, 1.67 | |
# ... | |
def print_relative_tabular_results(results, devices, datasets, modelnames, \
                                   batchsizes, batchsizeBaselineKeys):
    """Print CSV tables of percentage gain versus the baseline runs.

    Rows are model names, columns are batch-size+flag combinations; each
    cell is the percent speedup of that run relative to the most recently
    seen flag-free (baseline) batch-size column for the same model, or
    "na" when no matching run exists. Output is CSV-compatible so it can
    be imported into a spreadsheet.
    """
    seperator = ','
    for v in devices:
        print("")
        print(v)
        for d in datasets:
            print("")
            print(d)
            # Legend row listing every batch-size+flag column.
            print(seperator.join(["model batchsize"] + list(batchsizes)))
            for m in modelnames:
                row = m
                # Default baseline until a flag-free column is encountered.
                baseline = 1.0
                for b in batchsizes:
                    found = 0
                    for entry in results:
                        if entry[0] != v or entry[1] != d or \
                           entry[2] != m or entry[3] != b:
                            continue
                        # A pure-numeric column is a baseline: remember its
                        # rate for the flagged columns that follow it.
                        try:
                            if int(b) in batchsizeBaselineKeys:
                                baseline = float(entry[4])
                        except ValueError:
                            pass
                        found = entry[4]
                        prcntGain = ((float(found) / baseline) * 100) - 100
                        row = row + seperator + '{:2.2f}'.format(prcntGain)
                        break
                    if found == 0:
                        row = row + seperator + "na"
                print(row)
#end print_relative_tabular_results(results, ...
# output formatted values for each device-type/model/dataset
# Reads the benchmark log from the files named on the command line (or from
# stdin) via fileinput, parses it into per-run result tuples, then prints a
# legend followed by the absolute and baseline-relative summary tables.
fileIterator = iter(fileinput.input())
results = get_results_from_logfile(fileIterator)
(devices, datasets, modelnames, batchsizes, batchsizeBaselineKeys) = \
    find_unique_keys_in_results(results)
print("Legend:")
print(" X means XLA was enabled")
print(" export TF_XLA_FLAGS=--tf_xla_cpu_global_jit")
print(" R means ROCm Fusion was enabled")
print(" export TF_ROCM_FUSION_ENABLE=1")
# BUGFIX: corrected the "optimations" typo in the legend text below.
print(" CU means MIOpen Compute Unit optimizations")
print(" were used with this unsupported video card")
print(" na means the benchmark did not run or batch size was too large")
print_tabular_results(results, devices, datasets, modelnames, \
                      batchsizes, batchsizeBaselineKeys)
print("\n\n\n\n")
print_relative_tabular_results(results, devices, datasets, modelnames, \
                               batchsizes, batchsizeBaselineKeys)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment