Skip to content

Instantly share code, notes, and snippets.

@fffonion
Created September 23, 2015 22:57
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save fffonion/57039efab2e66a7ee3ad to your computer and use it in GitHub Desktop.
Save fffonion/57039efab2e66a7ee3ad to your computer and use it in GitHub Desktop.
testing gcc optimization options
args run_time
-Dfoo 1.942
-ftree-sink SAME
-fauto-inc-dec SAME
-fcrossjumping 1.925
-fif-conversion SAME
-faggressive-loop-optimizations SAME
-fconserve-stack SAME
-ftree-vrp SAME
-ftree-vrp -fdelete-null-pointer-checks SAME
-fsched-pressure SAME
-fmodulo-sched SAME
-fno-lto SAME
-ftree-ter 1.887
-frerun-cse-after-loop SAME
-ftree-loop-if-convert SAME
-fivopts SAME
-fcombine-stack-adjustments SAME
-fcompare-elim SAME
-fearly-inlining SAME
-fcaller-saves SAME
-fsel-sched-pipelining-outer-loops -fsel-sched-pipelining SAME
-fsched-spec-load-dangerous SAME
-fipa-sra SAME
-fomit-frame-pointer 1.776
-floop-block SAME
-fshrink-wrap SAME
-fbranch-count-reg SAME
-floop-interchange SAME
-fthread-jumps SAME
-ffunction-cse SAME
-fipa-reference SAME
-fno-sched-spec SAME
-fno-strict-overflow SAME
-findirect-inlining -finline-functions SAME
-findirect-inlining -finline-small-functions SAME
-findirect-inlining -finline-functions SAME
-findirect-inlining -finline-small-functions SAME
-fkeep-inline-functions SAME
-fsched2-use-superblocks SAME
-fpeephole SAME
-fsched-dep-count-heuristic SAME
-fipa-pta SAME
-ftree-builtin-call-dce SAME
-fcse-skip-blocks SAME
-fsplit-wide-types SAME
-fkeep-static-consts SAME
-fno-zero-initialized-in-bss SAME
-ftree-sra SAME
-fwhole-program 1.927
-fno-function-cse SAME
-ffat-lto-objects SAME
-fprofile-generate 3.857
-ftree-loop-im SAME
-ftree-loop-im -funswitch-loops SAME
-fno-ira-share-spill-slots SAME
-ftree-vectorize SAME
-fmerge-all-constants SAME
-fno-align-jumps SAME
-fzero-initialized-in-bss SAME
-fno-align-functions SAME
-fno-defer-pop SAME
-fsched-group-heuristic SAME
-fipa-profile SAME
-fforward-propagate SAME
-ftree-dse SAME
-fno-keep-static-consts SAME
-fstrict-overflow 1.919
-fstrict-overflow -fwrapv SAME
-ftree-dce SAME
-fno-peephole SAME
-fno-align-loops SAME
-fmodulo-sched-allow-regmoves -fmodulo-sched SAME
-fgcse-sm SAME
-fgcse-sm -fgcse-lm SAME
-freorder-blocks-and-partition 1.935
-fno-delete-null-pointer-checks SAME
-ftree-fre SAME
-fno-gcse SAME
-ftree-loop-distribute-patterns SAME
-ftree-pta SAME
-fno-sched-interblock SAME
-fsched-spec-load SAME
-fpartial-inlining SAME
-ftree-slsr SAME
-funsafe-loop-optimizations SAME
-ftree-slp-vectorize SAME
-flto-report SAME
-flto-report -flto 1.890
-ftree-partial-pre SAME
-ftree-loop-optimize SAME
-ftree-switch-conversion SAME
-fipa-cp-clone SAME
-fno-peephole2 SAME
-fsched-rank-heuristic SAME
-fselective-scheduling SAME
-fgcse-las SAME
-ftree-forwprop SAME
-fprofile-correction SAME
-fgcse-lm SAME
-fsched-spec-insn-heuristic SAME
-fno-toplevel-reorder SAME
-free SAME
-ftree-phiprop SAME
-flto-partition=1to1 SAME
-flto-partition=balanced SAME
-flto-partition=max SAME
-flto-partition=none SAME
-ftree-reassoc SAME
-fdce SAME
-floop-parallelize-all SAME
-fsched-critical-path-heuristic SAME
-foptimize-strlen SAME
-fira-hoist-pressure SAME
-fgcse-after-reload SAME
-fpredictive-commoning SAME
-ftree-ch SAME
-fdse SAME
-fvariable-expansion-in-unroller SAME
-freschedule-modulo-scheduled-loops SAME
-fdevirtualize SAME
-ftree-loop-ivcanon SAME
-fira-algorithm=priority SAME
-fira-algorithm=CB SAME
-fschedule-insns SAME
-fdelayed-branch SAME
-ftree-copy-prop SAME
-fira-region=all SAME
-fira-region=mixed SAME
-fira-region=one SAME
-ftree-loop-if-convert-stores SAME
-fsel-sched-pipelining -fselective-scheduling SAME
-fsel-sched-pipelining -fselective-scheduling2 SAME
-fsel-sched-pipelining -fselective-scheduling SAME
-fsel-sched-pipelining -fselective-scheduling2 SAME
-fsplit-ivs-in-unroller SAME
-fsplit-ivs-in-unroller -fweb SAME
-fno-ira-share-save-slots SAME
-flto-compression-level=0 SAME
-flto-compression-level=1 SAME
-flto-compression-level=2 SAME
-flto-compression-level=3 SAME
-flto-compression-level=4 SAME
-flto-compression-level=5 SAME
-flto-compression-level=6 SAME
-flto-compression-level=7 SAME
-flto-compression-level=8 SAME
-flto-compression-level=9 SAME
-flto 1.906
-flto -fuse-linker-plugin 1.904
-fno-align-labels SAME
-fdelete-null-pointer-checks SAME
-ftree-bit-ccp -ftree-ccp SAME
-finline-functions SAME
-ftree-loop-linear SAME
-fprefetch-loop-arrays SAME
-foptimize-sibling-calls SAME
-freorder-functions SAME
-ftree-dominator-opts SAME
-fgcse SAME
-finline-functions-called-once SAME
-fguess-branch-probability SAME
-fhoist-adjacent-loads SAME
-fipa-pure-const SAME
-fno-branch-count-reg SAME
-fstrict-aliasing SAME
-fmerge-constants SAME
-floop-nest-optimize SAME
-fschedule-insns2 SAME
-fipa-cp SAME
-fcprop-registers SAME
-floop-strip-mine SAME
-fno-inline SAME
-ftree-pre SAME
-fcse-follow-jumps SAME
-fno-merge-constants SAME
-fgraphite-identity SAME
-fno-guess-branch-probability SAME
-ftree-tail-merge SAME
-fexpensive-optimizations SAME
-fselective-scheduling2 SAME
-fira-loop-pressure SAME
-freorder-blocks SAME
-ftree-loop-distribution SAME
-fweb SAME
-fuse-linker-plugin SAME
-fpeephole2 SAME
-fsched-last-insn-heuristic SAME
-finline-small-functions SAME
-fif-conversion2 SAME
# Catalogue of gcc command-line flags to benchmark, grouped into sections.
#
# Each section maps a flag to either ``None`` (benchmark the flag on its own)
# or to a metadata dict.  Metadata keys read by the driver loop in this file:
#   'd_opt'   - the flag is benchmarked alone and together with each listed flag
#   'd_or'    - the flag is benchmarked together with each listed flag in turn
#   'd_and'   - the flag is benchmarked together with all listed flags
#   'p_range' - the flag takes a value; each entry is appended as ``flag=value``
#   'p_type'  - the flag takes a typed parameter; the driver skips these
#               (presumably a [type, default] pair -- TODO confirm)
# Keys that the driver does not consume yet:
#   'c_and' / 'c_or' - flags this one conflicts with
#   'p_remark'       - free-form note about the parameter
args = {
    'fine_tuning': {
        "-fno-defer-pop": None,
        "-fforward-propagate": None,
        # The key must be the bare flag: the driver appends "=<value>" itself.
        # "style" is only the placeholder used in the gcc manual; the accepted
        # values are off, on and fast.
        "-ffp-contract": {
            'p_range': ['off', 'on', 'fast']
        },
        "-fomit-frame-pointer": None,
        "-foptimize-sibling-calls": None,
        "-foptimize-strlen": None,
        "-fno-inline": None,
        "-finline-small-functions": None,
        "-findirect-inlining": {
            'd_or': [
                '-finline-functions', '-finline-small-functions'
            ]
        },
        "-finline-functions": None,
        "-finline-functions-called-once": None,
        "-fearly-inlining": None,
        "-fprofile-generate": None,
        "-fipa-sra": None,
        "-finline-limit": {
            'p_type': [int, None]
        },
        "-fno-keep-inline-dllexport": None,
        "-fkeep-inline-functions": None,
        "-fkeep-static-consts": None,
        "-fno-keep-static-consts": None,
        "-fmerge-constants": None,
        "-fno-merge-constants": None,
        "-fmerge-all-constants": None,
        "-fmodulo-sched": None,
        "-fmodulo-sched-allow-regmoves": {
            'd_and': ["-fmodulo-sched"]
        },
        "-fno-branch-count-reg": None,
        "-fbranch-count-reg": None,
        "-fno-function-cse": None,
        "-ffunction-cse": None,
        "-fno-zero-initialized-in-bss": None,
        "-fzero-initialized-in-bss": None,
        "-fthread-jumps": None,
        "-fsplit-wide-types": None,
        "-fcse-follow-jumps": None,
        "-fcse-skip-blocks": None,
        "-frerun-cse-after-loop": None,
        "-fgcse": None,
        "-fno-gcse": None,
        "-fgcse-lm": None,
        "-fgcse-sm": {
            'd_opt': ["-fgcse-lm"]
        },
        "-fgcse-las": None,
        "-fgcse-after-reload": None,
        "-faggressive-loop-optimizations": None,
        "-funsafe-loop-optimizations": None,
        "-fcrossjumping": None,
        "-fauto-inc-dec": None,
        "-fdce": None,
        "-fdse": None,
        "-fif-conversion": None,
        "-fif-conversion2": None,
        "-fdeclone-ctor-dtor": None,
        "-fdelete-null-pointer-checks": None,
        "-fno-delete-null-pointer-checks": None,
        "-fdevirtualize": None,
        "-fdevirtualize-speculatively": None,
        "-fdevirtualize-at-ltrans": None,
        "-fexpensive-optimizations": None,
        "-free": None,
        "-fno-lifetime-dse": None,
        "-flive-range-shrinkage": None,
        "-fira-algorithm": {
            'p_range': ['priority', 'CB']
        },
        "-fira-region": {
            'p_range': ['all', 'mixed', 'one']
        },
        "-fira-hoist-pressure": None,
        "-fira-loop-pressure": None,
        "-fno-ira-share-save-slots": None,
        "-fno-ira-share-spill-slots": None,
        "-fira-verbose": {
            'p_type': [int, 5]
        },
        "-flra-remat": None,
        "-fdelayed-branch": None,
        "-fschedule-insns": None,
        "-fschedule-insns2": None,
        "-fno-sched-interblock": None,
        "-fno-sched-spec": None,
        "-fsched-pressure": None,
        "-fsched-spec-load": None,
        "-fsched-spec-load-dangerous": None,
        "-fsched-stalled-insns": {
            'p_type': [int, 1]
        },
        "-fsched-stalled-insns-dep": {
            'p_type': [int, 1],
        },
        "-fsched2-use-superblocks": None,
        "-fsched-group-heuristic": None,
        "-fsched-critical-path-heuristic": None,
        "-fsched-spec-insn-heuristic": None,
        "-fsched-rank-heuristic": None,
        "-fsched-last-insn-heuristic": None,
        "-fsched-dep-count-heuristic": None,
        "-freschedule-modulo-scheduled-loops": None,
        "-fselective-scheduling": None,
        "-fselective-scheduling2": None,
        "-fsel-sched-pipelining": {
            'd_or': ["-fselective-scheduling", "-fselective-scheduling2"]
        },
        "-fsel-sched-pipelining-outer-loops": {
            'd_and': ["-fsel-sched-pipelining"]
        },
        "-fsemantic-interposition": None,
        "-fno-semantic-interposition": None,
        "-fshrink-wrap": None,
        "-fcaller-saves": None,
        "-fcombine-stack-adjustments": None,
        "-fipa-ra": None,
        "-fconserve-stack": None,
        "-ftree-reassoc": None,
        "-ftree-pre": None,
        "-ftree-partial-pre": None,
        "-ftree-forwprop": None,
        "-ftree-fre": None,
        "-ftree-phiprop": None,
        "-fhoist-adjacent-loads": None,
        "-ftree-copy-prop": None,
        "-fipa-pure-const": None,
        "-fipa-reference": None,
        "-fipa-pta": None,
        "-fipa-profile": None,
        "-fipa-cp": None,
        "-fipa-cp-clone": None,
        "-fipa-cp-alignment": {
            'd_and': ["-fipa-cp"]
        },
        "-fipa-icf": None,
        "-fisolate-erroneous-paths-dereference": {
            'd_and': ["-fdelete-null-pointer-checks"]
        },
        "-fisolate-erroneous-paths-attribute": None,
        "-ftree-sink": None,
        "-ftree-bit-ccp": {
            'd_and': ["-ftree-ccp"]
        },
        "-fssa-phiopt": None,
        "-ftree-switch-conversion": None,
        "-ftree-tail-merge": None,
        "-ftree-dce": None,
        "-ftree-builtin-call-dce": None,
        "-ftree-dominator-opts": None,
        "-ftree-dse": None,
        "-ftree-ch": None,
        "-ftree-loop-optimize": None,
        "-ftree-loop-linear": None,
        "-floop-interchange": None,
        "-floop-strip-mine": None,
        "-floop-block": None,
        "-floop-unroll-and-jam": None,
        "-floop-nest-optimize": None,
        "-fgraphite-identity": None,
        "-floop-parallelize-all": None,
        "-ftree-coalesce-vars": {
            'c_and': ["-fno-var-tracking-assignments"]
        },
        "-ftree-loop-if-convert": None,
        "-ftree-loop-if-convert-stores": None,
        "-ftree-loop-distribution": None,
        "-ftree-loop-distribute-patterns": None,
        "-ftree-loop-im": {
            'd_opt': ["-funswitch-loops"]
        },
        "-ftree-loop-ivcanon": None,
        "-fivopts": None,
        "-ftree-parallelize-loops": {
            'p_type': [int, 1]
        },
        "-ftree-pta": None,
        "-ftree-sra": None,
        "-ftree-ter": None,
        "-ftree-slsr": None,
        "-ftree-vectorize": None,
        "-ftree-loop-vectorize": None,
        "-ftree-slp-vectorize": None,
        "-fvect-cost-model": {
            'p_range': ["unlimited", "dynamic", "cheap"]
        },
        "-fsimd-cost-model": {
            'p_range': ["unlimited", "dynamic", "cheap"]
        },
        "-ftree-vrp": {
            'd_opt': ["-fdelete-null-pointer-checks"]
        },
        "-fsplit-ivs-in-unroller": {
            'd_opt': ['-fweb']
        },
        "-fvariable-expansion-in-unroller": None,
        "-fpartial-inlining": None,
        "-fpredictive-commoning": None,
        "-fprefetch-loop-arrays": None,
        "-fno-peephole": None,
        "-fno-peephole2": None,
        "-fpeephole": None,
        "-fpeephole2": None,
        "-fno-guess-branch-probability": None,
        "-fguess-branch-probability": None,
        "-freorder-blocks": None,
        "-freorder-blocks-and-partition": None,
        "-freorder-functions": None,
        "-fstrict-aliasing": None,
        # NOTE(review): "-fields" does not look like a gcc option; possibly a
        # typo or paste artefact -- confirm the intended flag or drop it.
        "-fields": None,
        "-fstrict-overflow": {
            'd_opt': ['-fwrapv']
        },
        "-fno-strict-overflow": None,
        "-falign-functions": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2'
        },
        "-fno-align-functions": None,
        "-falign-labels": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2'
        },
        "-fno-align-labels": None,
        "-falign-loops": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2'
        },
        "-fno-align-loops": None,
        "-falign-jumps": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2'
        },
        "-fno-align-jumps": None,
        "-fno-toplevel-reorder": None,
        "-fweb": None,
        "-fwhole-program": None,
        "-flto": {
            'd_opt': ["-fuse-linker-plugin"]
        },  # needs deeper investigation
        "-fno-lto": None,
        "-flto-partition": {
            'p_range': ["1to1", "balanced", "max", "none"]
        },
        "-flto-odr-type-merging": None,
        "-flto-compression-level": {
            'p_range': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, None],
            'd_and': ["-flto"]
        },
        "-flto-report": {
            'd_opt': ["-flto"]
        },
        "-flto-report-wpa": None,
        "-fuse-linker-plugin": None,
        "-ffat-lto-objects": None,
        "-fcompare-elim": None,
        "-fcprop-registers": None,
        "-fprofile-correction": None,
        #"-fprofile-dir=path":None,
        #"-fprofile-generate=path":None,
        #"-fprofile-use=path":None,
        #"-fauto-profile=path":None,
    },
    "float_tuning": {
        "-ffloat-store": None,
        "-fexcess-precision=standard": {
            'c_or': ["-ffast-math", "-funsafe-math-optimizations"]
        },
        "-fexcess-precision=fast": None,
        "-ffast-math": None,
        "-fno-math-errno": None,
        "-fmath-errno": None,
        "-funsafe-math-optimizations": None,
        "-fassociative-math": None,
        "-fno-associative-math": None,
        "-ffinite-math-only": None,
        "-fsigned-zeros": None,
        "-fno-signed-zeros": None,
        "-ftrapping-math": None,
        "-fno-trapping-math": None,
        "-frounding-math": None,
        "-fno-rounding-math": None,
        "-fsignaling-nans": None,
        "-fno-signaling-nans": None,
        "-fsingle-precision-constant": None,
        "-fcx-limited-range": None,
        "-fno-cx-limited-range": None,
        #"-fcx-fortran-rules":None,
        #"-fno-cx-fortran-rules":None,
    },
    "pgo": {
        "-fbranch-probabilities": None,
        # The ":None," is required here: without it this literal is glued to
        # the next string literal and forms one bogus key.
        "-fprofile-generate": None,
        # "-fprofile-arcs":None,
        # "-fprofile-values":None,
        # "-fprofile-use":None,
        # # "-fprofile-reorder-functions":None,
        # # "-ftracer":None,
        # # "-funroll-loops":None,
        # # "-funroll-all-loops":None,
        # # "-fpeel-loops":None,
        # # # "-frename-registers":None,
        # "-fvpt":{
        # 'd_opt':["-fprofile-arcs"]
        #},
        "-funswitch-loops": None,
        "-ffunction-sections": None,
        "-fdata-sections": None,
        "-fbranch-target-load-optimize": None,
        "-fbranch-target-load-optimize2": None,
        "-fbtr-bb-exclusive": None,
        "-fstack-protector": None,
        "-fstack-protector-all": None,
        "-fstack-protector-strong": None,
        "-fstack-protector-explicit": None,
        "-fstdarg-opt": None,
        "-fsection-anchors": None,
        # "-fschedule-fusion":None,
        # "-fmove-loop-invariants":None,
    }
}
import os
import subprocess
import time
import hashlib
# Number of times each compiled binary is executed; the reported run time is
# the total divided by this count.
SAMPLE_TIME = 10
# Compiler executable invoked for every build.
CC = "gcc"
# Benchmark source file handed to the compiler.
SOURCE = './drystone.c'
# Path of the compiled benchmark binary; it is hashed, executed, and removed
# before the next flag is tried.
OUTFILE = './drystone'
# Not referenced anywhere else in the visible script.
ASMFILE = './drystone.s'
# The Markdown results table is written to this file.
RESULT_FILE = './result.md'
# Not referenced anywhere else in the visible script.
DEFAULT_ARG = ' '
# Sections of `args` that the driver loop skips.
ignored_sections = ['float_tuning', 'pgo']
def popen(*args):
    """Launch ``args`` as a child process with all three standard streams piped.

    The positional arguments form the command line (program first).  The
    ``subprocess.Popen`` handle is returned immediately; the caller is
    responsible for waiting on it.
    """
    pipe = subprocess.PIPE
    return subprocess.Popen(args, stdin=pipe, stdout=pipe, stderr=pipe)
def _md5(b):
    """Return the hexadecimal MD5 digest of the byte string ``b``."""
    digest = hashlib.md5()
    digest.update(b)
    return digest.hexdigest()
# Results file, kept open for the lifetime of the script.  Line buffering (1)
# pushes every finished table row to disk as soon as it is written.  The
# previous buffering value of False (0) asks for unbuffered text I/O, which
# Python 3 rejects with ValueError.
resf = open(RESULT_FILE, 'w', 1)
# Markdown table header; one "flags|time" row is appended per measurement.
resf.write('''
args| run_time|
------|------
''')
# MD5 of the baseline binary; filled in by compile_and_timeit when it builds
# the '-Dfoo' baseline.
baseline_hash = ''
def compile_and_timeit(*arg):
    """Compile SOURCE with the extra gcc arguments ``arg`` and time the result.

    The command line is ``CC SOURCE <arg...> -o OUTFILE``.  If the compiler
    exits non-zero its stderr is printed and nothing is recorded.  Otherwise
    the binary is hashed:

    * when ``'-Dfoo'`` is among the arguments the hash becomes the new
      ``baseline_hash``;
    * when the hash equals ``baseline_hash`` a ``SAME`` row is written to the
      results file and the binary is not executed.

    In every other case the binary is run ``SAMPLE_TIME`` times and the
    average wall-clock time per run is printed and appended to the results
    file.

    Returns ``None`` in all cases.

    Raises:
        OSError: if launching the binary fails for any reason other than the
            file being missing.
    """
    # Function-scope import: errno is not imported at the top of the file.
    import errno

    global baseline_hash
    _args = [CC, SOURCE] + list(arg) + ["-o", OUTFILE]
    # Only the flags under test, without compiler, source and "-o OUTFILE".
    _args_s = ' '.join(_args[2:-2])

    p = popen(*_args)
    # communicate() drains stdout/stderr while waiting; a bare wait() can
    # block forever once the child fills a pipe buffer.
    cc_err = p.communicate()[1]
    if p.returncode:
        print("err %s" % cc_err)
        return
    print('compiled using arg: "%s"' % _args_s)

    # Compare the binary with the baseline so identical builds are not timed.
    with open(OUTFILE, 'rb') as binary:
        fhash = _md5(binary.read())
    if '-Dfoo' in _args:
        baseline_hash = fhash
        print('baseline hash = %s' % baseline_hash)
    elif fhash == baseline_hash:
        print('arg: "%s" exact same as baseline' % _args_s)
        resf.write("%s|SAME\n" % _args_s)
        return

    run_time = 0
    for _ in range(SAMPLE_TIME):
        _t1 = time.time()
        try:
            p = popen(OUTFILE)
        except OSError as ex:
            # A missing binary is skipped; anything else is a real failure.
            if ex.errno == errno.ENOENT:
                continue
            raise
        p.communicate()
        run_time += time.time() - _t1
    print('arg: "%s" reaches %.3fs' % (_args_s, run_time / SAMPLE_TIME))
    resf.write("%s|%.3f\n" % (_args_s, run_time / SAMPLE_TIME))
def _flag_variants(arg, meta):
    """Return the gcc argument tuples to benchmark for the flag ``arg``.

    ``meta`` is the flag's entry in ``args``: ``None`` for a plain flag, or a
    dict of metadata.  Each returned tuple is one compiler invocation.
    A flag whose metadata only has 'p_type' (or only conflict keys) yields no
    variants.
    """
    if not meta:
        return [(arg,)]

    # Flags that must always accompany this one.
    required = tuple(meta.get('d_and', ()))
    variants = []
    if 'd_opt' in meta:
        # Optional companions: measure the flag alone, then with each one.
        variants.append((arg,))
        variants.extend((arg, extra) for extra in meta['d_opt'])
    if 'd_or' in meta:
        # The flag needs one of these; each pairing is measured exactly once.
        variants.extend((arg, extra) for extra in meta['d_or'])
    if 'p_range' in meta:
        for value in meta['p_range']:
            # None has no "=value" spelling ("flag=None" is not valid).
            if value is None:
                continue
            variants.append(('%s=%s' % (arg, value),) + required)
    elif required:
        variants.append((arg,) + required)
    # TODO c_or for conflicts
    return variants


for sec in args:
    if sec in ignored_sections:
        continue
    print("Testing sec %s" % sec)
    # '-Dfoo' goes first: compile_and_timeit treats that build as the baseline
    # every later binary is compared against.
    for arg in ['-Dfoo'] + list(args[sec]):
        try:
            os.remove(OUTFILE)
        except OSError:
            # Nothing to clean up when no binary was produced yet.
            pass
        meta = None if arg == "-Dfoo" else args[sec][arg]
        for variant in _flag_variants(arg, meta):
            compile_and_timeit(*variant)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment