args | run_time |
---|---|
-Dfoo | 1.942 |
-ftree-sink | SAME |
-fauto-inc-dec | SAME |
-fcrossjumping | 1.925 |
-fif-conversion | SAME |
-faggressive-loop-optimizations | SAME |
-fconserve-stack | SAME |
-ftree-vrp | SAME |
-ftree-vrp -fdelete-null-pointer-checks | SAME |
-fsched-pressure | SAME |
-fmodulo-sched | SAME |
-fno-lto | SAME |
-ftree-ter | 1.887 |
-frerun-cse-after-loop | SAME |
-ftree-loop-if-convert | SAME |
-fivopts | SAME |
-fcombine-stack-adjustments | SAME |
-fcompare-elim | SAME |
-fearly-inlining | SAME |
-fcaller-saves | SAME |
-fsel-sched-pipelining-outer-loops -fsel-sched-pipelining | SAME |
-fsched-spec-load-dangerous | SAME |
-fipa-sra | SAME |
-fomit-frame-pointer | 1.776 |
-floop-block | SAME |
-fshrink-wrap | SAME |
-fbranch-count-reg | SAME |
-floop-interchange | SAME |
-fthread-jumps | SAME |
-ffunction-cse | SAME |
-fipa-reference | SAME |
-fno-sched-spec | SAME |
-fno-strict-overflow | SAME |
-findirect-inlining -finline-functions | SAME |
-findirect-inlining -finline-small-functions | SAME |
-findirect-inlining -finline-functions | SAME |
-findirect-inlining -finline-small-functions | SAME |
-fkeep-inline-functions | SAME |
-fsched2-use-superblocks | SAME |
-fpeephole | SAME |
-fsched-dep-count-heuristic | SAME |
-fipa-pta | SAME |
-ftree-builtin-call-dce | SAME |
-fcse-skip-blocks | SAME |
-fsplit-wide-types | SAME |
-fkeep-static-consts | SAME |
-fno-zero-initialized-in-bss | SAME |
-ftree-sra | SAME |
-fwhole-program | 1.927 |
-fno-function-cse | SAME |
-ffat-lto-objects | SAME |
-fprofile-generate | 3.857 |
-ftree-loop-im | SAME |
-ftree-loop-im -funswitch-loops | SAME |
-fno-ira-share-spill-slots | SAME |
-ftree-vectorize | SAME |
-fmerge-all-constants | SAME |
-fno-align-jumps | SAME |
-fzero-initialized-in-bss | SAME |
-fno-align-functions | SAME |
-fno-defer-pop | SAME |
-fsched-group-heuristic | SAME |
-fipa-profile | SAME |
-fforward-propagate | SAME |
-ftree-dse | SAME |
-fno-keep-static-consts | SAME |
-fstrict-overflow | 1.919 |
-fstrict-overflow -fwrapv | SAME |
-ftree-dce | SAME |
-fno-peephole | SAME |
-fno-align-loops | SAME |
-fmodulo-sched-allow-regmoves -fmodulo-sched | SAME |
-fgcse-sm | SAME |
-fgcse-sm -fgcse-lm | SAME |
-freorder-blocks-and-partition | 1.935 |
-fno-delete-null-pointer-checks | SAME |
-ftree-fre | SAME |
-fno-gcse | SAME |
-ftree-loop-distribute-patterns | SAME |
-ftree-pta | SAME |
-fno-sched-interblock | SAME |
-fsched-spec-load | SAME |
-fpartial-inlining | SAME |
-ftree-slsr | SAME |
-funsafe-loop-optimizations | SAME |
-ftree-slp-vectorize | SAME |
-flto-report | SAME |
-flto-report -flto | 1.890 |
-ftree-partial-pre | SAME |
-ftree-loop-optimize | SAME |
-ftree-switch-conversion | SAME |
-fipa-cp-clone | SAME |
-fno-peephole2 | SAME |
-fsched-rank-heuristic | SAME |
-fselective-scheduling | SAME |
-fgcse-las | SAME |
-ftree-forwprop | SAME |
-fprofile-correction | SAME |
-fgcse-lm | SAME |
-fsched-spec-insn-heuristic | SAME |
-fno-toplevel-reorder | SAME |
-free | SAME |
-ftree-phiprop | SAME |
-flto-partition=1to1 | SAME |
-flto-partition=balanced | SAME |
-flto-partition=max | SAME |
-flto-partition=none | SAME |
-ftree-reassoc | SAME |
-fdce | SAME |
-floop-parallelize-all | SAME |
-fsched-critical-path-heuristic | SAME |
-foptimize-strlen | SAME |
-fira-hoist-pressure | SAME |
-fgcse-after-reload | SAME |
-fpredictive-commoning | SAME |
-ftree-ch | SAME |
-fdse | SAME |
-fvariable-expansion-in-unroller | SAME |
-freschedule-modulo-scheduled-loops | SAME |
-fdevirtualize | SAME |
-ftree-loop-ivcanon | SAME |
-fira-algorithm=priority | SAME |
-fira-algorithm=CB | SAME |
-fschedule-insns | SAME |
-fdelayed-branch | SAME |
-ftree-copy-prop | SAME |
-fira-region=all | SAME |
-fira-region=mixed | SAME |
-fira-region=one | SAME |
-ftree-loop-if-convert-stores | SAME |
-fsel-sched-pipelining -fselective-scheduling | SAME |
-fsel-sched-pipelining -fselective-scheduling2 | SAME |
-fsel-sched-pipelining -fselective-scheduling | SAME |
-fsel-sched-pipelining -fselective-scheduling2 | SAME |
-fsplit-ivs-in-unroller | SAME |
-fsplit-ivs-in-unroller -fweb | SAME |
-fno-ira-share-save-slots | SAME |
-flto-compression-level=0 | SAME |
-flto-compression-level=1 | SAME |
-flto-compression-level=2 | SAME |
-flto-compression-level=3 | SAME |
-flto-compression-level=4 | SAME |
-flto-compression-level=5 | SAME |
-flto-compression-level=6 | SAME |
-flto-compression-level=7 | SAME |
-flto-compression-level=8 | SAME |
-flto-compression-level=9 | SAME |
-flto | 1.906 |
-flto -fuse-linker-plugin | 1.904 |
-fno-align-labels | SAME |
-fdelete-null-pointer-checks | SAME |
-ftree-bit-ccp -ftree-ccp | SAME |
-finline-functions | SAME |
-ftree-loop-linear | SAME |
-fprefetch-loop-arrays | SAME |
-foptimize-sibling-calls | SAME |
-freorder-functions | SAME |
-ftree-dominator-opts | SAME |
-fgcse | SAME |
-finline-functions-called-once | SAME |
-fguess-branch-probability | SAME |
-fhoist-adjacent-loads | SAME |
-fipa-pure-const | SAME |
-fno-branch-count-reg | SAME |
-fstrict-aliasing | SAME |
-fmerge-constants | SAME |
-floop-nest-optimize | SAME |
-fschedule-insns2 | SAME |
-fipa-cp | SAME |
-fcprop-registers | SAME |
-floop-strip-mine | SAME |
-fno-inline | SAME |
-ftree-pre | SAME |
-fcse-follow-jumps | SAME |
-fno-merge-constants | SAME |
-fgraphite-identity | SAME |
-fno-guess-branch-probability | SAME |
-ftree-tail-merge | SAME |
-fexpensive-optimizations | SAME |
-fselective-scheduling2 | SAME |
-fira-loop-pressure | SAME |
-freorder-blocks | SAME |
-ftree-loop-distribution | SAME |
-fweb | SAME |
-fuse-linker-plugin | SAME |
-fpeephole2 | SAME |
-fsched-last-insn-heuristic | SAME |
-finline-small-functions | SAME |
-fif-conversion2 | SAME |
Created
September 23, 2015 22:57
-
-
Save fffonion/57039efab2e66a7ee3ad to your computer and use it in GitHub Desktop.
Testing GCC optimization options
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Table of GCC options to benchmark, grouped into named sections.
#
# Each section maps an option string to either None (the option is tried on
# its own) or a metadata dict whose keys are read by the driver loop:
#   'd_opt'   - the option is tried alone and then together with each listed
#               option
#   'd_or'    - the option is tried together with each listed option in turn
#   'd_and'   - the option is tried together with all listed options at once
#   'p_range' - the option is tried as "<option>=<value>" for each value
#   'p_type'  - [type, value] for free-form parameters; the driver currently
#               skips these
#   'p_remark', 'c_or', 'c_and' - notes / conflicting options; not acted on
#               by the driver yet
args = {
    'fine_tuning': {
        "-fno-defer-pop": None,
        "-fforward-propagate": None,
        # The key must not carry "=style": the driver appends "=<value>"
        # itself, and "style" is only the placeholder used in the GCC manual.
        "-ffp-contract": {
            'p_range': ['off', 'on', 'fast'],
        },
        "-fomit-frame-pointer": None,
        "-foptimize-sibling-calls": None,
        "-foptimize-strlen": None,
        "-fno-inline": None,
        "-finline-small-functions": None,
        "-findirect-inlining": {
            'd_or': [
                '-finline-functions', '-finline-small-functions',
            ],
        },
        "-finline-functions": None,
        "-finline-functions-called-once": None,
        "-fearly-inlining": None,
        "-fprofile-generate": None,
        "-fipa-sra": None,
        "-finline-limit": {
            'p_type': [int, None],
        },
        "-fno-keep-inline-dllexport": None,
        "-fkeep-inline-functions": None,
        "-fkeep-static-consts": None,
        "-fno-keep-static-consts": None,
        "-fmerge-constants": None,
        "-fno-merge-constants": None,
        "-fmerge-all-constants": None,
        "-fmodulo-sched": None,
        "-fmodulo-sched-allow-regmoves": {
            'd_and': ["-fmodulo-sched"],
        },
        "-fno-branch-count-reg": None,
        "-fbranch-count-reg": None,
        "-fno-function-cse": None,
        "-ffunction-cse": None,
        "-fno-zero-initialized-in-bss": None,
        "-fzero-initialized-in-bss": None,
        "-fthread-jumps": None,
        "-fsplit-wide-types": None,
        "-fcse-follow-jumps": None,
        "-fcse-skip-blocks": None,
        "-frerun-cse-after-loop": None,
        "-fgcse": None,
        "-fno-gcse": None,
        "-fgcse-lm": None,
        "-fgcse-sm": {
            'd_opt': ["-fgcse-lm"],
        },
        "-fgcse-las": None,
        "-fgcse-after-reload": None,
        "-faggressive-loop-optimizations": None,
        "-funsafe-loop-optimizations": None,
        "-fcrossjumping": None,
        "-fauto-inc-dec": None,
        "-fdce": None,
        "-fdse": None,
        "-fif-conversion": None,
        "-fif-conversion2": None,
        "-fdeclone-ctor-dtor": None,
        "-fdelete-null-pointer-checks": None,
        "-fno-delete-null-pointer-checks": None,
        "-fdevirtualize": None,
        "-fdevirtualize-speculatively": None,
        "-fdevirtualize-at-ltrans": None,
        "-fexpensive-optimizations": None,
        "-free": None,
        "-fno-lifetime-dse": None,
        "-flive-range-shrinkage": None,
        "-fira-algorithm": {
            'p_range': ['priority', 'CB'],
        },
        "-fira-region": {
            'p_range': ['all', 'mixed', 'one'],
        },
        "-fira-hoist-pressure": None,
        "-fira-loop-pressure": None,
        "-fno-ira-share-save-slots": None,
        "-fno-ira-share-spill-slots": None,
        "-fira-verbose": {
            'p_type': [int, 5],
        },
        "-flra-remat": None,
        "-fdelayed-branch": None,
        "-fschedule-insns": None,
        "-fschedule-insns2": None,
        "-fno-sched-interblock": None,
        "-fno-sched-spec": None,
        "-fsched-pressure": None,
        "-fsched-spec-load": None,
        "-fsched-spec-load-dangerous": None,
        "-fsched-stalled-insns": {
            'p_type': [int, 1],
        },
        "-fsched-stalled-insns-dep": {
            'p_type': [int, 1],
        },
        "-fsched2-use-superblocks": None,
        "-fsched-group-heuristic": None,
        "-fsched-critical-path-heuristic": None,
        "-fsched-spec-insn-heuristic": None,
        "-fsched-rank-heuristic": None,
        "-fsched-last-insn-heuristic": None,
        "-fsched-dep-count-heuristic": None,
        "-freschedule-modulo-scheduled-loops": None,
        "-fselective-scheduling": None,
        "-fselective-scheduling2": None,
        "-fsel-sched-pipelining": {
            'd_or': ["-fselective-scheduling", "-fselective-scheduling2"],
        },
        "-fsel-sched-pipelining-outer-loops": {
            'd_and': ["-fsel-sched-pipelining"],
        },
        "-fsemantic-interposition": None,
        "-fno-semantic-interposition": None,
        "-fshrink-wrap": None,
        "-fcaller-saves": None,
        "-fcombine-stack-adjustments": None,
        "-fipa-ra": None,
        "-fconserve-stack": None,
        "-ftree-reassoc": None,
        "-ftree-pre": None,
        "-ftree-partial-pre": None,
        "-ftree-forwprop": None,
        "-ftree-fre": None,
        "-ftree-phiprop": None,
        "-fhoist-adjacent-loads": None,
        "-ftree-copy-prop": None,
        "-fipa-pure-const": None,
        "-fipa-reference": None,
        "-fipa-pta": None,
        "-fipa-profile": None,
        "-fipa-cp": None,
        "-fipa-cp-clone": None,
        "-fipa-cp-alignment": {
            'd_and': ["-fipa-cp"],
        },
        "-fipa-icf": None,
        "-fisolate-erroneous-paths-dereference": {
            'd_and': ["-fdelete-null-pointer-checks"],
        },
        "-fisolate-erroneous-paths-attribute": None,
        "-ftree-sink": None,
        "-ftree-bit-ccp": {
            'd_and': ["-ftree-ccp"],
        },
        "-fssa-phiopt": None,
        "-ftree-switch-conversion": None,
        "-ftree-tail-merge": None,
        "-ftree-dce": None,
        "-ftree-builtin-call-dce": None,
        "-ftree-dominator-opts": None,
        "-ftree-dse": None,
        "-ftree-ch": None,
        "-ftree-loop-optimize": None,
        "-ftree-loop-linear": None,
        "-floop-interchange": None,
        "-floop-strip-mine": None,
        "-floop-block": None,
        "-floop-unroll-and-jam": None,
        "-floop-nest-optimize": None,
        "-fgraphite-identity": None,
        "-floop-parallelize-all": None,
        "-ftree-coalesce-vars": {
            'c_and': ["-fno-var-tracking-assignments"],
        },
        "-ftree-loop-if-convert": None,
        "-ftree-loop-if-convert-stores": None,
        "-ftree-loop-distribution": None,
        "-ftree-loop-distribute-patterns": None,
        "-ftree-loop-im": {
            'd_opt': ["-funswitch-loops"],
        },
        "-ftree-loop-ivcanon": None,
        "-fivopts": None,
        "-ftree-parallelize-loops": {
            'p_type': [int, 1],
        },
        "-ftree-pta": None,
        "-ftree-sra": None,
        "-ftree-ter": None,
        "-ftree-slsr": None,
        "-ftree-vectorize": None,
        "-ftree-loop-vectorize": None,
        "-ftree-slp-vectorize": None,
        "-fvect-cost-model": {
            'p_range': ["unlimited", "dynamic", "cheap"],
        },
        "-fsimd-cost-model": {
            'p_range': ["unlimited", "dynamic", "cheap"],
        },
        "-ftree-vrp": {
            'd_opt': ["-fdelete-null-pointer-checks"],
        },
        "-fsplit-ivs-in-unroller": {
            'd_opt': ['-fweb'],
        },
        "-fvariable-expansion-in-unroller": None,
        "-fpartial-inlining": None,
        "-fpredictive-commoning": None,
        "-fprefetch-loop-arrays": None,
        "-fno-peephole": None,
        "-fno-peephole2": None,
        "-fpeephole": None,
        "-fpeephole2": None,
        "-fno-guess-branch-probability": None,
        "-fguess-branch-probability": None,
        "-freorder-blocks": None,
        "-freorder-blocks-and-partition": None,
        "-freorder-functions": None,
        "-fstrict-aliasing": None,
        # "-fields": None,  # not a GCC option (stray text); disabled
        "-fstrict-overflow": {
            'd_opt': ['-fwrapv'],
        },
        "-fno-strict-overflow": None,
        "-falign-functions": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2',
        },
        "-fno-align-functions": None,
        "-falign-labels": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2',
        },
        "-fno-align-labels": None,
        "-falign-loops": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2',
        },
        "-fno-align-loops": None,
        "-falign-jumps": {
            'p_type': [int, None],
            'p_remark': 'should be power of 2',
        },
        "-fno-align-jumps": None,
        "-fno-toplevel-reorder": None,
        "-fweb": None,
        "-fwhole-program": None,
        "-flto": {
            'd_opt': ["-fuse-linker-plugin"],
        },  # needs deeper investigation
        "-fno-lto": None,
        "-flto-partition": {
            'p_range': ["1to1", "balanced", "max", "none"],
        },
        "-flto-odr-type-merging": None,
        "-flto-compression-level": {
            'p_range': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, None],
            'd_and': ["-flto"],
        },
        "-flto-report": {
            'd_opt': ["-flto"],
        },
        "-flto-report-wpa": None,
        "-fuse-linker-plugin": None,
        "-ffat-lto-objects": None,
        "-fcompare-elim": None,
        "-fcprop-registers": None,
        "-fprofile-correction": None,
        # "-fprofile-dir=path": None,
        # "-fprofile-generate=path": None,
        # "-fprofile-use=path": None,
        # "-fauto-profile=path": None,
    },
    "float_tuning": {
        "-ffloat-store": None,
        "-fexcess-precision=standard": {
            'c_or': ["-ffast-math", "-funsafe-math-optimizations"],
        },
        "-fexcess-precision=fast": None,
        "-ffast-math": None,
        "-fno-math-errno": None,
        "-fmath-errno": None,
        "-funsafe-math-optimizations": None,
        "-fassociative-math": None,
        "-fno-associative-math": None,
        "-ffinite-math-only": None,
        "-fsigned-zeros": None,
        "-fno-signed-zeros": None,
        "-ftrapping-math": None,
        "-fno-trapping-math": None,
        "-frounding-math": None,
        "-fno-rounding-math": None,
        "-fsignaling-nans": None,
        "-fno-signaling-nans": None,
        "-fsingle-precision-constant": None,
        "-fcx-limited-range": None,
        "-fno-cx-limited-range": None,
        # "-fcx-fortran-rules": None,
        # "-fno-cx-fortran-rules": None,
    },
    "pgo": {
        "-fbranch-probabilities": None,
        "-fprofile-generate": None,
        # "-fprofile-arcs": None,
        # "-fprofile-values": None,
        # "-fprofile-use": None,
        # # "-fprofile-reorder-functions": None,
        # # "-ftracer": None,
        # # "-funroll-loops": None,
        # # "-funroll-all-loops": None,
        # # "-fpeel-loops": None,
        # # # "-frename-registers": None,
        # "-fvpt": {
        #     'd_opt': ["-fprofile-arcs"]
        # },
        "-funswitch-loops": None,
        "-ffunction-sections": None,
        "-fdata-sections": None,
        "-fbranch-target-load-optimize": None,
        "-fbranch-target-load-optimize2": None,
        "-fbtr-bb-exclusive": None,
        "-fstack-protector": None,
        "-fstack-protector-all": None,
        "-fstack-protector-strong": None,
        "-fstack-protector-explicit": None,
        "-fstdarg-opt": None,
        "-fsection-anchors": None,
        # "-fschedule-fusion": None,
        # "-fmove-loop-invariants": None,
    },
}
import errno
import hashlib
import os
import subprocess
import time
# Benchmark configuration.
SAMPLE_TIME = 10              # number of timed executions per compiled binary
CC = "gcc"                    # compiler executable
SOURCE = './drystone.c'       # benchmark source file handed to the compiler
OUTFILE = './drystone'        # compiled benchmark binary
ASMFILE = './drystone.s'
RESULT_FILE = './result.md'   # Markdown table the results are written to
DEFAULT_ARG = ' '
# Sections of the option table that the driver loop does not test.
ignored_sections = ['float_tuning', 'pgo']
def popen(*args):
    """Start a child process with stdin, stdout and stderr all piped.

    Args:
        *args: the program and its arguments, one string per element.

    Returns:
        The running ``subprocess.Popen`` object. Because the output streams
        are pipes, callers should collect them with ``communicate()``.
    """
    return subprocess.Popen(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
def _md5(b):
    """Return the hexadecimal MD5 digest of the byte string ``b``."""
    return hashlib.md5(b).hexdigest()
# Result table, opened line-buffered (buffering=1) so every row reaches the
# file as soon as it is written. The previous value `False` (0) requests
# unbuffered I/O, which Python 3 refuses for text-mode files.
resf = open(RESULT_FILE, 'w', 1)
resf.write('''
args| run_time|
------|------
''')
# MD5 of the binary produced by the baseline ("-Dfoo") build; filled in by
# compile_and_timeit and compared against every later build.
baseline_hash = ''
def compile_and_timeit(*arg):
    """Compile SOURCE with extra flags, then time the resulting binary.

    The binary's MD5 is compared with the baseline build (the call whose
    flags include ``-Dfoo``). A binary identical to the baseline is recorded
    as ``SAME`` and not executed; otherwise it is run up to SAMPLE_TIME times
    and the mean wall-clock time is printed and appended to the result file.

    Args:
        *arg: extra command-line flags passed to the compiler.

    Returns:
        None. A failed compilation is reported on stdout and nothing is
        written to the result file.

    Raises:
        OSError: if launching the binary fails for a reason other than the
            file being missing.
    """
    global baseline_hash
    _args = [CC, SOURCE] + list(arg) + ["-o", OUTFILE]
    _args_s = ' '.join(_args[2:-2])
    p = popen(*_args)
    # communicate() drains the pipes while waiting; a bare wait() can block
    # forever once the compiler fills a pipe buffer with diagnostics.
    cc_err = p.communicate()[1]
    if p.returncode:
        print("err %s" % cc_err)
        return
    print('compiled using arg: "%s"' % _args_s)

    # Check whether the flags changed the generated binary at all.
    with open(OUTFILE, 'rb') as binf:
        fhash = _md5(binf.read())
    if '-Dfoo' in _args:
        baseline_hash = fhash
        print('baseline hash = %s' % baseline_hash)
    elif fhash == baseline_hash:
        print('arg: "%s" exact same as baseline' % _args_s)
        resf.write("%s|SAME\n" % _args_s)
        return

    run_time = 0
    completed = 0
    for _ in range(SAMPLE_TIME):
        _t1 = time.time()
        try:
            p = popen(OUTFILE)
        except OSError as ex:
            if ex.errno == errno.ENOENT:
                # Binary is missing: skip this sample.
                continue
            raise
        # Drain the benchmark's output so it cannot stall on a full pipe.
        p.communicate()
        run_time += time.time() - _t1
        completed += 1

    if not completed:
        print('arg: "%s" produced no successful runs' % _args_s)
        return
    # Average over the runs that actually happened, not over SAMPLE_TIME.
    mean_time = run_time / completed
    print('arg: "%s" reaches %.3fs' % (_args_s, mean_time))
    resf.write("%s|%.3f\n" % (_args_s, mean_time))
# Driver: for every non-ignored section, build the "-Dfoo" baseline first and
# then each option, expanding its metadata into the flag combinations to try.
for sec in args:
    if sec in ignored_sections:
        continue
    print("Testing sec %s" % sec)
    # list(...) rather than .keys(): on Python 3 a dict view cannot be
    # concatenated to a list.
    for arg in ['-Dfoo'] + list(args[sec]):
        # Remove the previous binary before building with the next option.
        try:
            os.remove(OUTFILE)
        except OSError:
            pass
        meta = args[sec][arg] if arg != "-Dfoo" else None
        if not meta:
            compile_and_timeit(arg)
            continue
        if 'd_opt' in meta:
            # Optional companions: the option alone, then with each one.
            compile_and_timeit(arg)
            for a in meta['d_opt']:
                compile_and_timeit(arg, a)
        if 'd_or' in meta:
            # Alternative companions: one build per listed option. This used
            # to run twice, producing duplicate result rows.
            for a in meta['d_or']:
                compile_and_timeit(arg, a)
        if 'd_and' in meta:
            # Listed options are all passed together with the option.
            compile_and_timeit(arg, *meta['d_and'])
        if 'p_range' in meta:
            for p in meta['p_range']:
                compile_and_timeit('%s=%s' % (arg, p))
        # 'p_type' (free-form parameter) entries are not benchmarked.
        # TODO c_or for conflicts
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment