Skip to content

Instantly share code, notes, and snippets.

@ChrisCummins
Last active February 9, 2022 17:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ChrisCummins/d4386fd043c1edbeb4da75789e277a9f to your computer and use it in GitHub Desktop.
Save ChrisCummins/d4386fd043c1edbeb4da75789e277a9f to your computer and use it in GitHub Desktop.
Evaluating binary sizes of CHStone benchmarks when compiled using different combinations of clang / opt flags
"""Script to evaluate binary sizes of CHStone benchmarks when compiled using
different combinations of clang / opt flags.
Requires the latest CompilerGym:

    python -m pip install compiler_gym -U

Also requires pandas and the LLVM 10 tools (clang-10, opt-10, llvm-size-10)
on the PATH.

Usage:

    python opt_wtf.py

Output on my machine:

List of compilation methods used:
01 = clang $< -o $@ -Oz
02 = clang $< -o a.bc -Oz -emit-llvm -c ; clang -Oz a.bc -o $@
03 = clang $< -o a.bc -Oz -emit-llvm -c ; opt -Oz a.bc -o b.bc ; clang -Oz b.bc -o $@
04 = clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -Oz -o $@
05 = clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o b.bc ; clang b.bc -o $@ -Oz
06 = clang $< -o a.bc -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -o $@ -Oz
07 = clang $< -o a.bc -Oz -emit-llvm -c ; clang a.bc -o $@
08 = clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o out.bc ; clang out.bc -o $@
Size of benchmarks when compiled using each method (in bytes):
mips gsm adpcm motion dfadd dfmul dfdiv aes blowfish dfsin sha jpeg
Method 01: 2760 5048 6842 7523 5399 4463 4863 11361 23320 7229 18804 31329
Method 02: 2760 4968 6842 7523 5399 4439 4863 11225 23320 7229 18836 31321
Method 03: 2760 4968 6842 7523 5399 4439 4863 11225 23320 7229 18836 31321
Method 04: 2760 5048 6842 7523 5399 4463 4863 11361 23320 7229 18804 31329
Method 05: 2760 4968 6842 7523 5399 4439 4863 11225 23320 7229 18804 31321
Method 06: 3952 7416 9226 8635 6919 5775 6567 16765 27048 9933 19708 35460
Method 07: 5360 8000 9378 9043 7831 6135 7279 15793 26669 12261 20156 36825
Method 08: 5360 8336 9378 9043 7831 6135 7279 15993 26765 12261 20172 36825
Size deltas of each method, relative to Method 01 (in bytes):
mips gsm adpcm motion dfadd dfmul dfdiv aes blowfish dfsin sha jpeg
Method 01: 0 0 0 0 0 0 0 0 0 0 0 0
Method 02: 0 -80 0 0 0 -24 0 -136 0 0 32 -8
Method 03: 0 -80 0 0 0 -24 0 -136 0 0 32 -8
Method 04: 0 0 0 0 0 0 0 0 0 0 0 0
Method 05: 0 -80 0 0 0 -24 0 -136 0 0 0 -8
Method 06: 1192 2368 2384 1112 1520 1312 1704 5404 3728 2704 904 4131
Method 07: 2600 2952 2536 1520 2432 1672 2416 4432 3349 5032 1352 5496
Method 08: 2600 3288 2536 1520 2432 1672 2416 4632 3445 5032 1368 5496
"""
import shlex
import compiler_gym
from pathlib import Path
import subprocess
import pandas as pd
from compiler_gym.util.shell_format import emph
import tempfile
# Names of the LLVM 10 executables that main() substitutes for the "clang" and
# "opt" placeholders in each compilation method's command line.
compiler = "clang-10"
opt = "opt-10"
def print_size_of_gcc_chstone_benchmarks():
    """Print each chstone-v0 benchmark with its object size, smallest first.

    Every benchmark in the ``benchmark://chstone-v0`` dataset is loaded into a
    ``gcc-v0`` CompilerGym environment and its ``obj_size`` observation is
    recorded. The (benchmark, size) pairs are then printed one per line in
    ascending order of size.
    """
    measured = []
    with compiler_gym.make("gcc-v0") as env:
        for benchmark in env.datasets["benchmark://chstone-v0"]:
            # Resetting the environment loads the benchmark so that it can be
            # observed.
            env.reset(benchmark=benchmark)
            measured.append((env.benchmark, env.observation.obj_size()))

    # sorted() is stable, so benchmarks of equal size keep their dataset order.
    for benchmark, obj_size in sorted(measured, key=lambda pair: pair[1]):
        print(benchmark, obj_size)
def write_preprocessed_chstone_benchmark_sources_to_file(outdir: Path):
    """Write the source of every chstone-v0 benchmark to a ``.c`` file.

    Each benchmark in the ``benchmark://chstone-v0`` dataset is loaded into a
    ``gcc-v0`` CompilerGym environment, and the contents of its program are
    written to ``<outdir><benchmark URI path>.c``. The path of every file is
    printed as it is written.

    Args:
        outdir: Directory to write the sources to. It is created, along with
            any missing parents, if it does not already exist.
    """
    outdir.mkdir(exist_ok=True, parents=True)
    with compiler_gym.make("gcc-v0") as env:
        for bm in env.datasets["benchmark://chstone-v0"]:
            env.reset(benchmark=bm)
            # NOTE(review): this relies on the URI path starting with "/"
            # (e.g. "/adpcm") so that it joins onto outdir -- confirm.
            outpath = f"{outdir}{env.benchmark.uri.path}.c"
            print(outpath, flush=True)
            src = env.benchmark.proto.program.contents.decode("utf-8")
            # The bytes were decoded as UTF-8, so write them back as UTF-8
            # rather than with the platform's default encoding, which may be
            # unable to represent every character.
            with open(outpath, "w", encoding="utf-8") as f:
                print(src, file=f)
def enumerate_benchmark_names(outdir: Path):
    """Yield the names of the benchmarks whose C sources are in `outdir`.

    A benchmark name is the stem of a regular file with a ``.c`` suffix that
    sits directly inside `outdir` (subdirectories are not searched). Names are
    yielded in sorted order: ``Path.iterdir()`` makes no ordering guarantee, so
    sorting keeps the results reproducible across machines and filesystems.

    Args:
        outdir: Directory containing the ``<name>.c`` source files.

    Yields:
        The benchmark names, without the ``.c`` suffix, in ascending order.
    """
    yield from sorted(
        entry.stem
        for entry in outdir.iterdir()
        # Skip directories that merely happen to be named like a C source.
        if entry.suffix == ".c" and entry.is_file()
    )
def size_of_text_section(binary: Path) -> int:
    """Return the text section size that ``llvm-size-10`` reports for a binary.

    Args:
        binary: Path of the binary to measure.

    Returns:
        The first whitespace-separated field of the final row of the
        ``llvm-size-10`` output, converted to an integer.

    Raises:
        subprocess.CalledProcessError: If ``llvm-size-10`` exits with a
            non-zero status.
    """
    report = subprocess.check_output(
        ["llvm-size-10", str(binary)], universal_newlines=True
    )
    # Splitting on "\n" leaves an empty string after the trailing newline, so
    # the row holding the sizes is the second-to-last element.
    rows = report.split("\n")
    size_fields = rows[-2].split()
    return int(size_fields[0])
def main():
    """Compile the CHStone benchmarks with each method and print their sizes.

    The benchmark sources are read from the ``chstone`` directory, which is
    populated first if ``chstone/adpcm.c`` does not exist. Each compilation
    method is a ``;``-separated sequence of commands in which ``clang`` and
    ``opt`` stand for the configured binaries, ``$<`` for the benchmark source
    file and ``$@`` for the output binary. After every method has been run on
    every benchmark, two tables are printed: the measured sizes, and the sizes
    relative to the first method.
    """
    outdir = Path("chstone")
    # Only regenerate the sources when they are not already on disk.
    if not (outdir / "adpcm.c").is_file():
        write_preprocessed_chstone_benchmark_sources_to_file(outdir)
    benchmarks = list(enumerate_benchmark_names(outdir))

    methods = []  # Command line of each method, in the order they were run.
    rows = []  # One {benchmark: size} dict per method, parallel to `methods`.

    print(emph("List of compilation methods used:"))

    def _run_one(cmd: str, benchmark: str) -> int:
        """Build one benchmark with `cmd` and return the size of its binary."""
        # The commands run from inside a scratch directory, so the source path
        # must be absolute.
        source = (outdir / f"{benchmark}.c").resolve()
        if not source.is_file():
            raise FileNotFoundError(f"Benchmark source not found: {source}")

        with tempfile.TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            binary = workdir / "a.out"
            # Placeholders and tool names to swap for their real values.
            substitutions = {
                "clang": compiler,
                "opt": opt,
                "$<": str(source),
                "$@": str(binary),
            }
            for command in cmd.split(";"):
                args = [substitutions.get(arg, arg) for arg in shlex.split(command)]
                if not args:
                    # Tolerate an empty command, e.g. from a trailing ";".
                    continue
                # Run inside the scratch directory so that intermediate files
                # such as a.bc are cleaned up with it instead of being left in
                # (or clobbering files in) the caller's working directory.
                subprocess.check_call(args, cwd=workdir, timeout=60)
            return size_of_text_section(binary)

    def method(command_line: str):
        """Register a compilation method and measure every benchmark with it."""
        methods.append(command_line)
        print(f"{len(methods):02d} = {command_line}")
        rows.append({bm: _run_one(command_line, bm) for bm in benchmarks})

    def finish():
        """Print the size table and the table of deltas against method 01."""
        baseline = rows[0]
        deltas = [
            {bm: size - baseline[bm] for bm, size in row.items()} for row in rows
        ]
        index = [f"Method {i:02}:" for i in range(1, len(methods) + 1)]
        sizes = pd.DataFrame(rows, index=index)
        deltas = pd.DataFrame(deltas, index=index)
        print()
        print(emph("Size of benchmarks when compiled using each method (in bytes):"))
        print(sizes)
        print()
        print(emph("Size deltas of each method, relative to Method 01 (in bytes):"))
        print(deltas)

    method("clang $< -o $@ -Oz")
    method("clang $< -o a.bc -Oz -emit-llvm -c ; clang -Oz a.bc -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c ; opt -Oz a.bc -o b.bc ; clang -Oz b.bc -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -Oz -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o b.bc ; clang b.bc -o $@ -Oz")
    method("clang $< -o a.bc -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -o $@ -Oz")
    method("clang $< -o a.bc -Oz -emit-llvm -c ; clang a.bc -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o out.bc ; clang out.bc -o $@")
    finish()
# Run the evaluation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment