Skip to content

Instantly share code, notes, and snippets.

@sztomi
Created March 13, 2024 22:30
Show Gist options
  • Save sztomi/2f56a8d1b2974e932b3cf7b31e038e0d to your computer and use it in GitHub Desktop.
Save sztomi/2f56a8d1b2974e932b3cf7b31e038e0d to your computer and use it in GitHub Desktop.
PGO mystery
#! /usr/bin/env python3
import json
import os
import shlex
import subprocess as sp
import time
from contextlib import contextmanager
from pathlib import Path
from tempfile import TemporaryDirectory
@contextmanager
def chdir(path: Path):
    """Temporarily make *path* the process working directory.

    The directory that was current on entry is restored on exit, including
    when the body of the ``with`` statement raises.

    Args:
        path: Directory to switch into for the duration of the block.

    Yields:
        None.
    """
    cwd = Path.cwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Restore unconditionally: without this, an exception inside the block
        # would leave the process stranded in *path*.
        os.chdir(cwd)
def commands(path: str) -> list[dict]:
    """Load a JSON compilation database and drop the compiler from each entry.

    Every entry's ``"command"`` value is replaced by its argument list with
    the first element (the compiler executable) removed, so that a different
    compiler can be substituted later. Entries may describe the invocation
    either as a shell-quoted ``"command"`` string or as an ``"arguments"``
    list; ``"command"`` takes precedence when both are present.

    Args:
        path: Location of the ``compile_commands.json`` file.

    Returns:
        The parsed entries, each with ``"command"`` set to a list of strings
        that excludes the compiler.

    Raises:
        KeyError: If an entry has neither ``"command"`` nor ``"arguments"``.
    """
    print("Loading commands...")
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    for entry in data:
        if "command" in entry:
            argv = shlex.split(entry["command"])
        elif "arguments" in entry:
            argv = list(entry["arguments"])
        else:
            raise KeyError("command")
        # argv[0] is the compiler recorded in the database; keep only the rest.
        entry["command"] = argv[1:]
    return data
def benchmark_single_cmd(
    cmd: list[str],
    *,
    warmup: int = 3,
    samples: int = 10,
    trim: int = 2,
) -> float:
    """Time one compile command and return its trimmed-mean wall time.

    The command is run ``warmup`` times unmeasured-for-statistics, then
    ``samples`` times; the ``trim`` fastest and ``trim`` slowest samples are
    discarded and the remaining ones averaged. The output file named after
    ``-o`` is deleted before every run, and its parent directory is created
    if needed. The compiler's own output is discarded.

    Args:
        cmd: Full argument vector, compiler first. Must contain ``-o <file>``
            and ``-c <file>``.
        warmup: Number of warm-up runs whose timings are thrown away.
        samples: Number of timed runs.
        trim: Number of samples dropped from each end of the sorted timings.

    Returns:
        Mean duration in seconds of the retained samples.

    Raises:
        ValueError: If ``-o`` or ``-c`` is missing from ``cmd``, or if the
            warmup/samples/trim combination leaves no sample to average.
    """
    import sys

    if warmup < 0 or samples < 1 or trim < 0 or samples <= 2 * trim:
        raise ValueError(
            f"invalid sampling setup: warmup={warmup}, samples={samples}, trim={trim}"
        )

    output = Path(cmd[cmd.index("-o") + 1])
    output.parent.mkdir(exist_ok=True, parents=True)
    input_path = Path(cmd[cmd.index("-c") + 1])
    print(f"Benchmarking {input_path.name} compilation...", end=" ", flush=True)
    # print(f"Command: {' '.join(cmd)}")  # uncomment to debug a single entry

    failures = 0

    def _measure() -> float:
        nonlocal failures
        # Start every run from the same state: no stale object file.
        if output.exists():
            output.unlink()
        start = time.perf_counter_ns()
        proc = sp.run(cmd, check=False, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
        end = time.perf_counter_ns()
        if proc.returncode != 0:
            failures += 1
        return (end - start) / 1e9

    # Warmup
    for _ in range(warmup):
        _measure()

    vals = sorted(_measure() for _ in range(samples))
    # Explicit upper bound so that trim=0 keeps every sample.
    vals = vals[trim:len(vals) - trim]
    mean = sum(vals) / len(vals)
    print(f"Mean: {mean:.3f}s")
    if failures:
        # A compiler that errors out returns almost immediately, so its
        # timing says nothing about compile speed; make that visible.
        print(
            f"warning: {failures} of {warmup + samples} runs of "
            f"{input_path.name} exited with a non-zero status; "
            "timings may be meaningless",
            file=sys.stderr,
        )
    return mean
def benchmark(compiler: Path, db: list[dict], count: int = 20) -> float:
    """Benchmark *compiler* on the first *count* entries of a command database.

    Each entry's arguments are run with *compiler* substituted as the
    executable, from inside a temporary working directory that is removed
    afterwards.

    Args:
        compiler: Path to the compiler executable; resolved against the
            working directory that is current when this function is called.
        db: Entries whose ``"command"`` value is an argument list without
            the compiler.
        count: Maximum number of entries to benchmark. If the database is
            shorter, every entry is used.

    Returns:
        Sum of the per-command mean times, in seconds.
    """
    # Resolve once, before any chdir, so a relative compiler path keeps
    # pointing at the intended binary.
    compiler_path = str(compiler.resolve())
    total = 0.0
    with TemporaryDirectory() as tmpdir:
        # Slicing (rather than indexing 0..count-1) tolerates a database with
        # fewer than `count` entries; max() keeps a negative count a no-op.
        for entry in db[:max(count, 0)]:
            cmd = [compiler_path, *entry["command"]]
            with chdir(tmpdir):
                total += benchmark_single_cmd(cmd)
    return total
if __name__ == "__main__":
    # Script entry point: read the CLI arguments, benchmark, print the sum.
    import argparse

    cli = argparse.ArgumentParser()
    # The database is loaded while arguments are parsed, via type=commands.
    cli.add_argument(
        "compile_commands",
        type=commands,
        help="Path to compile_commands.json file",
    )
    cli.add_argument(
        "compiler",
        type=Path,
        help="Path to compiler",
    )
    cli.add_argument(
        "-c",
        "--count",
        type=int,
        default=20,
        help="Number of commands to benchmark",
    )
    opts = cli.parse_args()

    elapsed = benchmark(opts.compiler, opts.compile_commands, opts.count)
    print(f"Total: {elapsed:.3f}s")
# Shared CMake definition fragments. Most fragments carry a YAML anchor and are
# pulled into package definitions further down with a `<<:` merge.
common:
# Definitions merged into the `defs` of every clang stage.
clang-common-defs: &clang-common-defs
CLANG_DEFAULT_LINKER: lld
CLANG_DEFAULT_OBJCOPY: llvm-objcopy
CLANG_DEFAULT_CXX_STDLIB: libc++
CLANG_DEFAULT_UNWINDLIB: libunwind
CLANG_VENDOR_UTI: com.knit
CLANG_DEFAULT_RTLIB: compiler-rt
# LLVM forwards variables carrying the BOOTSTRAP_ prefix to the next bootstrap
# stage. These two were previously spelled "BOOSTRAP_", which matches nothing.
BOOTSTRAP_LLVM_ENABLE_LLD: ON
BOOTSTRAP_LLVM_ENABLE_LTO: ON
CLANG_ENABLE_BOOTSTRAP: ON
LLVM_BUILD_TESTS: OFF
LLVM_ENABLE_ASSERTIONS: OFF
LLVM_ENABLE_BINDINGS: OFF
LLVM_ENABLE_LIBXML2: OFF
LLVM_ENABLE_LIBCXX: OFF
LLVM_ENABLE_NEW_PASS_MANAGER: ON
LLVM_ENABLE_TERMINFO: OFF
LLVM_ENABLE_THREADS: ON
LLVM_OPTIMIZED_TABLEGEN: ON
LLVM_INCLUDE_BENCHMARKS: OFF
LLVM_INCLUDE_EXAMPLES: OFF
LLVM_INCLUDE_DOCS: OFF
LLVM_INCLUDE_TESTS: OFF
LLVM_USE_SANITIZER: OFF
LLVM_STATIC_LINK_CXX_STDLIB: ON
ZLIB_ROOT: ${package_dir("zlib-bootstrap")}
zstd_ROOT: ${package_dir("zstd-bootstrap")}
# Compiler flags merged only by the two stage2 packages, not by stage1.
clang-cflags: &clang-cflags
CMAKE_C_FLAGS: -mtune=ivybridge
CMAKE_CXX_FLAGS: -mtune=ivybridge -fnew-infallible
# Base settings merged into every per-triple builtins build.
builtins-common: &builtins-common
CMAKE_SYSTEM_NAME: Linux
CMAKE_BUILD_TYPE: Release
# Per-target settings: compiler target triple, processor and sysroot taken
# from the gcc-sysroots package.
x86_64: &x86_64
CMAKE_C_COMPILER_TARGET: x86_64-knit-linux-gnu
CMAKE_CXX_COMPILER_TARGET: x86_64-knit-linux-gnu
CMAKE_ASM_COMPILER_TARGET: x86_64-knit-linux-gnu
CMAKE_SYSTEM_PROCESSOR: x86_64
CMAKE_SYSROOT: ${package_dir("gcc-sysroots")}/x86_64-knit-linux-gnu/x86_64-knit-linux-gnu/sysroot
aarch64: &aarch64
CMAKE_C_COMPILER_TARGET: aarch64-knit-linux-gnu
CMAKE_CXX_COMPILER_TARGET: aarch64-knit-linux-gnu
CMAKE_ASM_COMPILER_TARGET: aarch64-knit-linux-gnu
CMAKE_SYSTEM_PROCESSOR: aarch64
CMAKE_SYSROOT: ${package_dir("gcc-sysroots")}/aarch64-knit-linux-gnu/aarch64-knit-linux-gnu/sysroot
arm: &arm
CMAKE_C_COMPILER_TARGET: arm-knit-linux-gnueabihf
CMAKE_CXX_COMPILER_TARGET: arm-knit-linux-gnueabihf
CMAKE_ASM_COMPILER_TARGET: arm-knit-linux-gnueabihf
CMAKE_SYSTEM_PROCESSOR: armv7hf
CMAKE_SYSROOT: ${package_dir("gcc-sysroots")}/arm-knit-linux-gnueabihf/arm-knit-linux-gnueabihf/sysroot
# compiler-rt: every optional component is switched off except the profile
# runtime (presumably kept because the instrumented stage needs it - confirm).
compiler-rt: &compiler-rt
COMPILER_RT_BUILD_SANITIZERS: OFF
COMPILER_RT_BUILD_XRAY: OFF
COMPILER_RT_BUILD_LIBFUZZER: OFF
COMPILER_RT_BUILD_MEMPROF: OFF
COMPILER_RT_BUILD_ORC: OFF
COMPILER_RT_BUILD_GWP_ASAN: OFF
COMPILER_RT_BUILD_PROFILE: ON
# libc++ built on libc++abi and compiler-rt, using the unstable ABI (version 2).
libcxx: &libcxx
LIBCXX_CXX_ABI: libcxxabi
LIBCXX_USE_COMPILER_RT: ON
LIBCXX_ABI_UNSTABLE: ON
LIBCXX_ABI_VERSION: "2"
LIBCXX_ENABLE_STATIC_ABI_LIBRARY: ON
# libunwind is built both shared and static.
libunwind: &libunwind
LIBUNWIND_ENABLE_SHARED: ON
LIBUNWIND_ENABLE_STATIC: ON
# NOTE(review): this fragment has no anchor and nothing in the visible part of
# the file refers to it; the active packages set -Wl,--emit-relocs themselves.
clang-bolt:
CMAKE_EXE_LINKER_FLAGS: --emit-relocs
CMAKE_SHARED_LINKER_FLAGS: --emit-relocs
packages:
# Stage 1: clang/lld/bolt plus runtimes for all three target triples. Later
# packages use this compiler and its tablegen binaries.
- package: clang-stage1
kind: cmake-llvm # special package kind that knows about runtimes, builtins etc.
requires: zlib-bootstrap, zstd-bootstrap, gcc-sysroots, cmake, ninja
source-dir: llvm-project/llvm
env:
BUILD_DIR: ${build_dir("clang-stage1")}
PACKAGE_DIR: ${package_dir("clang-stage1")}
export-env:
+PATH: "${package_dir('clang-stage1')}/bin:"
build-targets: bolt, install-distribution-stripped
# not using the toolchain because it gets passed down to the runtime builds which is not
# what we want. We need to use the just-built clang to build the runtimes which is capable
# of building for the targets.
# toolchain: gcc-toolchain
defs:
# NOTE(review): the `gcc-toolchain` anchor is not defined in the part of the
# file visible here - confirm it is declared elsewhere.
<<: [*clang-common-defs, *x86_64, *gcc-toolchain]
CMAKE_BUILD_TYPE: Release
CMAKE_C_FLAGS_INIT: -fno-reorder-blocks-and-partition -static-libgcc
CMAKE_CXX_FLAGS_INIT: -fno-reorder-blocks-and-partition -static-libgcc
PACKAGE_VENDOR: knit-Bootstrap
CLANG_LINKS_TO_CREATE: clang++;clang-cpp
LLVM_CCACHE_BUILD: ON
LLVM_ENABLE_ZLIB: FORCE_ON
LLVM_ENABLE_ZSTD: FORCE_ON
triples:
- x86_64-knit-linux-gnu
- aarch64-knit-linux-gnu
- arm-knit-linux-gnueabihf
targets:
- X86
- ARM
- AArch64
# Components installed by install-distribution-stripped. The list used to name
# llvm-strip twice; the redundant entry has been dropped.
distribution-components:
- clang-resource-headers
- clang
- lld
- llvm-ar
- bolt
- llvm-config
- llvm-nm
- llvm-objcopy
- llvm-profdata
- llvm-ranlib
- llvm-strip
- llvm-tblgen
- llvm-readelf
- llvm-windres
- merge-fdata
- LTO
- llvm-lipo
- runtimes
- builtins
projects:
- clang
- lld
- bolt
runtimes:
- compiler-rt
- libcxx
- libcxxabi
- libunwind
create-links: true # create <target>-clang and <target>-clang++ links
# clang-tblgen is copied out of the build tree by hand; the stage2 packages
# point CLANG_TABLEGEN at this copy.
after-install:
- ${SHIM} cp $BUILD_DIR/bin/clang-tblgen $PACKAGE_DIR/bin/clang-tblgen
# Per-triple driver options. `configs-common` is anchored here and reused by
# the clang-stage2 package.
compiler-configs:
common: &configs-common |
-Wl,-rpath,<CFGDIR>/../lib/
-resource-dir <CFGDIR>/../lib/clang/18
-Qunused-arguments
-pie
-fPIC
x86_64-knit-linux-gnu:
- -Wl,-rpath,<CFGDIR>/../lib/x86_64-knit-linux-gnu
- *configs-common
aarch64-knit-linux-gnu:
- -Wl,-rpath,<CFGDIR>/../lib/aarch64-knit-linux-gnu
- *configs-common
arm-knit-linux-gnueabihf:
- -Wl,-rpath,<CFGDIR>/../lib/arm-knit-linux-gnueabihf
- *configs-common
# Per-triple CMake settings for the builtins and runtimes sub-builds.
llvm:
builtins:
x86_64-knit-linux-gnu:
<<: [*builtins-common, *x86_64]
aarch64-knit-linux-gnu:
<<: [*builtins-common, *aarch64]
arm-knit-linux-gnueabihf:
<<: [*builtins-common, *arm]
runtimes:
x86_64-knit-linux-gnu:
<<: [*x86_64, *compiler-rt, *libcxx, *libunwind]
aarch64-knit-linux-gnu:
<<: [*aarch64, *compiler-rt, *libcxx, *libunwind]
arm-knit-linux-gnueabihf:
<<: [*arm, *compiler-rt, *libcxx, *libunwind]
# rpmalloc, built by a shell script with the stage1 clang.
- package: rpmalloc
kind: shell
requires: clang-stage1
source-dir: rpmalloc
env:
BUILD_DIR: ${build_dir("rpmalloc")}
PACKAGE_DIR: ${package_dir("rpmalloc")}
SOURCE_DIR: ${source_dir("rpmalloc")}
# Uses the <target>-clang / <target>-clang++ links that clang-stage1 creates.
# NOTE(review): ${TARGET} is not set anywhere in the visible file - presumably
# supplied by the build tool; confirm.
CC: ${package_dir("clang-stage1")}/bin/${TARGET}-clang
CXX: ${package_dir("clang-stage1")}/bin/${TARGET}-clang++
AR: ${package_dir("clang-stage1")}/bin/llvm-ar
CFLAGS: -Wno-unsafe-buffer-usage
CXXFLAGS: -Wno-unsafe-buffer-usage
# Configure for an LTO clang build on Linux, then build with ninja.
script:
- python configure.py --toolchain clang --lto --target linux --host linux
- ninja
# Stage 2 (instrumented): an x86_64-only clang built with IR-level profile
# instrumentation; LLVM_PROFILE_DATA_DIR points into the pgo-data build dir.
- package: clang-stage2-instrumented
kind: cmake-llvm
requires:
- zlib-bootstrap, zstd-bootstrap, clang-stage1, cmake, ninja
# rpmalloc is only requested when the build OS is Windows.
- if ${build.os == "Windows"}: rpmalloc
source-dir: llvm-project/llvm
#toolchain: clang-stage1
export-env:
+PATH: "${package_dir('clang-stage2-instrumented')}/bin:"
build-targets: install-distribution-stripped
defs:
# NOTE(review): the `clang-stage1` anchor is not defined in the part of the
# file visible here - confirm it is declared elsewhere.
<<: [*clang-common-defs, *clang-stage1, *clang-cflags]
CMAKE_BUILD_TYPE: Release
# NOTE(review): clang-cflags (merged above) sets CMAKE_CXX_FLAGS directly.
# CMake only uses CMAKE_CXX_FLAGS_INIT to seed CMAKE_CXX_FLAGS when that cache
# entry is not already set, so if these defs are passed as -D options the
# -vp-counters-per-site flag may never reach the compiler - confirm.
CMAKE_CXX_FLAGS_INIT: -mllvm -vp-counters-per-site=2
CMAKE_EXE_LINKER_FLAGS_INIT: -Wl,--emit-relocs,-znow
PACKAGE_VENDOR: knit-Instrumented
LLVM_BUILD_INSTRUMENTED: IR
CLANG_LINKS_TO_CREATE: clang++;clang-cpp
# Overrides the ON value inherited from clang-common-defs.
CLANG_ENABLE_BOOTSTRAP: OFF
LLVM_CCACHE_BUILD: ON
LLVM_ENABLE_ZLIB: FORCE_ON
LLVM_ENABLE_ZSTD: FORCE_ON
LLVM_ENABLE_LTO: Thin
LLVM_PROFILE_DATA_DIR: ${build_dir("pgo-data")}/profiles
# Reuse the tablegen binaries from stage1.
LLVM_TABLEGEN: ${package_dir("clang-stage1")}/bin/llvm-tblgen
CLANG_TABLEGEN: ${package_dir("clang-stage1")}/bin/clang-tblgen
LLVM_DEFAULT_TARGET_TRIPLE: x86_64-knit-linux-gnu
triples:
- x86_64-knit-linux-gnu
targets:
- Native
# Components installed by install-distribution-stripped. The list used to name
# llvm-strip twice; the redundant entry has been dropped.
distribution-components:
- clang-resource-headers
- clang
- lld
- llvm-ar
- llvm-config
- llvm-nm
- llvm-objcopy
- llvm-profdata
- llvm-ranlib
- llvm-strip
- llvm-tblgen
- llvm-readelf
- llvm-windres
- LTO
- llvm-lipo
- runtimes
- builtins
projects:
- clang
- lld
runtimes:
- compiler-rt
- libcxx
- libcxxabi
- libunwind
# Same driver options as the `configs-common` block of clang-stage1, spelled
# out as individual list items.
compiler-configs:
x86_64-knit-linux-gnu:
- -Wl,-rpath,<CFGDIR>/../lib/
- -Wl,-rpath,<CFGDIR>/../lib/x86_64-knit-linux-gnu
- -resource-dir <CFGDIR>/../lib/clang/18
- -Qunused-arguments
- -pie
- -fPIC
create-links: true # create <target>-clang and <target>-clang++ links
llvm:
builtins:
x86_64-knit-linux-gnu:
<<: [*builtins-common, *x86_64]
# NOTE(review): unlike clang-stage1 and clang-stage2, the runtimes here do not
# merge *x86_64 (target triple / sysroot) - confirm this is intentional.
runtimes:
x86_64-knit-linux-gnu:
<<: [*compiler-rt, *libcxx, *libunwind]
# Stage 2 (final): ThinLTO clang for all three triples, built against the
# profile in the pgo-data package (LLVM_PROFDATA_FILE).
- package: clang-stage2
kind: cmake-llvm
requires: zlib-bootstrap, zstd-bootstrap, gcc-sysroots, cmake, ninja, clang-stage1, pgo-data
source-dir: llvm-project/llvm
env:
BUILD_DIR: ${build_dir("clang-stage2")}
PACKAGE_DIR: ${package_dir("clang-stage2")}
export-env:
+PATH: "${package_dir('clang-stage2')}/bin:"
build-targets: install-distribution
defs:
# NOTE(review): the `clang-stage1` anchor is not defined in the part of the
# file visible here - confirm it is declared elsewhere.
<<: [*clang-common-defs, *x86_64, *clang-stage1, *clang-cflags]
CMAKE_BUILD_TYPE: Release
# NOTE(review): clang-cflags (merged above) sets CMAKE_CXX_FLAGS directly.
# CMake only uses CMAKE_CXX_FLAGS_INIT to seed CMAKE_CXX_FLAGS when that cache
# entry is not already set, so if these defs are passed as -D options this
# flag may never reach the compiler - confirm.
CMAKE_CXX_FLAGS_INIT: -mllvm -vp-counters-per-site=2
CMAKE_EXE_LINKER_FLAGS_INIT: -Wl,--emit-relocs,-znow
PACKAGE_VENDOR: knit
CLANG_LINKS_TO_CREATE: clang++;clang-cpp
LLVM_CCACHE_BUILD: ON
LLVM_ENABLE_ZLIB: FORCE_ON
LLVM_ENABLE_ZSTD: FORCE_ON
LLVM_ENABLE_LTO: Thin
LLVM_PROFDATA_FILE: ${package_dir("pgo-data")}/clang.profdata
# Reuse the tablegen binaries from stage1.
LLVM_TABLEGEN: ${package_dir("clang-stage1")}/bin/llvm-tblgen
CLANG_TABLEGEN: ${package_dir("clang-stage1")}/bin/clang-tblgen
triples:
- x86_64-knit-linux-gnu
- aarch64-knit-linux-gnu
- arm-knit-linux-gnueabihf
targets:
- X86
- ARM
- AArch64
- NVPTX
# Components installed by install-distribution. The list used to name
# llvm-strip twice; the redundant entry has been dropped.
distribution-components:
- clang-resource-headers
- clang
- lld
- llvm-ar
- llvm-config
- llvm-nm
- llvm-objcopy
- llvm-profdata
- llvm-ranlib
- llvm-strip
- llvm-tblgen
- llvm-readelf
- llvm-windres
- LTO
- llvm-lipo
- runtimes
- builtins
projects:
- clang
- lld
runtimes:
- compiler-rt
- libcxx
- libcxxabi
- libunwind
create-links: true # create <target>-clang and <target>-clang++ links
# bolt-instrument:
# - ${package_dir("clang-stage2")}/bin/clang-18
# - ${package_dir("clang-stage2")}/bin/lld
# Per-triple driver options; `configs-common` is the block scalar anchored in
# the clang-stage1 package.
compiler-configs:
x86_64-knit-linux-gnu:
- -Wl,-rpath,<CFGDIR>/../lib/x86_64-knit-linux-gnu
- *configs-common
aarch64-knit-linux-gnu:
- -Wl,-rpath,<CFGDIR>/../lib/aarch64-knit-linux-gnu
- *configs-common
arm-knit-linux-gnueabihf:
- -Wl,-rpath,<CFGDIR>/../lib/arm-knit-linux-gnueabihf
- *configs-common
# Per-triple CMake settings for the builtins and runtimes sub-builds.
llvm:
builtins:
x86_64-knit-linux-gnu:
<<: [*builtins-common, *x86_64]
aarch64-knit-linux-gnu:
<<: [*builtins-common, *aarch64]
arm-knit-linux-gnueabihf:
<<: [*builtins-common, *arm]
runtimes:
x86_64-knit-linux-gnu:
<<: [*x86_64, *compiler-rt, *libcxx, *libunwind]
aarch64-knit-linux-gnu:
<<: [*aarch64, *compiler-rt, *libcxx, *libunwind]
arm-knit-linux-gnueabihf:
<<: [*arm, *compiler-rt, *libcxx, *libunwind]
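# BOLT-optimized stage2 (currently disabled).
# NOTE(review): the script below renames the binaries to *.bolt but then passes
# *.orig to llvm-bolt and removes both suffixes; reconcile the file names
# before re-enabling this package.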
# - package: clang-stage3
# kind: postprocess
# requires: clang-stage1, clang-stage2, pgo-data, gcc-sysroots, cmake, ninja, rpmalloc
# no-source: true
# env:
# BUILD_DIR: ${build_dir("clang-stage3")}
# PACKAGE_DIR: ${package_dir("clang-stage3")}
# STAGE2: ${package_dir("clang-stage2")}
# SYSROOTS: ${package_dir("gcc-sysroots")}
# export-env:
# +PATH: "${package_dir('clang-stage3')}/bin:"
# copy:
# ${package_dir('clang-stage2')}/: ${package_dir('clang-stage3')}/
# ${package_dir('gcc-sysroots')}/arm-knit-linux-gnueabihf/arm-knit-linux-gnueabihf/sysroot: ${package_dir('clang-stage3')}/sysroots/arm-knit-linux-gnueabihf/
# ${package_dir('gcc-sysroots')}/aarch64-knit-linux-gnu/aarch64-knit-linux-gnu/sysroot: ${package_dir('clang-stage3')}/sysroots/aarch64-knit-linux-gnu/
# ${package_dir('gcc-sysroots')}/x86_64-knit-linux-gnu/x86_64-knit-linux-gnu/sysroot: ${package_dir('clang-stage3')}/sysroots/x86_64-knit-linux-gnu/
# ${package_dir("cmake")}/bin/: ${package_dir('clang-stage3')}/bin
# ${workspace()}/toolchains/arm-knit-linux-gnueabihf.cmake: ${package_dir('clang-stage3')}/toolchains/
# ${workspace()}/toolchains/aarch64-knit-linux-gnu.cmake: ${package_dir('clang-stage3')}/toolchains/
# ${workspace()}/toolchains/x86_64-knit-linux-gnu.cmake: ${package_dir('clang-stage3')}/toolchains/
# script: |
# mv $PACKAGE_DIR/bin/clang-18 $PACKAGE_DIR/bin/clang-18.bolt
# llvm-bolt $PACKAGE_DIR/bin/clang-18.orig \
# --data ${package_dir('bolt-data')}/clang.fdata \
# -o $PACKAGE_DIR/bin/clang-18 \
# -reorder-blocks=ext-tsp \
# -reorder-functions=hfsort+ \
# -split-functions \
# -split-all-cold \
# -split-eh \
# -dyno-stats \
# -icf=1 \
# -use-gnu-stack \
# -plt=hot
# mv $PACKAGE_DIR/bin/lld $PACKAGE_DIR/bin/lld.bolt
# llvm-bolt $PACKAGE_DIR/bin/lld.orig \
# --data ${package_dir('bolt-data')}/lld.fdata \
# -o $PACKAGE_DIR/bin/lld \
# -reorder-blocks=cache+ \
# -reorder-functions=hfsort+ \
# -split-functions \
# -split-all-cold \
# -split-eh \
# -dyno-stats \
# -icf=1 \
# -use-gnu-stack \
# -plt=hot
# rm $PACKAGE_DIR/bin/lld.orig $PACKAGE_DIR/bin/clang-18.orig $PACKAGE_DIR/bin/clang-18.bolt $PACKAGE_DIR/bin/lld.bolt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment