Skip to content

Instantly share code, notes, and snippets.

@foxtran
Created October 25, 2023 12:36
Show Gist options
  • Save foxtran/fdc4abf8e2de127800f670b9edeeb9f2 to your computer and use it in GitHub Desktop.
Save foxtran/fdc4abf8e2de127800f670b9edeeb9f2 to your computer and use it in GitHub Desktop.
#!/bin/bash
## This script does the following:
## 1. It checks out LLVM (the release/17.x branch, called "trunk" below)
##    and builds it.
## 2. It checks out and builds the create_llvm_prof tool.
## 3. It builds multiple clang binaries towards building a
##    propeller optimized clang binary.
## 4. It runs performance comparisons of a baseline clang
##    binary and the Propeller optimized clang binary.
## To run this script, optionally export BASE_PROPELLER_CLANG_DIR (it
## defaults to propeller_optimize_clang.dir next to this script) and run:
##   bash propeller_optimize_clang.sh
## bash is required: the script uses arrays and [[ ]].
## The propeller optimized clang binary will be in:
## ${BASE_PROPELLER_CLANG_DIR}/propeller_build/bin/clang
set -eu
# Root directory for every checkout and build tree created below.
# An exported BASE_PROPELLER_CLANG_DIR wins; otherwise fall back to a
# directory next to this script. "cd ... && pwd" (rather than ";") makes a
# failing cd abort the script instead of silently yielding the current dir.
DEFAULT_BASE_PROPELLER_CLANG_DIR="$(cd -- "$(dirname -- "$0")" && pwd)/propeller_optimize_clang.dir"
BASE_PROPELLER_CLANG_DIR="${BASE_PROPELLER_CLANG_DIR:-${DEFAULT_BASE_PROPELLER_CLANG_DIR}}"
mkdir -p -- "${BASE_PROPELLER_CLANG_DIR}"
# The script changes directory many times, so keep the root absolute even
# when a relative path was supplied through the environment.
BASE_PROPELLER_CLANG_DIR="$(cd -- "${BASE_PROPELLER_CLANG_DIR}" && pwd)"
PATH_TO_LLVM_SOURCES="${BASE_PROPELLER_CLANG_DIR}/sources"
PATH_TO_TRUNK_LLVM_BUILD="${BASE_PROPELLER_CLANG_DIR}/trunk_llvm_build"
PATH_TO_TRUNK_LLVM_INSTALL="${BASE_PROPELLER_CLANG_DIR}/trunk_llvm_install"
# Build Trunk LLVM
# mkdir and cd are separate statements on purpose: in "mkdir ... && cd ..."
# a failing mkdir does not trigger set -e, and the script would carry on in
# whatever directory it happened to be in.
mkdir -p "${PATH_TO_LLVM_SOURCES}"
cd "${PATH_TO_LLVM_SOURCES}"
# Reuse an existing checkout so the script can be re-run after a failure;
# git clone refuses to clone into a non-empty directory.
if [[ ! -d llvm-project/.git ]]; then
  git clone -b release/17.x --single-branch https://github.com/llvm/llvm-project.git
fi
mkdir -p "${PATH_TO_TRUNK_LLVM_BUILD}"
cd "${PATH_TO_TRUNK_LLVM_BUILD}"
# Release build of clang and lld for X86, installed under
# PATH_TO_TRUNK_LLVM_INSTALL.
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD=X86 \
  -DCMAKE_INSTALL_PREFIX="${PATH_TO_TRUNK_LLVM_INSTALL}" \
  -DLLVM_ENABLE_RTTI=On -DLLVM_INCLUDE_TESTS=Off \
  -DLLVM_ENABLE_PROJECTS="clang;lld" "${PATH_TO_LLVM_SOURCES}/llvm-project/llvm"
ninja install
# Build create_llvm_prof
PATH_TO_CREATE_LLVM_PROF="${BASE_PROPELLER_CLANG_DIR}/create_llvm_prof_build"
# Separate mkdir and cd so that a failure of either one aborts under set -e.
mkdir -p "${PATH_TO_CREATE_LLVM_PROF}"
cd "${PATH_TO_CREATE_LLVM_PROF}"
# Reuse an existing checkout so the script can be re-run after a failure.
if [[ ! -d autofdo/.git ]]; then
  git clone --recursive https://github.com/google/autofdo.git
fi
# -p: do not fail when the build directory is left over from an earlier run.
mkdir -p build
cd build
# Configure autofdo with the freshly installed clang/clang++ and point it at
# the installed LLVM tree.
cmake -G Ninja -DCMAKE_INSTALL_PREFIX="." \
  -DCMAKE_C_COMPILER="${PATH_TO_TRUNK_LLVM_INSTALL}/bin/clang" \
  -DCMAKE_CXX_COMPILER="${PATH_TO_TRUNK_LLVM_INSTALL}/bin/clang++" \
  -DLLVM_PATH="${PATH_TO_TRUNK_LLVM_INSTALL}" ../autofdo/
ninja
# Sanity check: ls fails (and set -e aborts) if the tool was not produced.
ls create_llvm_prof
# CMake options shared by the baseline, labels and Propeller clang builds.
COMMON_CMAKE_FLAGS=()
# Build configuration.
COMMON_CMAKE_FLAGS+=( "-DLLVM_OPTIMIZED_TABLEGEN=On" )
COMMON_CMAKE_FLAGS+=( "-DCMAKE_BUILD_TYPE=Release" )
COMMON_CMAKE_FLAGS+=( "-DLLVM_TARGETS_TO_BUILD=X86" )
COMMON_CMAKE_FLAGS+=( "-DLLVM_ENABLE_PROJECTS=clang" )
# Compile with the clang/clang++ found in the trunk LLVM build tree.
COMMON_CMAKE_FLAGS+=( "-DCMAKE_C_COMPILER=${PATH_TO_TRUNK_LLVM_BUILD}/bin/clang" )
COMMON_CMAKE_FLAGS+=( "-DCMAKE_CXX_COMPILER=${PATH_TO_TRUNK_LLVM_BUILD}/bin/clang++" )
# Extra CMake options for the baseline build: link executables, shared
# libraries and modules with lld.
BASELINE_CC_LD_CMAKE_FLAGS=()
for baseline_link_kind in EXE SHARED MODULE; do
  BASELINE_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_${baseline_link_kind}_LINKER_FLAGS=-fuse-ld=lld" )
done
unset baseline_link_kind
# Build Baseline Clang Binary
PATH_TO_BASELINE_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/baseline_clang_build"
# Separate mkdir and cd: in "mkdir ... && cd ..." a failing mkdir would not
# trigger set -e and cmake would run in the wrong directory.
mkdir -p "${PATH_TO_BASELINE_CLANG_BUILD}"
cd "${PATH_TO_BASELINE_CLANG_BUILD}"
cmake -G Ninja "${COMMON_CMAKE_FLAGS[@]}" "${BASELINE_CC_LD_CMAKE_FLAGS[@]}" \
  "${PATH_TO_LLVM_SOURCES}/llvm-project/llvm"
ninja clang
# CMake options for the "labels" build: the same compiler options for C and
# C++, and lld for every link step.
labels_compile_opts="-funique-internal-linkage-names -fbasic-block-sections=labels"
LABELS_CC_LD_CMAKE_FLAGS=()
LABELS_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_C_FLAGS=${labels_compile_opts}" )
LABELS_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_CXX_FLAGS=${labels_compile_opts}" )
for labels_link_kind in EXE SHARED MODULE; do
  LABELS_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_${labels_link_kind}_LINKER_FLAGS=-fuse-ld=lld" )
done
unset labels_compile_opts labels_link_kind
# Build Labels Clang binary
PATH_TO_LABELS_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/labels_clang_build"
# Separate mkdir and cd: in "mkdir ... && cd ..." a failing mkdir would not
# trigger set -e and cmake would run in the wrong directory.
mkdir -p "${PATH_TO_LABELS_CLANG_BUILD}"
cd "${PATH_TO_LABELS_CLANG_BUILD}"
cmake -G Ninja "${COMMON_CMAKE_FLAGS[@]}" "${LABELS_CC_LD_CMAKE_FLAGS[@]}" \
  "${PATH_TO_LLVM_SOURCES}/llvm-project/llvm"
ninja clang
# Set up Benchmarking and BUILD
BENCHMARKING_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/benchmarking_clang_build"
# The benchmarking build compiles through these symlinks, so the compiler
# under test can be swapped later by re-pointing them.
mkdir -p "${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary"
cd "${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary"
# Read the value of CLANG_EXECUTABLE_VERSION from the trunk build's CMake
# cache; it is the suffix of the bin/clang-<version> files linked below.
CLANG_VERSION=$(sed -Ene 's!^CLANG_EXECUTABLE_VERSION:STRING=(.*)$!\1!p' \
  "${PATH_TO_TRUNK_LLVM_BUILD}/CMakeCache.txt")
# sed exits 0 even when nothing matches; stop here rather than create
# dangling "clang-" symlinks that only fail later inside cmake.
if [[ -z "${CLANG_VERSION}" ]]; then
  echo "Could not read CLANG_EXECUTABLE_VERSION from ${PATH_TO_TRUNK_LLVM_BUILD}/CMakeCache.txt" >&2
  exit 1
fi
ln -sf "${PATH_TO_LABELS_CLANG_BUILD}/bin/clang-${CLANG_VERSION}" clang
ln -sf "${PATH_TO_LABELS_CLANG_BUILD}/bin/clang-${CLANG_VERSION}" clang++
# Setup cmake for Benchmarking BUILD
cd "${BENCHMARKING_CLANG_BUILD}"
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_PROJECTS=clang \
  -DCMAKE_C_COMPILER="${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary/clang" \
  -DCMAKE_CXX_COMPILER="${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary/clang++" \
  "${PATH_TO_LLVM_SOURCES}/llvm-project/llvm"
# Profile the labels binary: save the first 100 lines of ninja's command
# listing as a script and run that script under perf record.
ninja -t commands | head -n 100 > ./perf_commands.sh 2>&1
chmod +x ./perf_commands.sh
perf record -e cycles:u -j any,u -- ./perf_commands.sh
# Fails, and therefore aborts under set -e, when perf wrote no perf.data.
ls perf.data
# Convert profiles using create_llvm_prof
cd "${BENCHMARKING_CLANG_BUILD}"
# The tool's output is kept off the terminal but saved to a log file, so a
# failure is not silent: report where the diagnostics are before aborting.
if ! "${PATH_TO_CREATE_LLVM_PROF}/build/create_llvm_prof" --format=propeller \
  --binary="${PATH_TO_LABELS_CLANG_BUILD}/bin/clang-${CLANG_VERSION}" \
  --profiled_binary_name="${PATH_TO_LABELS_CLANG_BUILD}/bin/clang-${CLANG_VERSION}" \
  --profile=perf.data --out=cluster.txt --propeller_symorder=symorder.txt \
  > create_llvm_prof.log 2>&1; then
  echo "create_llvm_prof failed; see ${BENCHMARKING_CLANG_BUILD}/create_llvm_prof.log" >&2
  exit 1
fi
# Both outputs are needed by the Propeller build; ls fails if either is missing.
ls cluster.txt symorder.txt
# CMake options for the Propeller build. The compiler options reference
# cluster.txt and the linker options reference symorder.txt, both located in
# the benchmarking build directory.
propeller_compile_opts="-funique-internal-linkage-names -fbasic-block-sections=list=${BENCHMARKING_CLANG_BUILD}/cluster.txt"
propeller_link_opts="-Wl,--symbol-ordering-file=${BENCHMARKING_CLANG_BUILD}/symorder.txt -Wl,--no-warn-symbol-ordering -fuse-ld=lld"
PROPELLER_CC_LD_CMAKE_FLAGS=()
PROPELLER_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_C_FLAGS=${propeller_compile_opts}" )
PROPELLER_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_CXX_FLAGS=${propeller_compile_opts}" )
for propeller_link_kind in EXE SHARED MODULE; do
  PROPELLER_CC_LD_CMAKE_FLAGS+=( "-DCMAKE_${propeller_link_kind}_LINKER_FLAGS=${propeller_link_opts}" )
done
unset propeller_compile_opts propeller_link_opts propeller_link_kind
# Build Propeller Optimized Clang
PATH_TO_PROPELLER_CLANG_BUILD="${BASE_PROPELLER_CLANG_DIR}/propeller_build"
# Separate mkdir and cd: in "mkdir ... && cd ..." a failing mkdir would not
# trigger set -e and cmake would run in the wrong directory.
mkdir -p "${PATH_TO_PROPELLER_CLANG_BUILD}"
cd "${PATH_TO_PROPELLER_CLANG_BUILD}"
cmake -G Ninja "${COMMON_CMAKE_FLAGS[@]}" "${PROPELLER_CC_LD_CMAKE_FLAGS[@]}" \
  "${PATH_TO_LLVM_SOURCES}/llvm-project/llvm"
ninja clang
# Run comparison of baseline versus propeller optimized clang
#######################################
# Re-point the benchmarking build's clang/clang++ symlinks at the clang
# binary of the given build tree, clean the benchmarking build, and run
# "ninja clang && ninja clean" five times under perf stat.
# Globals:   BENCHMARKING_CLANG_BUILD, CLANG_VERSION (read)
# Arguments: $1 - build directory containing bin/clang-${CLANG_VERSION}
# Outputs:   perf stat report (instructions, cycles, L1-icache-misses,
#            iTLB-misses)
#######################################
benchmark_clang_build() {
  local clang_under_test="$1/bin/clang-${CLANG_VERSION}"
  cd "${BENCHMARKING_CLANG_BUILD}/symlink_to_clang_binary"
  ln -sf "${clang_under_test}" clang
  ln -sf "${clang_under_test}" clang++
  cd "${BENCHMARKING_CLANG_BUILD}"
  # Start every measurement from a clean tree.
  ninja clean
  perf stat -r5 -e instructions,cycles,L1-icache-misses,iTLB-misses -- bash -c "ninja clang && ninja clean"
}
benchmark_clang_build "${PATH_TO_BASELINE_CLANG_BUILD}"
benchmark_clang_build "${PATH_TO_PROPELLER_CLANG_BUILD}"
BASELINE (samples=5):
Performance counter stats for 'bash -c ninja clang && ninja clean' (5 runs):
29911672560885 instructions:u # 0.73 insn per cycle ( +- 0.00% )
40762914939742 cycles:u ( +- 0.01% )
2198963872412 L1-icache-misses:u ( +- 0.01% )
16606325255 iTLB-misses:u ( +- 0.05% )
119.413 +- 0.212 seconds time elapsed ( +- 0.18% )
PROPELLER (samples=5):
Performance counter stats for 'bash -c ninja clang && ninja clean' (5 runs):
30835273549813 instructions:u # 0.63 insn per cycle ( +- 0.00% )
49008268336239 cycles:u ( +- 0.01% )
3025079343587 L1-icache-misses:u ( +- 0.02% )
16944457932 iTLB-misses:u ( +- 0.03% )
139.041 +- 0.250 seconds time elapsed ( +- 0.18% )
Tested on a RAM disk. Output of `lscpu` on the test machine:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 152
On-line CPU(s) list: 0-151
Thread(s) per core: 2
Core(s) per socket: 38
Socket(s): 2
NUMA node(s): 2
Vendor ID: GenuineIntel
CPU family: 6
Model: 106
Model name: Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz
Stepping: 6
CPU MHz: 3400.000
CPU max MHz: 3400.0000
CPU min MHz: 800.0000
BogoMIPS: 4800.00
Virtualization: VT-x
L1d cache: 48K
L1i cache: 32K
L2 cache: 1280K
L3 cache: 58368K
NUMA node0 CPU(s): 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,130,132,134,136,138,140,142,144,146,148,150
NUMA node1 CPU(s): 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,129,131,133,135,137,139,141,143,145,147,149,151
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment