Skip to content

Instantly share code, notes, and snippets.

@cgmb
Last active February 22, 2023 18:58
Show Gist options
  • Save cgmb/9b0ef8c36c47a1b6956d10f89a6f3b6b to your computer and use it in GitHub Desktop.
Save cgmb/9b0ef8c36c47a1b6956d10f89a6f3b6b to your computer and use it in GitHub Desktop.
Using perf on Ubuntu 20.04

Using perf on Ubuntu 20.04

Build the ROCm stack

The build-rocm-5.4.2.sh script will build the ROCm stack from source with all the debug symbols needed.

Install perf

Use apt search linux-tools-generic to find the package version that matches your kernel version reported by uname -a. This install command assumes you're using the Ubuntu 20.04 HWE stack:

sudo apt-get -qq install linux-tools-generic-hwe-20.04

Build the program for analysis

Set the CMAKE_PREFIX_PATH to wherever you installed ROCm and build your library. If you use the build-rocm-5.4.2.sh script, the CMAKE_PREFIX_PATH should be whatever you set as the $OUTDIR (defaults to $(pwd)/rocm).

# build with debug symbols and stack trace info
# Both variables are exported: CMake seeds the C compile flags from CFLAGS and
# the C++ compile flags from CXXFLAGS, so a C source such as main.c would not
# pick up flags that were only placed in CXXFLAGS.
export CFLAGS="-g -fno-omit-frame-pointer"
export CXXFLAGS="-g -fno-omit-frame-pointer"
cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_PREFIX_PATH="$OUTDIR"
make -C build

Run the program

You can run the built program with

build/rbi

and you will notice an error about libhsa-amd-aqlprofile64 being missing. This is because libhsa-amd-aqlprofile64 is closed source and was not included in the build script. Don't worry. It's an optional component. aqlprofile is only used for GPU profiling. It's not needed for profiling the host code with perf.

Run the program with perf

To get CPU performance statistics:

sudo perf stat build/rbi

To create a perf.data file:

sudo perf record build/rbi

To view the profiling results:

sudo perf report -i perf.data

Other resources

#!/usr/bin/env bash
# Build the ROCm stack for ROCm 5.4.2 with debug info on Ubuntu 20.04
#
# Usage: [OUTDIR=/install/prefix] ./build-rocm-5.4.2.sh
#   OUTDIR  where to install the build results (default: $(pwd)/rocm).
#           Sources are always downloaded and built under $(pwd)/src.
#
# -e: stop at the first failing command, -x: trace commands,
# -u: treat unset variables as errors, pipefail: a failing download makes
# the whole `wget | tar` pipeline fail.
set -exuo pipefail
sudo apt-get -qq update
sudo apt-get -qq upgrade
sudo apt-get -qq install build-essential cmake wget
WORKSPACE=$(pwd)/src # where to download and build the sources
OUTDIR=${OUTDIR:-$(pwd)/rocm} # where to install the build results (overridable from the environment)
mkdir -p "$WORKSPACE" "$OUTDIR"
# Make OUTDIR absolute: the build steps change directory before using it, so
# a relative path supplied by the caller would otherwise point somewhere else.
OUTDIR=$(cd "$OUTDIR" && pwd)
ARCH=gfx906:xnack- # https://llvm.org/docs/AMDGPUUsage.html
# llvm-amdgpu
# Download the rocm-5.4.2 tag of the ROCm LLVM fork, then build and install
# clang, lld, clang-tools-extra and compiler-rt for the AMDGPU and X86
# targets into "$OUTDIR/llvm".
cd "$WORKSPACE"
sudo apt-get install -qq python3
wget --quiet --output-document=- \
  https://github.com/RadeonOpenCompute/llvm-project/archive/refs/tags/rocm-5.4.2.tar.gz \
  | tar --extract --gzip
cd llvm-project-rocm-5.4.2
cmake -S llvm -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR/llvm" \
  -DLLVM_TARGETS_TO_BUILD="AMDGPU;X86" \
  -DLLVM_ENABLE_PROJECTS="clang;lld;clang-tools-extra;compiler-rt"
make --directory=build --jobs=16
make --directory=build install
# build everything else with debug symbols and stack trace info
# CFLAGS is exported alongside CXXFLAGS so that any C sources in the
# components configured below get the same flags as the C++ ones; CMake reads
# each variable only for its own language.
export CFLAGS="-g -fno-omit-frame-pointer"
export CXXFLAGS="-g -fno-omit-frame-pointer"
# rocm-cmake
# Download the rocm-5.4.2 tag of rocm-cmake and install it into "$OUTDIR".
cd "$WORKSPACE"
wget --quiet --output-document=- \
  https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.4.2.tar.gz \
  | tar --extract --gzip
cd rocm-cmake-rocm-5.4.2
cmake -S . -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR"
make --directory=build --jobs=16
make --directory=build install
# rocm-device-libs
# Download the rocm-5.4.2 tag of ROCm-Device-Libs and build it with the clang
# installed under "$OUTDIR/llvm"; the result is installed into "$OUTDIR".
cd "$WORKSPACE"
wget --quiet --output-document=- \
  https://github.com/RadeonOpenCompute/ROCm-Device-Libs/archive/refs/tags/rocm-5.4.2.tar.gz \
  | tar --extract --gzip
cd ROCm-Device-Libs-rocm-5.4.2
cmake -S . -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR" \
  -DCMAKE_C_COMPILER="$OUTDIR/llvm/bin/clang"
make --directory=build --jobs=16
make --directory=build install
# roct-thunk-interface
# Install the development packages this component is built against, then
# download the rocm-5.4.2 tag of ROCT-Thunk-Interface and install it into
# "$OUTDIR".
cd "$WORKSPACE"
sudo apt-get install -qq libnuma-dev pkg-config libdrm-dev zlib1g-dev libudev-dev
wget --quiet --output-document=- \
  https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/archive/refs/tags/rocm-5.4.2.tar.gz \
  | tar --extract --gzip
cd ROCT-Thunk-Interface-rocm-5.4.2
cmake -S . -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR" \
  -DCMAKE_PREFIX_PATH="$OUTDIR"
make --directory=build --jobs=16
make --directory=build install
# rocr-runtime
# Download the rocm-5.4.2 tag of ROCR-Runtime and install it into "$OUTDIR".
# The CMake project lives in the src/ subdirectory of the archive. Note that
# no CMAKE_BUILD_TYPE is passed for this component.
cd "$WORKSPACE"
sudo apt-get install -qq libelf-dev xxd
wget --quiet --output-document=- \
  https://github.com/RadeonOpenCompute/ROCR-Runtime/archive/refs/tags/rocm-5.4.2.tar.gz \
  | tar --extract --gzip
cd ROCR-Runtime-rocm-5.4.2
cmake -S src -B build \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR" \
  -DCMAKE_PREFIX_PATH="$OUTDIR"
make --directory=build --jobs=16
make --directory=build install
# rocminfo
# Download the rocm-5.4.2 tag of rocminfo and install it into "$OUTDIR",
# resolving its dependencies from the same prefix.
cd "$WORKSPACE"
sudo apt-get install -qq kmod python3
wget --quiet --output-document=- \
  https://github.com/RadeonOpenCompute/rocminfo/archive/refs/tags/rocm-5.4.2.tar.gz \
  | tar --extract --gzip
cd rocminfo-rocm-5.4.2
cmake -S . -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR" \
  -DCMAKE_PREFIX_PATH="$OUTDIR"
make --directory=build --jobs=16
make --directory=build install
# comgr
# Download the rocm-5.4.2 tag of ROCm-CompilerSupport and install the comgr
# library (lib/comgr) into "$OUTDIR", using the LLVM from "$OUTDIR/llvm".
cd "$WORKSPACE"
wget -qO- https://github.com/RadeonOpenCompute/ROCm-CompilerSupport/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
cd ROCm-CompilerSupport-rocm-5.4.2
# 0001-fix-comgr-cmake-minimum-version
# Relax cmake_minimum_required from 3.16.8 to 3.16 (presumably because the
# distribution's CMake is older than 3.16.8 -- confirm with `cmake --version`).
# The edit is done with sed instead of an inline diff so that it does not
# depend on exact context lines or whitespace and can be re-run safely; the
# grep afterwards makes the script stop (set -e) if the relaxed line is absent.
for f in \
  lib/comgr/comgr-backward-compat.cmake
do
  sed -i 's/\(cmake_minimum_required( *VERSION  *3\.16\)\.8/\1/' "$f"
  grep -q 'cmake_minimum_required( *VERSION  *3\.16 *)' "$f"
done
cmake -Slib/comgr -Bbuild -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_PREFIX_PATH="$OUTDIR/llvm" \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR"
make -j16 -C build
make -C build install
# hip
# Four source trees are downloaded. hipamd is the CMake project that gets
# configured; the HIP, ROCclr and ROCm-OpenCL-Runtime trees are handed to it
# by path (HIP_COMMON_DIR, ROCCLR_PATH, AMD_OPENCL_PATH). The result is
# installed into "$OUTDIR".
cd "$WORKSPACE"
sudo apt-get -qq install mesa-common-dev
wget -qO- https://github.com/ROCm-Developer-Tools/hipamd/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
wget -qO- https://github.com/ROCm-Developer-Tools/ROCclr/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
wget -qO- https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
wget -qO- https://github.com/ROCm-Developer-Tools/HIP/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
# 0002-fix-opencl-cmake-minimum-version
# 0003-fix-hip-cmake-minimum-version
# 0004-fix-hipamd-cmake-minimum-version
# Relax cmake_minimum_required from 3.16.8 to 3.16 in every file that states
# it (presumably because the distribution's CMake is older than 3.16.8 --
# confirm with `cmake --version`). The edit is done with sed instead of inline
# diffs so that it does not depend on exact context lines or whitespace and
# can be re-run safely; the grep afterwards makes the script stop (set -e) if
# a file does not end up with the relaxed line.
for f in \
  ROCm-OpenCL-Runtime-rocm-5.4.2/opencl-backward-compat.cmake \
  HIP-rocm-5.4.2/tests/catch/CMakeLists.txt \
  HIP-rocm-5.4.2/tests/catch/packaging/hip-tests.txt \
  hipamd-rocm-5.4.2/CMakeLists.txt \
  hipamd-rocm-5.4.2/hip-backward-compat.cmake \
  hipamd-rocm-5.4.2/packaging/CMakeLists.txt \
  hipamd-rocm-5.4.2/packaging/hip-tests.txt
do
  sed -i 's/\(cmake_minimum_required( *VERSION  *3\.16\)\.8/\1/' "$f"
  grep -q 'cmake_minimum_required( *VERSION  *3\.16 *)' "$f"
done
cd hipamd-rocm-5.4.2
mkdir -p build
cd build
cmake -S.. -B. -DCMAKE_BUILD_TYPE=Release \
  -DHIP_COMMON_DIR="$WORKSPACE/HIP-rocm-5.4.2" \
  -DAMD_OPENCL_PATH="$WORKSPACE/ROCm-OpenCL-Runtime-rocm-5.4.2" \
  -DROCCLR_PATH="$WORKSPACE/ROCclr-rocm-5.4.2" \
  -DCMAKE_PREFIX_PATH="$OUTDIR;$OUTDIR/llvm" \
  -DUSE_PROF_API=OFF \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR"
make -j16
make install
sudo apt-get -qq install perl file # used for hipcc
# msgpack-c
# Download msgpack-c cpp-3.0.1 and install it, without its tests and
# examples, into its own prefix "$OUTDIR/msgpack".
cd "$WORKSPACE"
wget --quiet --output-document=- \
  https://github.com/msgpack/msgpack-c/archive/refs/tags/cpp-3.0.1.tar.gz \
  | tar --extract --gzip
cd msgpack-c-cpp-3.0.1
cmake -S . -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR/msgpack" \
  -DMSGPACK_BUILD_EXAMPLES=OFF \
  -DMSGPACK_BUILD_TESTS=OFF
make --directory=build --jobs=16
make --directory=build install
# rocblas
# Download the rocm-5.4.2 tags of rocBLAS and Tensile, then build rocBLAS
# with hipcc for "$ARCH" against the local Tensile checkout and install it
# into "$OUTDIR".
cd "$WORKSPACE"
sudo apt-get -qq install gfortran python3 python3-pip python3-virtualenv python3-distutils python3-yaml python3-msgpack
wget -qO- https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
wget -qO- https://github.com/ROCmSoftwarePlatform/Tensile/archive/refs/tags/rocm-5.4.2.tar.gz | tar xz
cd rocBLAS-rocm-5.4.2
# 0005-fix-rocblas-cmake-minimum-version
# Relax cmake_minimum_required from 3.16.8 to 3.16 (presumably because the
# distribution's CMake is older than 3.16.8 -- confirm with `cmake --version`).
# The edit is done with sed instead of an inline diff so that it does not
# depend on exact context lines or whitespace and can be re-run safely; the
# grep afterwards makes the script stop (set -e) if a file does not end up
# with the relaxed line. The pattern tolerates the spaces rocBLAS puts inside
# the parentheses.
for f in \
  CMakeLists.txt \
  clients/CMakeLists.txt
do
  sed -i 's/\(cmake_minimum_required( *VERSION  *3\.16\)\.8/\1/' "$f"
  grep -q 'cmake_minimum_required( *VERSION  *3\.16 *)' "$f"
done
cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_CXX_COMPILER="$OUTDIR/bin/hipcc" \
  -DAMDGPU_TARGETS="$ARCH" \
  -DTensile_TEST_LOCAL_PATH="$WORKSPACE/Tensile-rocm-5.4.2" \
  -DTensile_LOGIC=asm_full \
  -DTensile_CODE_OBJECT_VERSION=V3 \
  -DTensile_SEPARATE_ARCHITECTURES=ON \
  -DTensile_LAZY_LIBRARY_LOADING=ON \
  -DTensile_LIBRARY_FORMAT=msgpack \
  -DRUN_HEADER_TESTING=OFF \
  -DCMAKE_PREFIX_PATH="$OUTDIR;$OUTDIR/msgpack" \
  -DCMAKE_INSTALL_PREFIX="$OUTDIR"
make -j16 -C build
make -C build install
# Builds `rbi`, a single-source executable linked against rocBLAS.
cmake_minimum_required(VERSION 3.16)
project(rocblas-init-benchmark)
# Locate an installed rocBLAS CMake package; configuration stops with an
# error if it cannot be found (REQUIRED). Pass the install prefix through
# CMAKE_PREFIX_PATH when rocBLAS is not in a default search location.
find_package(rocblas REQUIRED)
# main.c is a C source, so it is compiled with the C compiler and C flags.
add_executable(rbi main.c)
# PRIVATE: rocBLAS is needed to build rbi itself; nothing consumes rbi.
target_link_libraries(rbi PRIVATE roc::rocblas)
#include <rocblas/rocblas.h>
/*
 * Entry point of the benchmark: makes a single call to rocblas_initialize()
 * and exits with status 0. Presumably the cost of that call is what gets
 * profiled -- see the rocBLAS documentation for what it does.
 */
int main(void)
{
    rocblas_initialize();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment