cgmb/CMakeLists.txt

## README.md

      
    Raw
  

              README.md
            
          
    How to setup ROCm 5.4.3 on an Ubuntu 22.04 G4ad instance

Install ROCm 5.4.3

sudo apt-get -y update
sudo apt-get -y upgrade
sudo apt-get -y install linux-modules-extra-aws
wget https://repo.radeon.com/amdgpu-install/5.4.3/ubuntu/jammy/amdgpu-install_5.4.50403-1_all.deb
sudo apt-get -y install ./amdgpu-install_5.4.50403-1_all.deb
sudo amdgpu-install --usecase=rocmdev
sudo usermod -a -G video,render ubuntu
sudo reboot

You can verify that the installation succeeded by running `rocminfo` and checking that gfx1011 is listed as one of the agents.
Build and run a sample program

sudo apt-get install -y cmake build-essential g++-12
CXX=/opt/rocm/bin/hipcc cmake -S. -Bbuild -DAMDGPU_TARGETS=gfx1011
make -C build
./build/example

Note that AMD does not build the ROCm math libraries for gfx1011 when preparing their official packages, so if you want to use a library like rocSPARSE (whether directly in C++ or indirectly through a framework like PyTorch), you will have to build it from source yourself. The Spack package manager supports building for specific amdgpu architectures and is a useful tool for this purpose.

  
## CMakeLists.txt
# Build script for the single-file HIP sample program.
cmake_minimum_required(VERSION 3.16)

# Only C++ is enabled as a project language.
project(example LANGUAGES CXX)

# REQUIRED makes configuration stop with an error if the hip package
# cannot be located.
find_package(hip REQUIRED)

# The executable is named after the project ("example").
add_executable(${PROJECT_NAME} main.cpp)

# hip::device is the imported target supplied by the hip package.
target_link_libraries(${PROJECT_NAME}
  PRIVATE
    hip::device
)

## main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <hip/hip_runtime.h>

/*
 * CHECK_HIP(expr)
 *
 * Evaluates `expr` (an expression yielding a hipError_t) exactly once.
 * If the value differs from hipSuccess, prints
 * "<file>:<line>: <error string> (<numeric code>)" to stderr and
 * terminates the process with EXIT_FAILURE.
 *
 * The status is held in a local with a deliberately unusual name so that
 * a caller expression mentioning its own variable (e.g. one called
 * `result`) is not captured by the macro's local.
 * The do { ... } while(0) wrapper makes the macro behave as a single
 * statement, so it is safe inside an unbraced if/else.
 */
#define CHECK_HIP(expr) do {                        \
  const hipError_t check_hip_status_ = (expr);      \
  if (check_hip_status_ != hipSuccess) {            \
    fprintf(stderr, "%s:%d: %s (%d)\n",             \
      __FILE__, __LINE__,                           \
      hipGetErrorString(check_hip_status_),         \
      check_hip_status_);                           \
    exit(EXIT_FAILURE);                             \
  }                                                 \
} while(0)

/*
 * Kernel: squares the first n elements of arr in place.
 *
 * Each thread derives one flat index from its block and thread
 * coordinates along x and handles the element at that index. Threads
 * whose index is n or larger do nothing.
 *
 * arr  array to square in place (main passes memory from hipMalloc)
 * n    number of elements to process
 */
__global__ void sq_arr(float *arr, int n) {
  const int idx = threadIdx.x + blockIdx.x*blockDim.x;
  if (idx >= n) {
    return;  // no element for this thread
  }
  const float value = arr[idx];
  arr[idx] = value * value;
}

int main() {
  enum { N = 5 };
  float hArr[N] = { 1, 2, 3, 4, 5 };
  float *dArr;
  CHECK_HIP(hipMalloc(&dArr, sizeof(float) * N));
  CHECK_HIP(hipMemcpy(dArr, hArr, sizeof(float) * N, hipMemcpyHostToDevice));
  sq_arr<<<dim3(1), dim3(32,1,1), 0, 0>>>(dArr, N);
  CHECK_HIP(hipMemcpy(hArr, dArr, sizeof(float) * N, hipMemcpyDeviceToHost));
  for (int i = 0; i < N; ++i) {
    printf("%f\n", hArr[i]);
  }
  CHECK_HIP(hipFree(dArr));
  return 0;
}
	# Build script for the single-file HIP sample program.
	cmake_minimum_required(VERSION 3.16)

	# Only C++ is enabled as a project language.
	project(example LANGUAGES CXX)

	# REQUIRED makes configuration stop with an error if the hip package
	# cannot be located.
	find_package(hip REQUIRED)

	# The executable is named after the project ("example").
	add_executable(${PROJECT_NAME} main.cpp)

	# hip::device is the imported target supplied by the hip package.
	target_link_libraries(${PROJECT_NAME}
	  PRIVATE
	    hip::device
	)
	#include <stdio.h>
	#include <stdlib.h>
	#include <hip/hip_runtime.h>

	/*
	 * CHECK_HIP(expr)
	 *
	 * Evaluates `expr` (an expression yielding a hipError_t) exactly once.
	 * If the value differs from hipSuccess, prints
	 * "<file>:<line>: <error string> (<numeric code>)" to stderr and
	 * terminates the process with EXIT_FAILURE.
	 *
	 * The status is held in a local with a deliberately unusual name so that
	 * a caller expression mentioning its own variable (e.g. one called
	 * `result`) is not captured by the macro's local.
	 * The do { ... } while(0) wrapper makes the macro behave as a single
	 * statement, so it is safe inside an unbraced if/else.
	 */
	#define CHECK_HIP(expr) do { \
	  const hipError_t check_hip_status_ = (expr); \
	  if (check_hip_status_ != hipSuccess) { \
	    fprintf(stderr, "%s:%d: %s (%d)\n", \
	      __FILE__, __LINE__, \
	      hipGetErrorString(check_hip_status_), \
	      check_hip_status_); \
	    exit(EXIT_FAILURE); \
	  } \
	} while(0)

	/*
	 * Kernel: squares the first n elements of arr in place.
	 *
	 * Each thread derives one flat index from its block and thread
	 * coordinates along x and handles the element at that index. Threads
	 * whose index is n or larger do nothing.
	 *
	 * arr  array to square in place (main passes memory from hipMalloc)
	 * n    number of elements to process
	 */
	__global__ void sq_arr(float *arr, int n) {
	  const int idx = threadIdx.x + blockIdx.x*blockDim.x;
	  if (idx >= n) {
	    return;  // no element for this thread
	  }
	  const float value = arr[idx];
	  arr[idx] = value * value;
	}

	int main() {
	enum { N = 5 };
	float hArr[N] = { 1, 2, 3, 4, 5 };
	float *dArr;
	CHECK_HIP(hipMalloc(&dArr, sizeof(float) * N));
	CHECK_HIP(hipMemcpy(dArr, hArr, sizeof(float) * N, hipMemcpyHostToDevice));
	sq_arr<<<dim3(1), dim3(32,1,1), 0, 0>>>(dArr, N);
	CHECK_HIP(hipMemcpy(hArr, dArr, sizeof(float) * N, hipMemcpyDeviceToHost));
	for (int i = 0; i < N; ++i) {
	printf("%f\n", hArr[i]);
	}
	CHECK_HIP(hipFree(dArr));
	return 0;
	}