Skip to content

Instantly share code, notes, and snippets.

@gboncoffee
Created June 16, 2024 18:41
Show Gist options
  • Save gboncoffee/6e838206d22f1fa2b878018f98ad8a44 to your computer and use it in GitHub Desktop.
Save gboncoffee/6e838206d22f1fa2b878018f98ad8a44 to your computer and use it in GitHub Desktop.
Merge Sort running on Nvidia GPU via CUDA
#include <iostream>
void coutArray(float array[], int n) {
for (auto i = 0; i < n - 1; ++i) std::cout << array[i] << ", ";
std::cout << array[n - 1] << std::endl;
}
__global__ void gpuCopyArray(float dest[], float src[], int n) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (auto i = index; i < n; i += stride) dest[i] = src[i];
}
__global__ void gpuMerge(float srcA[], float srcB[], float dest[], int n) {
auto idxA = 0;
auto idxB = 0;
auto i = 0;
while (idxA < n && idxB < n) {
if (srcA[idxA] < srcB[idxB]) {
dest[i] = srcA[idxA];
++idxA;
} else {
dest[i] = srcB[idxB];
++idxB;
}
++i;
}
for (; idxB < n; ++idxB) {
dest[i] = srcB[idxB];
++i;
}
for (; idxA < n; ++idxA) {
dest[i] = srcA[idxA];
++i;
}
}
__global__ void gpuMergeSort(float arr[], float aux[], int n) {
if (n / 2 == 1) return;
int mid = n / 2;
if (threadIdx.x == 1) {
arr = &arr[mid];
aux = &aux[mid];
n = n - mid;
} else {
n = mid;
}
mid = n / 2;
gpuMergeSort<<<1, 2>>>(arr, aux, n);
gpuMerge<<<1, 1>>>(arr, &arr[mid], aux, mid);
gpuCopyArray<<<1, n>>>(arr, aux, n);
}
void mergeSort(float arr[], float aux[], int n) {
int mid = n / 2;
gpuMergeSort<<<1, 2>>>(arr, aux, n);
gpuMerge<<<1, 1>>>(arr, &arr[mid], aux, mid);
gpuCopyArray<<<1, n>>>(arr, aux, n);
}
int main() {
const int N = 16;
float *arr;
float *aux;
cudaMallocManaged(&arr, N * sizeof(float));
cudaMallocManaged(&aux, N * sizeof(float));
for (auto i = 0; i < N; ++i) {
arr[i] = N - i;
}
coutArray(arr, N);
mergeSort(arr, aux, N);
cudaDeviceSynchronize();
coutArray(aux, N);
cudaFree(arr);
cudaFree(aux);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment