Skip to content

Instantly share code, notes, and snippets.

@SiLiKhon
Created August 18, 2023 07:12
Show Gist options
  • Save SiLiKhon/14a83f557f6dda8e88dc2400873d966f to your computer and use it in GitHub Desktop.
Save SiLiKhon/14a83f557f6dda8e88dc2400873d966f to your computer and use it in GitHub Desktop.
Dummy cuda load
#include "stdio.h"
/*
 * Smoke-test kernel: takes no arguments and has no thread-index guard, so
 * every thread of the launch emits one greeting line via device-side printf.
 */
__global__ void cuda_hello(void)
{
    printf("Hello World from GPU!\n");
}
// Number of float elements in each device buffer (10^9). main() allocates
// three buffers of sizeof(float) * N bytes each, i.e. roughly 4 GB per buffer
// and 12 GB in total assuming a 4-byte float. The value still fits in a
// 32-bit int, which is how it is passed to vector_add.
#define N 1000000000
/*
 * Element-wise sum: out[i] = a[i] + b[i] for every i in [0, n).
 *
 * Launch layout: any 1-D grid/block configuration is valid. The threads of
 * the launch split the index range between them with a stride equal to the
 * total thread count, so each element is computed exactly once no matter how
 * many threads are launched. With <<<1,1>>> this degenerates to the plain
 * sequential loop over all n elements.
 *
 * Parameters:
 *   out  - destination buffer, at least n floats, device-accessible.
 *   a, b - input buffers, at least n floats each, device-accessible.
 *          `out` may alias `a` or `b`: each index is read and written by one
 *          thread only, so in-place accumulation is well defined.
 *   n    - number of elements; n <= 0 makes the kernel a no-op.
 *
 * No shared memory is used.
 */
__global__ void vector_add(float *out, const float *a, const float *b, int n) {
    // 64-bit index arithmetic: `i += stride` must not overflow when n is
    // close to INT_MAX and the launch is large.
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first  = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride) {
        out[i] = a[i] + b[i];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed with error \"%s\".\n", what,
            cudaGetErrorString(status));
    return false;
}

/*
 * Dummy GPU load driver.
 *
 * Launches the greeting kernel, allocates three N-element float buffers on
 * the device, queues 100 single-thread vector_add launches over them, then
 * waits for all queued work and releases the buffers.
 *
 * Returns 0 when every CUDA call and kernel succeeded, 1 otherwise.
 */
int main() {
    cuda_hello<<<1,1>>>();
    // A kernel launch returns nothing; configuration errors are only visible
    // through cudaGetLastError().
    bool ok = cuda_ok(cudaGetLastError(), "cuda_hello launch");

    // NULL-initialised so the cudaFree calls below are harmless no-ops when
    // an allocation fails or is skipped.
    float *a = NULL, *b = NULL, *out = NULL;
    const size_t bytes = sizeof(float) * N;

    // Three buffers of this size may not fit on the device, so every
    // allocation is checked before any kernel gets to touch the pointers.
    ok = ok && cuda_ok(cudaMalloc((void**)&a, bytes), "cudaMalloc(a)");
    ok = ok && cuda_ok(cudaMalloc((void**)&b, bytes), "cudaMalloc(b)");
    ok = ok && cuda_ok(cudaMalloc((void**)&out, bytes), "cudaMalloc(out)");

    // Give the inputs defined contents (all-zero bytes, i.e. 0.0f) so the
    // kernel does not read uninitialised device memory.
    ok = ok && cuda_ok(cudaMemset(a, 0, bytes), "cudaMemset(a)");
    ok = ok && cuda_ok(cudaMemset(b, 0, bytes), "cudaMemset(b)");

    // Queue the load: 100 back-to-back launches, each a single thread that
    // walks all N elements. Stop queuing as soon as a launch is rejected.
    for (int i = 0; ok && i < 100; i++) {
        vector_add<<<1,1>>>(out, a, b, N);
        ok = cuda_ok(cudaGetLastError(), "vector_add launch");
    }

    // Wait for everything queued above; faults raised while the kernels were
    // running are reported here. Done even after an earlier failure so the
    // already-queued work is drained before the buffers are freed.
    if (!cuda_ok(cudaDeviceSynchronize(), "kernel launch"))
        ok = false;

    cudaFree(out);
    cudaFree(b);
    cudaFree(a);
    return ok ? 0 : 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment