Skip to content

Instantly share code, notes, and snippets.

@tkphd
Last active May 15, 2019 19:59
Show Gist options
  • Save tkphd/e74c8dae03ed8658ce5d2ebc189e1eaa to your computer and use it in GitHub Desktop.
Save tkphd/e74c8dae03ed8658ce5d2ebc189e1eaa to your computer and use it in GitHub Desktop.
CUDA vector addition
/* vector_addition.cu
* Available as a Gist at https://tinyurl.com/y34nfxgc
*/
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#define N 16
/* Element-wise integer vector addition: d_w[i] = d_u[i] + d_v[i].
 *
 * Each thread handles the single element at its global 1-D index,
 * blockDim.x * blockIdx.x + threadIdx.x. Threads whose index is N or
 * larger do nothing, so the grid may safely be rounded up to a whole
 * number of blocks.
 *
 * d_u, d_v  input vectors, read inside the kernel
 * d_w       output vector, written inside the kernel
 *
 * All three pointers are dereferenced on the device and must each refer to
 * at least N ints.
 */
__global__ void vector_addition(int* d_u, int* d_v, int* d_w)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    // Bounds guard: surplus threads in the last block must not touch
    // memory beyond the N-element vectors.
    if (i < N) {
        d_w[i] = d_u[i] + d_v[i];
    }
}
/* Print a diagnostic to stderr and terminate the program if a CUDA runtime
 * call did not return cudaSuccess.
 *
 * status  return code of the CUDA call being checked
 * what    short description of the call, included in the message
 */
static void check_cuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Adds two N-element integer vectors on the GPU and prints every sum.
 *
 * Fills a[i] = i and b[i] = N - i on the host, copies both to the device,
 * launches vector_addition, copies the result back and prints one
 * "a + b = c" line per element.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main()
{
    // Allocate and fill host memory. c is zeroed so that it never holds
    // indeterminate values.
    int a[N], b[N], c[N];
    for (int i = 0; i < N; i++) {
        a[i] = i;
        b[i] = N - i;
        c[i] = 0;
    }

    // Launch geometry: round the block count up so that the tail of the
    // vectors is still covered when N is not a multiple of the block size.
    const int threads = 16;
    const int blocks = (N + threads - 1) / threads;
    dim3 blocks_per_grid(blocks, 1, 1);
    dim3 threads_per_block(threads, 1, 1);

    // Allocate device memory. The buffers are padded to a whole number of
    // blocks so any surplus threads in the last block stay inside allocated
    // memory.
    const size_t N_B = N * sizeof(int);
    const size_t padded_B = (size_t)blocks * threads * sizeof(int);
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    check_cuda(cudaMalloc(&d_a, padded_B), "cudaMalloc d_a");
    check_cuda(cudaMalloc(&d_b, padded_B), "cudaMalloc d_b");
    check_cuda(cudaMalloc(&d_c, padded_B), "cudaMalloc d_c");

    // Zero the inputs first so the padding past element N is initialized.
    check_cuda(cudaMemset(d_a, 0, padded_B), "cudaMemset d_a");
    check_cuda(cudaMemset(d_b, 0, padded_B), "cudaMemset d_b");

    // Copy to device
    check_cuda(cudaMemcpy(d_a, a, N_B, cudaMemcpyHostToDevice), "copy a to device");
    check_cuda(cudaMemcpy(d_b, b, N_B, cudaMemcpyHostToDevice), "copy b to device");

    // Add the vectors. A launch returns no status itself, so query the
    // launch error explicitly, then synchronize to surface execution errors.
    vector_addition<<<blocks_per_grid, threads_per_block>>>(d_a, d_b, d_c);
    check_cuda(cudaGetLastError(), "vector_addition launch");
    check_cuda(cudaDeviceSynchronize(), "vector_addition execution");

    // Copy from the device (only the N real elements, not the padding)
    check_cuda(cudaMemcpy(c, d_c, N_B, cudaMemcpyDeviceToHost), "copy c to host");

    // Print stuff
    for (int i = 0; i < N; i++)
        printf("%i + %i = %i\n", a[i], b[i], c[i]);

    // Free memory
    check_cuda(cudaFree(d_a), "cudaFree d_a");
    check_cuda(cudaFree(d_b), "cudaFree d_b");
    check_cuda(cudaFree(d_c), "cudaFree d_c");
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment