Skip to content

Instantly share code, notes, and snippets.

@vhxs
Created June 29, 2022 01:57
Show Gist options
  • Save vhxs/5bc0f4a00050ff277c43248a382e680f to your computer and use it in GitHub Desktop.
#include <stdlib.h>
#include <stdio.h>
#include <cuda.h>
#define N 512
// compile with nvcc add_vectors.cu
// forward declaration of CUDA kernel
__global__ void add_vectors(int* A, int* B, int* C);
// Abort with a diagnostic if a CUDA runtime call fails.  Kernel launches do
// not return an error directly; configuration errors are fetched afterwards
// with cudaGetLastError(), which is also wrapped by this macro.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Host driver: builds two N-element int vectors on the CPU, adds them on the
// GPU with one thread per element (single block of N threads), copies the
// sum back, and prints it.  Returns 0 on success, EXIT_FAILURE on any
// allocation or CUDA error.
int main(void) {
    const size_t bytes = N * sizeof(int);

    // allocate space on CPU
    int* A = (int*) malloc(bytes);
    int* B = (int*) malloc(bytes);
    int* C = (int*) malloc(bytes);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }

    // allocate space on GPU
    int* A_d;
    int* B_d;
    int* C_d;
    CUDA_CHECK(cudaMalloc(&A_d, bytes));
    CUDA_CHECK(cudaMalloc(&B_d, bytes));
    CUDA_CHECK(cudaMalloc(&C_d, bytes));

    // populate input data on CPU
    for (int i = 0; i < N; i++) {
        A[i] = i;
        B[i] = 2 * i;
        C[i] = 0;
    }

    // copy inputs CPU -> GPU.  C is not copied up: the kernel overwrites
    // every element of C_d, so an upload would be wasted bandwidth.
    CUDA_CHECK(cudaMemcpy(A_d, A, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(B_d, B, bytes, cudaMemcpyHostToDevice));

    // invoke kernel: one block, one thread per element
    add_vectors<<<1, N>>>(A_d, B_d, C_d);
    CUDA_CHECK(cudaGetLastError());  // catch bad launch configuration

    // copy result GPU -> CPU.  cudaMemcpy on the default stream blocks until
    // the preceding kernel finishes, so no explicit synchronize is needed.
    CUDA_CHECK(cudaMemcpy(C, C_d, bytes, cudaMemcpyDeviceToHost));

    // print result
    for (int i = 0; i < N; i++) {
        printf("%d\n", C[i]);
    }

    // release GPU and CPU memory
    CUDA_CHECK(cudaFree(A_d));
    CUDA_CHECK(cudaFree(B_d));
    CUDA_CHECK(cudaFree(C_d));
    free(A);
    free(B);
    free(C);
    return 0;
}
// Element-wise vector addition: C[i] = A[i] + B[i].
// Expects a 1-D launch with exactly one thread per element; only
// threadIdx.x is used, so all elements must fit in a single block.
// No bounds check is performed — the caller must not launch more
// threads than there are elements.
__global__ void add_vectors(int* A, int* B, int* C) {
    const int idx = threadIdx.x;
    C[idx] = A[idx] + B[idx];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment