Created
April 25, 2020 06:12
-
-
Save imSrbh/17d1784f1874a15c711260eaccea2f36 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%cu | |
#include<cuda_runtime_api.h> | |
#include<stdio.h> | |
#include<time.h> | |
#include<stdlib.h> | |
#include<cstring> | |
// Element-wise integer addition kernel: c[k] = a[k] + b[k] for every k < size.
//
// Each thread handles at most one element, selected by its flat index along
// the x dimension (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index
// is not below `size` do nothing, so the launch may cover more threads than
// there are elements.
//
// a, b  : input arrays, read at indices below size
// c     : output array, written at indices below size
// size  : number of elements to process
__global__ void sum_array_gpu(int * a, int * b, int * c, int size)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Surplus threads past the end of the data exit without touching memory.
    if (idx >= size)
    {
        return;
    }

    c[idx] = a[idx] + b[idx];
}
// Host reference implementation of element-wise addition: c[i] = a[i] + b[i]
// for every i in [0, size).
//
// a, b  : input arrays, each holding at least `size` ints
// c     : output array, at least `size` ints; only the first `size` are written
// size  : number of elements to add; a value <= 0 leaves `c` untouched
void sum_array_cpu(int * a, int * b, int * c, int size)
{
    // NOTE: the loop header must not end in ';' -- that would make the loop
    // body empty and leave `i` out of scope in the block below.
    for (int i = 0; i < size; i++)
    {
        c[i] = a[i] + b[i];
    }
}
// Terminates the program with a readable message when a CUDA runtime call
// did not return cudaSuccess. `what` names the step that was attempted.
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Adds two random integer arrays on the CPU and on the GPU, compares the two
// results element by element and prints whether they match.
//
// Returns EXIT_SUCCESS when the arrays are identical, EXIT_FAILURE when they
// differ or when a host allocation fails. CUDA failures terminate the process
// through check_cuda().
int main()
{
    const int size = 10000;
    const int block_size = 128;
    const size_t NO_BYTES = size * sizeof(int);

    //host pointers
    int *h_a = (int *)malloc(NO_BYTES);
    int *h_b = (int *)malloc(NO_BYTES);
    int *gpu_results = (int *)malloc(NO_BYTES);
    int *h_c = (int *)malloc(NO_BYTES);

    if (h_a == NULL || h_b == NULL || gpu_results == NULL || h_c == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)NO_BYTES);
        // free(NULL) is a no-op, so releasing all four is safe here.
        free(h_a);
        free(h_b);
        free(gpu_results);
        free(h_c);
        return EXIT_FAILURE;
    }

    //initialize host inputs with random values in [0, 255]
    srand((unsigned)time(NULL));
    for (int i = 0; i < size; i++)
    {
        h_a[i] = (int)(rand() & 0xFF);
    }
    for (int i = 0; i < size; i++)
    {
        h_b[i] = (int)(rand() & 0xFF);
    }

    //CPU reference result
    sum_array_cpu(h_a, h_b, h_c, size);
    memset(gpu_results, 0, NO_BYTES);

    //device pointers
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    check_cuda(cudaMalloc((void **)&d_a, NO_BYTES), "cudaMalloc d_a");
    check_cuda(cudaMalloc((void **)&d_b, NO_BYTES), "cudaMalloc d_b");
    check_cuda(cudaMalloc((void **)&d_c, NO_BYTES), "cudaMalloc d_c");

    check_cuda(cudaMemcpy(d_a, h_a, NO_BYTES, cudaMemcpyHostToDevice), "copy h_a to device");
    check_cuda(cudaMemcpy(d_b, h_b, NO_BYTES, cudaMemcpyHostToDevice), "copy h_b to device");
    // Zero the output buffer instead of uploading the CPU result into it:
    // seeding d_c with h_c would make the comparison below pass even if the
    // kernel never wrote anything.
    check_cuda(cudaMemset(d_c, 0, NO_BYTES), "cudaMemset d_c");

    //launching the grid
    dim3 block(block_size);
    // Ceil-division: just enough blocks to cover `size` elements.
    dim3 grid((size + block.x - 1) / block.x);
    sum_array_gpu<<< grid, block >>>(d_a, d_b, d_c, size);
    // A kernel launch returns no status; bad launch configurations surface here.
    check_cuda(cudaGetLastError(), "kernel launch");
    // Errors raised while the kernel executes surface at the next sync point.
    check_cuda(cudaDeviceSynchronize(), "kernel execution");

    check_cuda(cudaMemcpy(gpu_results, d_c, NO_BYTES, cudaMemcpyDeviceToHost), "copy result to host");

    //array comparison
    bool arrays_match = true;
    for (int i = 0; i < size; i++)
    {
        if (gpu_results[i] != h_c[i])
        {
            arrays_match = false;
            break;
        }
    }

    if (arrays_match)
    {
        printf("Arrays are same \n");
    }
    else
    {
        printf("Arrays are different \n");
    }

    // Cleanup runs on both outcomes so nothing leaks on a mismatch.
    check_cuda(cudaFree(d_c), "cudaFree d_c");
    check_cuda(cudaFree(d_b), "cudaFree d_b");
    check_cuda(cudaFree(d_a), "cudaFree d_a");

    free(gpu_results);
    free(h_c);
    free(h_a);
    free(h_b);

    check_cuda(cudaDeviceReset(), "cudaDeviceReset");
    return arrays_match ? EXIT_SUCCESS : EXIT_FAILURE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
/tmp/tmpe8fmjutv/ca2dfea9-2a7d-45ec-929f-a157e8bda3de.cu(22): error: identifier "i" is undefined
/tmp/tmpe8fmjutv/ca2dfea9-2a7d-45ec-929f-a157e8bda3de.cu(28): error: expected a ";"
/tmp/tmpe8fmjutv/ca2dfea9-2a7d-45ec-929f-a157e8bda3de.cu(31): warning: variable "size" is used before its value is set
/tmp/tmpe8fmjutv/ca2dfea9-2a7d-45ec-929f-a157e8bda3de.cu(91): warning: non-void function "main" should return a value
2 errors detected in the compilation of "/tmp/tmpxft_00000115_00000000-8_ca2dfea9-2a7d-45ec-929f-a157e8bda3de.cpp1.ii".