Last active
December 19, 2016 17:24
-
-
Save ebraminio/b1ac55596aa6e91752889e50c25ec355 to your computer and use it in GitHub Desktop.
All Sums
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "cuda_runtime.h" | |
#include "device_launch_parameters.h" | |
#define __CUDACC__ | |
#include "device_functions.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
void print(char*, int[], int); | |
/**
 * Replaces A[0..size) with its inclusive prefix sums ("all sums"):
 * on return A[k] holds the original A[0] + A[1] + ... + A[k].
 *
 * The scan is a step-doubling (Hillis-Steele style) scan done in dynamic
 * shared memory: in the step with offset i, every element k >= i adds the
 * value that element k - i held at the start of that step.
 *
 * Launch requirements:
 *   - Indexing uses threadIdx.x only, so the whole array must be handled by
 *     a single block with blockDim.x >= size.
 *   - Dynamic shared memory (3rd launch argument) must be at least
 *     size * sizeof(int) bytes.
 *
 * @param A     device pointer to at least `size` ints; overwritten in place.
 * @param size  number of elements to scan.
 */
__global__
void allSums(int* A, int size) {
    extern __shared__ int localA[];

    const int tid = (int)threadIdx.x;
    // Threads beyond the end of the array must not touch memory, but they
    // still have to reach every __syncthreads() below, so they are masked
    // with this flag instead of returning early.
    const bool active = tid < size;

    if (active)
        localA[tid] = A[tid];
    // Make every element visible to the whole block before any thread reads
    // a neighbour's slot.
    __syncthreads();

    // The loop bounds depend only on `size`, so all threads of the block run
    // the same number of iterations and hit the same barriers.
    for (int i = 1; i < size; i <<= 1) {
        // Phase 1: read the partner value into a register.
        int addend = 0;
        if (active && tid >= i)
            addend = localA[tid - i];
        // All reads of this step must finish before anyone overwrites a slot,
        // otherwise a thread could pick up an already-updated neighbour.
        __syncthreads();

        // Phase 2: apply the update.
        if (active)
            localA[tid] += addend;
        // All writes of this step must land before the next step reads them.
        __syncthreads();
    }

    if (active)
        A[tid] = localA[tid];
}
/* Prints a diagnostic and terminates the program if a CUDA runtime call
 * (or a kernel launch, via cudaGetLastError) reported a failure. */
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Demo driver: copies a small host array to the device, runs allSums on it
 * with one block of `size` threads, copies the result back and prints the
 * array before and after. Waits for a key press before exiting.
 */
int main() {
    int arr[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    const int numBytes = sizeof arr;
    const int size = numBytes / sizeof(int);

    // print() takes a mutable char*, so the labels live in writable arrays
    // rather than being passed as string literals.
    char inputLabel[] = "arr: ";
    // The kernel produces running (prefix) sums, so the label says so.
    char resultLabel[] = "\nResult of GPU processing (all prefix sums): ";

    print(inputLabel, arr, size);

    int* arrGPU = NULL;
    checkCuda(cudaMalloc((void**)&arrGPU, numBytes), "cudaMalloc");
    checkCuda(cudaMemcpy(arrGPU, arr, numBytes, cudaMemcpyHostToDevice),
              "cudaMemcpy (host to device)");

    // allSums indexes by threadIdx.x only, so everything runs in one block
    // with one thread per element; the third launch argument sizes the
    // kernel's dynamic shared-memory array (one int per element).
    const int numBlocks = 1;
    dim3 threadsPerBlock(size, 1);
    allSums<<<numBlocks, threadsPerBlock, numBytes>>>(arrGPU, size);
    // Launch-configuration errors are only reported through cudaGetLastError;
    // faults during execution surface at the following synchronize.
    checkCuda(cudaGetLastError(), "allSums launch");
    checkCuda(cudaDeviceSynchronize(), "allSums execution");

    checkCuda(cudaMemcpy(arr, arrGPU, numBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");
    checkCuda(cudaFree(arrGPU), "cudaFree");

    print(resultLabel, arr, size);

    getchar();  // keep the console window open until a key is pressed
    return 0;
}
/**
 * Writes `str` followed by the first `n` elements of `arr` to stdout in the
 * form "{ a, b, c }" and ends the line.
 *
 * @param str  label printed immediately before the opening brace.
 * @param arr  values to print.
 * @param n    number of elements of `arr` to print.
 */
void print(char* str, int arr[], int n) {
    printf("%s{ ", str);

    int idx = 0;
    while (idx < n) {
        // Every element except the last is followed by a separator.
        const char* separator = (idx == n - 1) ? "" : ", ";
        printf("%d%s", arr[idx], separator);
        ++idx;
    }

    printf(" }\n");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment