Created
September 23, 2012 10:31
-
-
Save NicholasShatokhin/3769635 to your computer and use it in GitHub Desktop.
CUDA random
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cuda.h>
#include <curand_kernel.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * CUDA_CALL(x): evaluate the CUDA runtime expression `x` exactly once.
 * If the result is not cudaSuccess, print the source location together with
 * the runtime's error string and return EXIT_FAILURE from the enclosing
 * function (so it may only be used inside functions returning int).
 *
 * The status is cached in a local because `x` is usually a call with side
 * effects (cudaMalloc, cudaMemcpy, cudaGetLastError, ...); expanding it a
 * second time to build the message would re-run the call.
 */
#define CUDA_CALL(x) do { \
    const cudaError_t cudaCallStatus_ = (x); \
    if (cudaCallStatus_ != cudaSuccess) { \
        printf("Error at %s:%d -- %s\n", __FILE__, __LINE__, \
               cudaGetErrorString(cudaCallStatus_)); \
        return EXIT_FAILURE; \
    } \
} while(0)

/* Number of random values to generate (one value per participating thread). */
#define N 10000
/*
 * Initialise one cuRAND generator state per thread.
 *
 * Launch layout: 1D grid of 1D blocks; the global thread index selects the
 * state slot. Threads whose index is >= N do nothing, so the grid may be
 * rounded up to a whole number of blocks.
 *
 * state - device array with at least N curandState elements
 * seed  - seed shared by all threads; each thread uses its own index as the
 *         cuRAND sequence number (offset 0)
 */
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid rarely divides N evenly: keep tail threads from writing
    // past the end of the state array.
    if ( idx >= N )
        return;

    curand_init ( seed, idx, 0, &state[idx] );
}
/*
 * Draw one uniformly distributed float per thread.
 *
 * Launch layout: 1D grid of 1D blocks, same indexing as the kernel that
 * initialised the states. Threads whose index is >= N do nothing.
 *
 * globalState - device array with at least N initialised curandState
 *               elements; each used state is advanced and written back
 * randomArray - device array with at least N floats; element idx receives
 *               the value produced by curand_uniform for thread idx
 */
__global__ void generate( curandState* globalState, float * randomArray )
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Tail threads of the last block have no state and no output slot.
    if ( idx >= N )
        return;

    // Work on a local copy of the state, then store it back so a later
    // launch continues the sequence instead of repeating it.
    curandState localState = globalState[idx];
    float RANDOM = curand_uniform( &localState );
    randomArray[idx] = RANDOM;
    globalState[idx] = localState;
}
/*
 * Print a human-readable summary of a device's properties to stdout.
 *
 * devProp - properties structure as filled in by cudaGetDeviceProperties
 *
 * The size_t members (memory sizes, pitch, alignment) are printed with %zu;
 * printing them with %u mismatches the argument type and misreports values
 * on platforms where size_t is wider than unsigned int.
 */
void printDevProp(cudaDeviceProp devProp)
{
    printf("Major revision number: %d\n", devProp.major);
    printf("Minor revision number: %d\n", devProp.minor);
    printf("Name: %s\n", devProp.name);
    printf("Total global memory: %zu\n", devProp.totalGlobalMem);
    printf("Total shared memory per block: %zu\n", devProp.sharedMemPerBlock);
    printf("Total registers per block: %d\n", devProp.regsPerBlock);
    printf("Warp size: %d\n", devProp.warpSize);
    printf("Maximum memory pitch: %zu\n", devProp.memPitch);
    printf("Maximum threads per block: %d\n", devProp.maxThreadsPerBlock);
    for (int i = 0; i < 3; ++i)
        printf("Maximum dimension %d of block: %d\n", i, devProp.maxThreadsDim[i]);
    for (int i = 0; i < 3; ++i)
        printf("Maximum dimension %d of grid: %d\n", i, devProp.maxGridSize[i]);
    printf("Clock rate: %d\n", devProp.clockRate);
    printf("Total constant memory: %zu\n", devProp.totalConstMem);
    printf("Texture alignment: %zu\n", devProp.textureAlignment);
    printf("Concurrent copy and execution: %s\n", (devProp.deviceOverlap ? "Yes" : "No"));
    printf("Number of multiprocessors: %d\n", devProp.multiProcessorCount);
    printf("Kernel execution timeout: %s\n", (devProp.kernelExecTimeoutEnabled ? "Yes" : "No"));
}
/*
 * Entry point.
 *
 * 1. Lists every CUDA device and prints its properties.
 * 2. Seeds one cuRAND state per thread and generates N uniform floats on
 *    the current device.
 * 3. Copies the values back to the host and prints them, one per line.
 *
 * Returns 0 on success and EXIT_FAILURE if no device is available or any
 * CUDA call / kernel launch fails. Early error returns end the process, so
 * outstanding allocations are reclaimed by the OS and the driver.
 */
int main( int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // Number of CUDA devices
    int devCount = 0;
    CUDA_CALL(cudaGetDeviceCount(&devCount));
    printf("CUDA Device Query...\n");
    printf("There are %d CUDA devices.\n", devCount);
    if (devCount < 1)
    {
        printf("No CUDA device available.\n");
        return EXIT_FAILURE;
    }

    // The kernels run on the current device, so the block size must be
    // derived from that device's limits rather than from whichever device
    // happens to be enumerated last.
    int activeDevice = 0;
    CUDA_CALL(cudaGetDevice(&activeDevice));

    int dimension = 0;

    // Iterate through devices
    for (int i = 0; i < devCount; ++i)
    {
        // Get device properties
        printf("\nCUDA Device #%d\n", i);
        cudaDeviceProp devProp;
        CUDA_CALL(cudaGetDeviceProperties(&devProp, i));
        if (i == activeDevice)
            dimension = devProp.maxThreadsDim[0];
        printDevProp(devProp);
    }

    // Use half of the maximum block width, but never an empty block
    // (a zero block size would also divide by zero below).
    unsigned int threadsPerBlock = (dimension > 1) ? (unsigned int)(dimension / 2) : 1u;

    // Ceil-division: just enough blocks to cover N elements.
    unsigned int blocksCount = (N + threadsPerBlock - 1) / threadsPerBlock;

    dim3 threads = dim3(threadsPerBlock, 1);
    dim3 blocks = dim3(blocksCount, 1);

    // The kernels index their buffers by global thread id. Size the device
    // buffers for every launched thread so that threads in the rounded-up
    // tail of the grid still address valid memory.
    size_t launchedThreads = (size_t)blocksCount * threadsPerBlock;

    curandState* devStates = NULL;
    float * devRandomValues = NULL;

    CUDA_CALL(cudaMalloc ( &devStates, launchedThreads*sizeof( curandState ) ));
    CUDA_CALL(cudaMalloc ( &devRandomValues, launchedThreads*sizeof( *devRandomValues ) ));

    // setup seeds
    setup_kernel <<<blocks, threads>>> ( devStates, (unsigned long)time(NULL) );
    // Read the launch status once: cudaGetLastError() clears the error it
    // returns, so the same value is used for both the message and the check.
    cudaError_t launchStatus = cudaGetLastError();
    printf("%s\n", cudaGetErrorString(launchStatus));
    if (launchStatus != cudaSuccess)
        return EXIT_FAILURE;

    // generate random numbers
    generate <<<blocks, threads>>> ( devStates, devRandomValues );
    launchStatus = cudaGetLastError();
    printf("%s\n", cudaGetErrorString(launchStatus));
    if (launchStatus != cudaSuccess)
        return EXIT_FAILURE;

    // Surface any fault raised while the kernels were executing before the
    // results are copied back.
    CUDA_CALL(cudaDeviceSynchronize());

    float * randomValues = new float[N];

    CUDA_CALL(cudaMemcpy ( randomValues, devRandomValues, N*sizeof(*randomValues), cudaMemcpyDeviceToHost ));

    for(int i=0;i<N;i++)
    {
        printf("%f\n", randomValues[i]);
    }

    CUDA_CALL(cudaFree(devRandomValues));
    CUDA_CALL(cudaFree(devStates));

    // Allocated with new[], so it must be released with delete[].
    delete[] randomValues;

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment