Skip to content

Instantly share code, notes, and snippets.

@dejnon
Last active November 28, 2018 09:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dejnon/7466985 to your computer and use it in GitHub Desktop.
Save dejnon/7466985 to your computer and use it in GitHub Desktop.
/*
* This program uses the device CURAND API to calculate what
* proportion of pseudo-random ints have low bit set.
*/
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <curand_kernel.h>
/* include MTGP host helper functions */
#include <curand_mtgp32_host.h>
/* include MTGP pre-computed parameter sets */
#include <curand_mtgp32dc_p_11213.h>
#define CUDA_CALL(x) do { if((x) != cudaSuccess) { \
printf("Error at %s:%d\n",__FILE__,__LINE__); \
return EXIT_FAILURE;}} while(0)
#define CURAND_CALL(x) do { if((x) != CURAND_STATUS_SUCCESS) { \
printf("Error at %s:%d\n",__FILE__,__LINE__); \
return EXIT_FAILURE;}} while(0)
__global__ void generate_kernel(curandStateMtgp32 *state,
int n,
int *result)
{
int id = threadIdx.x + blockIdx.x * 256;
int count = 0;
unsigned int x;
/* Generate pseudo-random unsigned ints */
for(int i = 0; i < n; i++) {
x = curand(&state[blockIdx.x]);
/* Check if low bit set */
if(x & 1) {
count++;
}
}
/* Store results */
result[id] += count;
}
int main(int argc, char *argv[])
{
int i;
long long total;
curandStateMtgp32 *devMTGPStates;
mtgp32_kernel_params *devKernelParams;
int *devResults, *hostResults;
int sampleCount = 10000;
/* Allow over-ride of sample count */
if (argc == 2) {
sscanf(argv[1],"%d",&sampleCount);
}
/* Allocate space for results on host */
hostResults = (int *)calloc(64 * 256, sizeof(int));
/* Allocate space for results on device */
CUDA_CALL(cudaMalloc((void **)&devResults, 64 * 256 *
sizeof(int)));
/* Set results to 0 */
CUDA_CALL(cudaMemset(devResults, 0, 64 * 256 *
sizeof(int)));
/* Allocate space for prng states on device */
CUDA_CALL(cudaMalloc((void **)&devMTGPStates, 64 *
sizeof(curandStateMtgp32)));
/* Setup MTGP prng states */
/* Allocate space for MTGP kernel parameters */
CUDA_CALL(cudaMalloc((void**)&devKernelParams, sizeof(mtgp32_kernel_params)));
/* Reformat from predefined parameter sets to kernel format, */
/* and copy kernel parameters to device memory */
CURAND_CALL(curandMakeMTGP32Constants(mtgp32dc_params_fast_11213, devKernelParams));
/* Initialize one state per thread block */
CURAND_CALL(curandMakeMTGP32KernelState(devMTGPStates,
mtgp32dc_params_fast_11213, devKernelParams, 64, 1234));
/* State setup is complete */
/* Generate and use pseudo-random */
for(i = 0; i < 10; i++) {
generate_kernel<<<64, 256>>>(devMTGPStates, sampleCount, devResults);
}
/* Copy device memory to host */
CUDA_CALL(cudaMemcpy(hostResults, devResults, 64 * 256 *
sizeof(int), cudaMemcpyDeviceToHost));
/* Show result */
total = 0;
for(i = 0; i < 64 * 256; i++) {
total += hostResults[i];
}
printf("Fraction with low bit set was %10.13g\n",
(double)total / (64.0f * 256.0f * sampleCount * 10.0f));
/* Cleanup */
CUDA_CALL(cudaFree(devMTGPStates));
CUDA_CALL(cudaFree(devResults));
free(hostResults);
printf("^^^^ kernel_mtgp_example PASSED\n");
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment