Skip to content

Instantly share code, notes, and snippets.

@gyu-don
Created September 2, 2017 07:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gyu-don/fa5e5f75e817b31db97b8f4e5ce0b129 to your computer and use it in GitHub Desktop.
Save gyu-don/fa5e5f75e817b31db97b8f4e5ce0b129 to your computer and use it in GitHub Desktop.
// CUDA practice: a bingo tournament with one million players.
#include <stdio.h>
#include <inttypes.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <curand_kernel.h>
// Order in which the 75 bingo numbers are called. main() fills it with a
// shuffled 1..75 via cudaMemcpyToSymbol; the bingo kernel reads seq[i] as the
// number called on turn i.
__constant__ uint8_t seq[75];
// Deal the bingo card belonging to the calling thread.
//
// card: output buffer of 25 cells. Cells are laid out as five consecutive
//       groups of five; group g (card[5*g] .. card[5*g + 4]) receives five
//       distinct numbers from the range 15*g + 1 .. 15*g + 15. The centre
//       cell card[12] is overwritten with 0, i.e. it starts out marked.
//
// The random stream is seeded with the fixed seed 12345 and uses the global
// thread index (x dimension only) as the cuRAND subsequence number.
__device__ void make_card(uint8_t *card)
{
    const unsigned int gid = blockIdx.x * blockDim.x + threadIdx.x;
    curandState rng;
    curand_init(12345, gid, 0, &rng);

    // Pool of offsets 1..15. It is only ever permuted, so it can be carried
    // over from one group to the next without being rebuilt.
    uint8_t pool[15];
    for (int n = 0; n < 15; ++n) {
        pool[n] = n + 1;
    }

    for (int group = 0; group < 5; ++group) {
        uint8_t *cells = card + 5 * group;
        const int base = 15 * group;

        for (int draw = 0; draw < 5; ++draw) {
            // Pick among the (15 - draw) offsets not yet used in this group.
            const uint8_t pick = curand(&rng) % (15 - draw);
            cells[draw] = base + pool[pick];

            // Retire the picked offset to the tail of the pool so that later
            // draws in this group cannot select it again. The fifth draw needs
            // no retirement because nothing follows it within the group.
            if (draw < 4) {
                const int tail = 14 - draw;
                const uint8_t taken = pool[pick];
                pool[pick] = pool[tail];
                pool[tail] = taken;
            }
        }
    }

    // Free centre cell.
    card[12] = 0;
}
// Kernel: each thread deals its own card with make_card(), then plays through
// the calling order in seq[]. On the first turn that completes a line the
// thread adds 1 to result[turn] and stops.
//
// result: array of at least 75 counters, indexed by 0-based turn number;
//         updated with atomicAdd because many threads may finish on the same
//         turn.
//
// Lines checked after each mark: the 5-cell group holding the called number,
// the line crossing all five groups at the marked position, and both
// diagonals. card[12] is omitted from the diagonal tests because make_card()
// always leaves it at 0.
__global__ void bingo(unsigned int *result)
{
    uint8_t card[25];
    make_card(card);

    for (int turn = 0; turn < 75; ++turn) {
        const uint8_t called = seq[turn];

        // A number n can only sit in group (n - 1) / 15.
        uint8_t *group = &card[5 * ((called - 1) / 15)];

        // Look for the called number inside that group.
        int pos = 0;
        while (pos < 5 && group[pos] != called) {
            ++pos;
        }
        if (pos == 5) {
            continue;  // not on this card
        }

        // Mark the cell; a cell value of 0 means "marked".
        group[pos] = 0;

        const bool group_done =
            (group[0] | group[1] | group[2] | group[3] | group[4]) == 0;
        const bool cross_done =
            (card[pos] | card[5 + pos] | card[10 + pos] |
             card[15 + pos] | card[20 + pos]) == 0;
        const bool diag_done =
            (card[0] | card[6] | card[18] | card[24]) == 0;
        const bool anti_done =
            (card[4] | card[8] | card[16] | card[20]) == 0;

        if (group_done || cross_done || diag_done || anti_done) {
            atomicAdd(&result[turn], 1);
            return;
        }
    }
}
// Abort with a readable message when a CUDA runtime call reports an error.
static void check_cuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Host driver: shuffles the calling order, uploads it to the device, runs
// N_BLOCK * N_THREAD simulated players and prints, for every turn 1..75, how
// many players completed their first line on that turn.
//
// Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
int main(void)
{
    const unsigned int N_THREAD = 1024;
    const unsigned int N_BLOCK = 1024;

    // Calling order: shuffle of 1..75 with a fixed seed so that runs are
    // repeatable. Position i is swapped with a random position in [i, 74].
    srand(12345);
    uint8_t seq_host[75];
    for (int i = 0; i < 75; i++) seq_host[i] = i + 1;
    for (int i = 0; i < 75; i++) {
        int t = rand() % (75 - i);
        uint8_t sw = seq_host[i];
        seq_host[i] = seq_host[i + t];
        seq_host[i + t] = sw;
    }
    check_cuda(cudaMemcpyToSymbol(seq, seq_host, sizeof(seq_host)),
               "cudaMemcpyToSymbol(seq)");

    // Per-turn winner counters, zeroed on the device before the launch.
    unsigned int *result = NULL;
    unsigned int host_result[75] = {};
    check_cuda(cudaMalloc((void**)&result, sizeof(host_result)), "cudaMalloc(result)");
    check_cuda(cudaMemset(result, 0, sizeof(host_result)), "cudaMemset(result)");

    bingo<<<N_BLOCK, N_THREAD>>>(result);
    // A kernel launch returns nothing; an invalid configuration or a kernel
    // that needs too many resources for this block size only shows up here.
    check_cuda(cudaGetLastError(), "bingo kernel launch");

    // Blocking copy: waits for the kernel and reports any execution error.
    check_cuda(cudaMemcpy(host_result, result, sizeof(host_result), cudaMemcpyDeviceToHost),
               "cudaMemcpy(result)");
    check_cuda(cudaFree(result), "cudaFree(result)");

    for (int i = 0; i < 75; i++) {
        // The counters are unsigned, so print them with %u.
        printf("%2d\t: %6u\n", i + 1, host_result[i]);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment