Created
May 23, 2010 06:13
-
-
Save robbrit/410689 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* This needs Nvidia's CUDA library, and SDL installed. | |
* You can compile using nvcc: | |
* | |
* nvcc -L/path/to/cuda/lib64 -lcudart `sdl-config --libs --cflags` | |
* | |
*/ | |
#include <stdio.h> | |
#include <assert.h> | |
#include <cuda.h> | |
#include <math.h> | |
#include <SDL/SDL.h> | |
const int ITERATIONS = 1000; | |
const int SIZE = 800; | |
const int RAD = 4; | |
int t = 0; | |
int hue_offset = 170; | |
int value_offset = 0; | |
double saturation = 1.0; | |
/* This is the function that does the actual computation. | |
* You pass in the complex coordinates (cr, ci) and let | |
* it do its thing. | |
*/ | |
__global__ void julia(int * d_out, float cr, float ci, int N){ | |
int pt = blockDim.x * blockIdx.x + threadIdx.x; | |
if (pt < N){ | |
int x = pt % SIZE; | |
int y = pt / SIZE; | |
float x0 = (float)y / SIZE * 2.0f - 1.0f; | |
float y0 = (float)x / SIZE * 2.0f - 1.0f; | |
float xtemp; | |
int i; | |
for (i = 0; i < ITERATIONS; i++){ | |
xtemp = x0 * x0 - y0 * y0 + cr; | |
y0 = 2 * x0 * y0 + ci; | |
x0 = xtemp; | |
if (x0 * x0 + y0 * y0 >= RAD){ | |
break; | |
} | |
} | |
d_out[pt] = i; | |
} | |
} | |
/* Plot the pixel using HSV instead of RGB - this | |
* gives us a nicer looking image. The parameter i | |
* is the number of iterations it took for the pixel | |
* to diverge. | |
*/ | |
void plot(SDL_Surface * surface, int x, int y, int i){ | |
double h = ((i+hue_offset) % 360); | |
double s = saturation; | |
double v = i == ITERATIONS ? 0.0 : 1.0; | |
int hi = (int)(h / 60.0) % 6; | |
double f = h / 60.0 - (int)(h / 60); | |
double p = v * (1 - s); | |
double q = v * (1 - f * s); | |
double t = v * (1 - (1 - f) * s); | |
double r, g, b; | |
switch((int)hi){ | |
case 0: | |
r = v; | |
g = t; | |
b = p; | |
break; | |
case 1: | |
r = q; | |
g = v; | |
b = p; | |
break; | |
case 2: | |
r = p; | |
g = v; | |
b = t; | |
break; | |
case 3: | |
r = p; | |
g = q; | |
b = v; | |
break; | |
case 4: | |
r = t; | |
g = p; | |
b = v; | |
break; | |
default: | |
r = v; | |
g = p; | |
b = q; | |
break; | |
} | |
*((Uint32*)surface->pixels + y * surface->w + x) = | |
SDL_MapRGB(surface->format, (int)(r * 255), (int)(g * 255), (int)(b * 255)); | |
} | |
void render(SDL_Surface * surface, int * h_a){ | |
for (int y = 0; y < SIZE; y++){ | |
for (int x = 0; x < SIZE; x++){ | |
plot(surface, x, y, h_a[y * SIZE + x]); | |
} | |
} | |
SDL_UpdateRect(surface, 0, 0, 0, 0); | |
} | |
int main(){ | |
int *h_a; | |
int *d_a; | |
int dimA = SIZE * SIZE; | |
int numThreadsPerBlock = 256; | |
int numBlocks = dimA / numThreadsPerBlock; | |
size_t memSize = numBlocks * numThreadsPerBlock * sizeof(int); | |
h_a = (int*)malloc(memSize); | |
cudaMalloc((void **)&d_a, memSize); | |
for (int i = 0; i < dimA; i++){ | |
h_a[i] = 0; | |
} | |
cudaMemcpy(d_a, h_a, memSize, cudaMemcpyHostToDevice); | |
SDL_Surface * wnd = SDL_SetVideoMode(SIZE, SIZE, 32, SDL_HWSURFACE); | |
SDL_Event evt; | |
while (true){ | |
if (SDL_PollEvent(&evt)){ | |
if (evt.type == SDL_QUIT){ | |
break; | |
} | |
} | |
dim3 dimGrid(numBlocks); | |
dim3 dimBlock(numThreadsPerBlock); | |
/* This rotates us along an ellipse in complex space. Change the | |
* constants here if you want to have a different animation. | |
*/ | |
float cr = (float)cos((float)t / 2 / 180 * M_PI) * 0.11 - 0.12; | |
float ci = (float)sin((float)t / 2 / 180 * M_PI) * 0.10 + 0.74; | |
julia <<< dimGrid, dimBlock >>> (d_a, cr, ci, dimA); | |
cudaThreadSynchronize(); | |
cudaMemcpy(h_a, d_a, memSize, cudaMemcpyDeviceToHost); | |
render(wnd, h_a); | |
t++; | |
if (t == 720) t = 0; | |
SDL_Delay(2); | |
} | |
cudaFree(d_a); | |
free(h_a); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment