Skip to content

Instantly share code, notes, and snippets.

@TheBurnDoc
Created October 21, 2013 12:42
Show Gist options
  • Save TheBurnDoc/7083256 to your computer and use it in GitHub Desktop.
Save TheBurnDoc/7083256 to your computer and use it in GitHub Desktop.
A Gist that demonstrates how to set up and call a CUDA C kernel that generates a Julia set fractal.
#include <cstdio>
#include <cassert>
#include <cuda.h>
#include <cuda_profiler_api.h>
#include "juliaset.h"
// Z(n+1) = Z(n)^2 + C
// Device helper: escape-time value for a single pixel.
// Maps pixel (x, y) of a w-by-h image to a starting point on the complex
// plane, then repeatedly applies z = z * z + c with c = (cr, ci).
// Returns the index of the first iteration whose squared magnitude exceeds
// the threshold, divided by the iteration limit, or 1 if the threshold is
// never exceeded within the limit.
static __device__ float julia(uint16_t x, uint16_t y, uint16_t w, uint16_t h, float cr, float ci)
{
    const int kMaxSteps = 200;
    const float kEscape = 1000.0f;
    const float kZoom = 1.5f;

    // Offset of the pixel from the image centre, normalised by half the extent
    const int halfW = w / 2;
    const int halfH = h / 2;
    const float re0 = kZoom * (float)(halfW - x) / halfW;
    const float im0 = kZoom * (float)(halfH - y) / halfH;

    const Complex c(cr, ci);
    Complex z(re0, im0);

    // Apply the recurrence until the point escapes or the step budget runs out
    uint32_t step = 0;
    while (step < kMaxSteps)
    {
        z = z * z + c;
        if (z.magnitude2() > kEscape)
            return (float)step / (float)kMaxSteps;
        ++step;
    }

    return 1.f;
}
// Kernel function
// Computes the Julia value of one pixel and stores it in pdBuffer.
//   pdBuffer - device buffer with 3 bytes per pixel, rows of w pixels
//   w, h     - image width and height in pixels
//   cr, ci   - real and imaginary parts of the constant passed to julia()
// Expects a 2D launch covering at least w x h threads in total; threads that
// fall outside the image return without writing.
static __global__ void kernel(byte_t* pdBuffer, uint16_t w, uint16_t h, float cr, float ci)
{
    // Global pixel coordinates. Including blockDim/threadIdx keeps the
    // indexing correct for any block size, not only single-thread blocks.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against threads beyond the image edge
    if (x >= w || y >= h)
        return;

    // Find buffer offset. Use size_t: for large images w * h * 3 does not
    // fit in a 32-bit int.
    const size_t offset = ((size_t)y * w + x) * 3;

    // Compute Julia value (x < w and y < h, so both fit in uint16_t)
    const float juliaValue = julia((uint16_t)x, (uint16_t)y, w, h, cr, ci);

    // Save the color
    pdBuffer[offset + 2] = (uint8_t)(juliaValue * 255.0f); // R
    pdBuffer[offset + 1] = 0; // G
    pdBuffer[offset + 0] = 0; // B
}
// Renders a Julia set image on the GPU and copies it into a host buffer.
//   pBuffer - host buffer that receives w * h * 3 bytes; must not be null
//   w, h    - image width and height in pixels
//   cr, ci  - real and imaginary parts of the Julia constant
// Every CUDA call is wrapped in CUDA_CALL, which handles failures.
void generateJuliaSet(byte_t* pBuffer, uint16_t w, uint16_t h, float cr, float ci)
{
    assert(pBuffer);

    // An empty image has nothing to render, and a grid dimension of zero is
    // not a valid launch configuration.
    if (w == 0 || h == 0)
        return;

    // Compute buffer size in bytes (3 channel 24-bit RGB). Widen before
    // multiplying: w * h would otherwise be evaluated in int, and the byte
    // count of a large image does not fit in 32 bits.
    const size_t bufferSize = (size_t)w * h * 3 * sizeof(byte_t);

    // Allocate device buffer
    byte_t* pdBuffer = 0;
    CUDA_CALL(cudaMalloc((void**)&pdBuffer, bufferSize));

    // Call the kernel: one single-thread block per pixel
    dim3 grid(w, h);
    CUDA_CALL(cudaProfilerStart());
    kernel<<<grid,1>>>(pdBuffer, w, h, cr, ci);
    // A kernel launch returns no status itself; launch errors are picked up here
    CUDA_CALL(cudaGetLastError());
    // Wait for the kernel so execution errors are reported at this point
    CUDA_CALL(cudaDeviceSynchronize());
    CUDA_CALL(cudaProfilerStop());

    // Copy result to host buffer
    CUDA_CALL(cudaMemcpy(pBuffer, pdBuffer, bufferSize, cudaMemcpyDeviceToHost));

    // Free device buffer
    CUDA_CALL(cudaFree((void*)pdBuffer));
}
#pragma once
// CUDA error checking macro (wrap CUDA calls with this)
// Evaluates `call` once. If the result is not cudaSuccess, prints the file,
// line and CUDA error string to stderr and terminates the process.
// The do { } while (0) wrapper makes the expansion a single statement, so
// `if (x) CUDA_CALL(f()); else ...` parses as written. Use it with a
// trailing semicolon.
#define CUDA_CALL(call) do { \
    cudaError_t cudaCallErr_ = (call); \
    if (cudaCallErr_ != cudaSuccess) \
    { \
        fprintf(stderr, "[FATL] %s(%i): %s\n", __FILE__, __LINE__, cudaGetErrorString(cudaCallErr_)); \
        exit(EXIT_FAILURE); \
    } } while (0)
// Complex number data structure (usable from both host and device code)
struct Complex
{
    float r, i; // real and imaginary parts

    inline __host__ __device__ Complex(float a, float b) : r(a), i(b) {}
    inline __host__ __device__ Complex(const Complex& c) : r(c.r), i(c.i) {}

    // Squared magnitude: r^2 + i^2
    inline __host__ __device__ float magnitude2() const { return r * r + i * i; }
    // Magnitude; sqrtf keeps the computation in single precision
    inline __host__ __device__ float magnitude() const { return sqrtf(this->magnitude2()); }

    // Complex product and sum. Both are const so they also work on const
    // operands; neither modifies *this.
    inline __host__ __device__ Complex operator * (const Complex& a) const { return Complex(r*a.r - i*a.i, i*a.r + r*a.i); }
    inline __host__ __device__ Complex operator + (const Complex& a) const { return Complex(r+a.r, i+a.i); }
};
// Function which sets up and calls CUDA kernel
//   pBuffer - host buffer that receives the image, 3 bytes per pixel
//             (w * h * 3 bytes in total); must not be null
//   w, h    - image width and height in pixels
//   cr, ci  - real and imaginary parts of the Julia constant
void generateJuliaSet(byte_t* pBuffer, uint16_t w, uint16_t h, float cr, float ci);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment