Skip to content

Instantly share code, notes, and snippets.

@hamsham
Last active December 23, 2019 15:19
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamsham/a6fc2fc05972fff6c2f1 to your computer and use it in GitHub Desktop.
Save hamsham/a6fc2fc05972fff6c2f1 to your computer and use it in GitHub Desktop.
A basic ray marcher using CUDA
/*
* A small raymarcher using CUDA
*
* This program renders a dynamically-generated image to a PPM file.
*
* nvcc cuda_raymarcher.cu -arch=sm_20 -o cuda_raymarcher
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <cuda_runtime.h>
#include "cuda_math.h"
/*
 * Tunable constants for the ray marcher. All are single-precision literals so
 * that no expression using them is silently promoted to double on the device.
 */
/* Finite-difference step in calc_scene_normal(); also the hit threshold via MIN_DIST. */
#define EPSILON 1.0e-5f
/* Upper bound on the number of marching iterations per ray in raymarch(). */
#define MAX_STEPS 128
/* Travel distance at which raymarch() gives up; also its "no hit" return value. */
#define MAX_DIST 100.f
/* A scene distance below this value counts as a surface hit in raymarch(). */
#define MIN_DIST EPSILON
/* Multiplier applied to the diffuse term in get_diffuse_intensity(). */
#define LIGHT_INTENSITY 1.f
/* Not referenced by the code visible in this file. */
#define CAM_NEAR 0.1f
/*
 * Compute a unique linear index for the calling thread.
 *
 * Blocks are ranked row by row across the 2D grid, threads are ranked row by
 * row inside their 2D block, and the result is
 * (block rank * threads per block) + (thread rank within the block).
 * Only the x and y components of the launch configuration are used.
 */
__device__ unsigned calc_pixel_index()
{
    const unsigned threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned blockRank = blockIdx.y * gridDim.x + blockIdx.x;
    const unsigned threadRank = threadIdx.y * blockDim.x + threadIdx.x;

    return blockRank * threadsPerBlock + threadRank;
}
/*
 * Convert a linear pixel index into normalized image coordinates.
 *
 * The index is interpreted as column-major: x = arrayId / h selects the
 * column and the remainder selects the row inside that column. For
 * arrayId < w*h both coordinates are then remapped from [0, w) and [0, h)
 * to [-1, 1).
 *
 * w, h    - image width and height in pixels; both must be non-zero.
 * arrayId - linear pixel index.
 * returns - {x, y} in normalized coordinates.
 */
__device__ float2 calc_pixel_pos(const unsigned w, const unsigned h, const unsigned arrayId)
{
    const float wf = (float)w;
    const float hf = (float)h;
    const unsigned x = arrayId / h;
    /* The remainder must use the same stride (h) as the division above;
     * subtracting x*w wraps around as an unsigned value whenever w != h. */
    const unsigned y = arrayId - (x * h);
    float2 ret = {(float)x, (float)y};
    ret.x = 2.f * (ret.x / wf) - 1.f;
    ret.y = 2.f * (ret.y / hf) - 1.f;
    return ret;
}
/*
 * Distance from `pos` to the scene, which is currently a single torus.
 *
 * The torus is described by two radii (0.35 and 0.1). The length of the
 * (x, z) part of `pos` minus the first radius is paired with pos.y, and the
 * length of that pair minus the second radius is returned.
 *
 * A sphere alternative would be: length_vec3f(pos) - 0.5f.
 */
__device__ float calc_scene_distance(const float3 pos)
{
    const float majorRadius = 0.35f;
    const float minorRadius = 0.1f;

    const float planar = length_vec2f(fill2_vec2f(pos.x, pos.z));
    const float2 crossSection = fill2_vec2f(planar - majorRadius, pos.y);

    return length_vec2f(crossSection) - minorRadius;
}
/*
 * Diffuse brightness for a surface normal and a light direction.
 *
 * The dot product of the two vectors is scaled by LIGHT_INTENSITY and then
 * remapped with 0.5 * v + 0.5, so a value in (-1, 1) lands in (0, 1) as
 * required for a color channel.
 */
__device__ float get_diffuse_intensity(const float3 vertNorm, const float3 lightDir)
{
    const float cosine = dot_vec3f(vertNorm, lightDir);
    const float scaled = cosine * LIGHT_INTENSITY;

    return 0.5f * scaled + 0.5f;
}
/*
 * Estimate the surface orientation at `point` with one-sided finite
 * differences of the scene distance function.
 *
 * Each component is distance(point - EPSILON along that axis) minus
 * distance(point); the resulting vector is normalized before it is returned.
 *
 * NOTE(review): because the probes step in the negative direction, the result
 * points against the direction in which the distance increases. shade_scene()
 * appears to rely on this sign - confirm before changing it.
 */
__device__ float3 calc_scene_normal(const float3 point)
{
    const float step = EPSILON;
    const float centre = calc_scene_distance(point);

    /* One probe per axis, each shifted backwards by `step`. */
    const float3 probeX = {point.x - step, point.y, point.z};
    const float3 probeY = {point.x, point.y - step, point.z};
    const float3 probeZ = {point.x, point.y, point.z - step};

    float3 delta;
    delta.x = calc_scene_distance(probeX) - centre;
    delta.y = calc_scene_distance(probeY) - centre;
    delta.z = calc_scene_distance(probeZ) - centre;

    return normalize_vec3f(delta);
}
/*
 * Shade the point reached by travelling `dist` along `ray` from `eye`.
 *
 * The hit position is eye + ray * dist. Its orientation comes from
 * calc_scene_normal() and is lit by a single light fixed at (3, 3, 3).
 * The same intensity is written to all three channels, so the result is grey.
 */
__device__ float3 shade_scene(const float3 eye, const float3 ray, const float dist)
{
    const float3 lightPos = {3.f, 3.f, 3.f}; // TODO: kill the magic number

    const float3 travel = mul_vec3f(ray, fill_vec3f(dist));
    const float3 hit = add_vec3f(eye, travel);

    /* Direction built from (hit - lightPos), then normalized. */
    const float3 lightDir = normalize_vec3f(sub_vec3f(hit, lightPos));
    const float intensity = get_diffuse_intensity(calc_scene_normal(hit), lightDir);

    return fill_vec3f(intensity);
}
/*
 * March a ray through the scene.
 *
 * Starting at `origin`, the ray repeatedly advances along `direction` by the
 * distance reported by calc_scene_distance() at the current position.
 *
 * returns - the distance travelled once the scene distance drops below
 *           MIN_DIST, or MAX_DIST when the ray travels farther than MAX_DIST
 *           or MAX_STEPS iterations pass without a hit.
 */
__device__ float raymarch(const float3 origin, const float3 direction)
{
    const float hitThreshold = MIN_DIST;
    const float farLimit = MAX_DIST;
    float travelled = 0.f;

    for (unsigned step = 0; step < MAX_STEPS; ++step)
    {
        const float3 offset = mul_vec3f(direction, fill_vec3f(travelled));
        const float nearest = calc_scene_distance(add_vec3f(origin, offset));

        if (nearest < hitThreshold)
        {
            return travelled;
        }

        travelled += nearest;

        if (travelled > farLimit)
        {
            break;
        }
    }

    /* Either the ray left the scene or the step budget ran out. */
    return farLimit;
}
/*
 * Kernel: ray-march the scene for one pixel per thread and store its color.
 *
 * Launch layout: a 2D grid of 2D blocks (calc_pixel_index() reads only the
 * x and y components). Threads whose linear index is >= w*h exit immediately,
 * so the grid may be larger than the image.
 *
 * w, h    - image width and height in pixels (w*h must fit in an unsigned).
 * pPixels - output buffer with room for at least w*h float3 values; every
 *           channel written is clamped to [0, 1].
 */
__global__ void render_sphere(const unsigned w, const unsigned h, float3* const pPixels)
{
    const unsigned pixelId = calc_pixel_index();

    /* Reject surplus threads before doing any work; the march below is by far
     * the most expensive part of the kernel. */
    if (pixelId >= w*h)
    {
        return;
    }

    const float2 pixelXY = calc_pixel_pos(w, h, pixelId);

    /* Fixed camera: positioned at `eye`, looking along `fwd`. */
    const float3 eye = {0.f, 1.f, -1.f};
    const float3 fwd = {0.f, -1.f, 1.f};
    const float3 up = {0.f, 1.f, 0.f};
    const float3 right = cross_vec3f(fwd, up);

    /* Offset the forward vector by the pixel's normalized position. */
    const float3 r = mul_vec3f(right, fill_vec3f(pixelXY.x));
    const float3 u = mul_vec3f(up, fill_vec3f(-pixelXY.y));
    const float3 ray = normalize_vec3f(add_vec3f(add_vec3f(r, u), fwd));

    const float dist = raymarch(eye, ray);

    /* Background color, replaced below when the ray hit something. */
    float3 color = {0.1f, 0.2f, 0.3f};

    if (dist < MAX_DIST)
    {
        color = shade_scene(eye, ray, dist);
    }

    /* pixel values need to remain between (0, 1) for proper conversion to unsigned chars. */
    color.x = clampf(color.x, 0.f, 1.f);
    color.y = clampf(color.y, 0.f, 1.f);
    color.z = clampf(color.z, 0.f, 1.f);

    /* Single store to the output buffer instead of write + read-back + write. */
    pPixels[pixelId] = color;
}
/*
 * Convert one floating-point pixel into three 8-bit channels.
 *
 * Each channel is multiplied by 255, rounded down with floorf() and cast to
 * unsigned char. The caller is expected to supply channels already limited
 * to [0, 1]; no clamping happens here.
 */
__host__ uchar3 float3_to_uchar3(const float3* const pPixel)
{
    const float scale = 255.f;
    uchar3 bytes;

    bytes.x = (unsigned char)floorf(pPixel->x * scale);
    bytes.y = (unsigned char)floorf(pPixel->y * scale);
    bytes.z = (unsigned char)floorf(pPixel->z * scale);

    return bytes;
}
/*
 * Write an image to a binary PPM ("P6") file.
 *
 * The pixel buffer is read column-major with a stride of h (pixel at column
 * `col`, row `row` lives at index h*col + row), which matches the
 * `arrayId / h` decomposition used by calc_pixel_pos(). The file itself is
 * written row by row, as the PPM format requires.
 *
 * w, h      - image width and height in pixels.
 * pPixels   - host buffer holding at least w*h pixels with channels in [0, 1].
 * pFilename - path of the file to create or overwrite.
 * returns   - 1 on success; 0 if an argument is NULL, the file cannot be
 *             opened, or any write or the final close fails.
 */
__host__ int save_pixels_to_file(const unsigned w, const unsigned h, const float3* const pPixels, const char* const pFilename)
{
    unsigned row, col;
    int ok = 1;
    FILE* pFile;

    if (!pPixels || !pFilename)
    {
        return 0;
    }

    pFile = fopen(pFilename, "wb");
    if (!pFile)
    {
        return 0;
    }

    if (fprintf(pFile, "P6\n%u %u\n255\n", w, h) < 0)
    {
        ok = 0;
    }

    for (row = 0; ok && row < h; ++row)
    {
        for (col = 0; col < w; ++col)
        {
            /* Stride is h, not w: using w reads the wrong pixel (and can run
             * past the end of the buffer) whenever w != h. */
            const uchar3 pixel = float3_to_uchar3(&pPixels[(size_t)h * col + row]);
            const unsigned char pixelBuffer[3] = {pixel.x, pixel.y, pixel.z};

            if (fwrite(pixelBuffer, sizeof(unsigned char), sizeof(pixelBuffer), pFile) != sizeof(pixelBuffer))
            {
                ok = 0;
                break;
            }
        }
    }

    /* fclose() flushes buffered data, so its result decides success as well. */
    if (fclose(pFile) != 0)
    {
        ok = 0;
    }

    return ok;
}
/*
 * Terminate the program with a readable message when a CUDA runtime call
 * fails. `what` describes the step that was being attempted.
 */
static void check_cuda(const cudaError_t status, const char* const what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "\nCUDA failure while %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Render a 1024x1024 image on the GPU and save it as "cuda_sphere.ppm".
 *
 * Every CUDA call is checked explicitly rather than inside assert(): with
 * NDEBUG defined, assert() expands to nothing and the calls themselves would
 * disappear from the program.
 *
 * returns - 0 on success, EXIT_FAILURE if a host allocation or the file
 *           write fails (CUDA failures exit through check_cuda()).
 */
int main()
{
    const unsigned w = 1024;
    const unsigned h = 1024;
    const unsigned numBytes = w * h * sizeof(float3);
    float3* hostPixels = NULL;
    float3* devicePixels = NULL;
    dim3 gridSize, blockSize;
    int saved;

    printf("\nAllocating %u bytes for host pixel storage...", numBytes);
    /* calloc() returns zero-filled memory, replacing malloc() + memset(). */
    hostPixels = (float3*)calloc((size_t)w * h, sizeof(float3));
    if (!hostPixels)
    {
        fprintf(stderr, "\nUnable to allocate host pixel storage.\n");
        return EXIT_FAILURE;
    }
    printf("Success!\n");

    printf("\nAllocating %u bytes for device processing storage...", numBytes);
    check_cuda(cudaMalloc(&devicePixels, numBytes), "allocating device memory");
    check_cuda(cudaMemset(devicePixels, 0, numBytes), "clearing device memory");
    printf("Success!\n");

    printf("\nProcessing pixel data on the GPU...");
    blockSize.x = 32;
    blockSize.y = 32;
    blockSize.z = 1;
    /* Round up so the grid covers the image even when it is not a multiple of the block size. */
    gridSize.x = (w + blockSize.x-1) / blockSize.x;
    gridSize.y = (h + blockSize.y-1) / blockSize.y;
    gridSize.z = 1;
    render_sphere<<< gridSize, blockSize >>>(w, h, devicePixels);
    /* A launch does not return a status; configuration errors surface here. */
    check_cuda(cudaGetLastError(), "launching render_sphere");
    printf("Done!\n");

    printf("\nWaiting for the GPU to finish its thang...");
    /* Errors raised while the kernel runs are reported by the synchronize call. */
    check_cuda(cudaDeviceSynchronize(), "waiting for render_sphere");
    printf("Done!\n");

    printf("\nCopying pixels from the GPU to the host...");
    check_cuda(cudaMemcpy(hostPixels, devicePixels, numBytes, cudaMemcpyDeviceToHost), "copying pixels to the host");
    printf("Done!\n");

    printf("\nSaving pixels to a PPM image file...");
    saved = save_pixels_to_file(w, h, hostPixels, "cuda_sphere.ppm");
    if (saved)
    {
        printf("Done!\n");
    }
    else
    {
        fprintf(stderr, "\nUnable to write cuda_sphere.ppm.\n");
    }

    /* Release memory on both the success path and the failed-save path. */
    printf("\nReleasing host and device memory...");
    free(hostPixels);
    check_cuda(cudaFree(devicePixels), "releasing device memory");
    printf("Done.\n\n");

    return saved ? 0 : EXIT_FAILURE;
}
@hamsham
Copy link
Author

hamsham commented Aug 18, 2015

cuda_raymarcher

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment