Last active
December 23, 2019 15:19
-
-
Save hamsham/a6fc2fc05972fff6c2f1 to your computer and use it in GitHub Desktop.
A basic ray marcher using CUDA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * A small raymarcher using CUDA
 *
 * This program renders a dynamically-generated image to a PPM file.
 *
 * nvcc cuda_raymarcher.cu -arch=sm_20 -o cuda_raymarcher
 */
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cuda_runtime.h>

#include "cuda_math.h"
/*
 * Tuning constants for the ray marcher.
 *
 * Every floating-point constant carries an 'f' suffix so that using one
 * directly inside a float expression never promotes the arithmetic to double.
 */

/* Offset used for the finite-difference normal; also the base for MIN_DIST. */
#define EPSILON 1.0e-5f

/* Upper bound on the number of marching iterations per ray. */
#define MAX_STEPS 128

/* Travel distance at which a ray is treated as having missed the scene. */
#define MAX_DIST 100.f

/* Scene distance below which a ray is treated as having hit the surface. */
#define MIN_DIST EPSILON

/* Multiplier applied to the diffuse term before it is remapped. */
#define LIGHT_INTENSITY 1.f

/* NOTE(review): not referenced in the visible part of this file. */
#define CAM_NEAR 0.1f
/*
 * Flattens the calling thread's 2D block and thread coordinates into a single
 * index.
 *
 * Blocks are numbered row by row across the grid (blockIdx.x varies fastest)
 * and threads are numbered row by row inside their block (threadIdx.x varies
 * fastest). Only the x and y components of the launch configuration are used.
 */
__device__ unsigned calc_pixel_index()
{
    const unsigned threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned linearBlock     = blockIdx.y * gridDim.x + blockIdx.x;
    const unsigned linearThread    = threadIdx.y * blockDim.x + threadIdx.x;

    return linearBlock * threadsPerBlock + linearThread;
}
/*
 * Converts a flat pixel index into normalized image coordinates.
 *
 * The index is interpreted column by column, each column holding h entries:
 *     x = arrayId / h,   y = arrayId % h
 * Both components are then remapped as 2 * (value / extent) - 1, so x == 0
 * and y == 0 map to -1.
 *
 * The previous code derived y as arrayId - x * w, which only agrees with the
 * division by h when w == h; for any other image shape y was wrong (and could
 * wrap around as an unsigned value).
 *
 * w, h    : image width and height in pixels; h must be non-zero.
 * arrayId : flat pixel index.
 *
 * Marked __host__ __device__ so the mapping can also be checked on the CPU.
 */
__host__ __device__ float2 calc_pixel_pos(const unsigned w, const unsigned h, const unsigned arrayId)
{
    const unsigned x = arrayId / h;
    const unsigned y = arrayId % h;

    float2 ret;
    ret.x = 2.f * ((float)x / (float)w) - 1.f;
    ret.y = 2.f * ((float)y / (float)h) - 1.f;
    return ret;
}
/*
 * Distance estimate from pos to the scene, which currently consists of a
 * single torus described by the pair (0.35, 0.1).
 *
 * NOTE(review): length_vec2f / fill2_vec2f come from cuda_math.h (not shown);
 * this assumes they are the Euclidean length and a two-component constructor.
 */
__device__ float calc_scene_distance(const float3 pos)
{
    // Alternative scene kept for reference: a sphere of radius 0.5.
    //   return length_vec3f(pos) - 0.5f;

    const float ringRadius = 0.35f;
    const float tubeRadius = 0.1f;

    // Length of the (x, z) part of the position.
    const float planarLen = length_vec2f(fill2_vec2f(pos.x, pos.z));

    // Offset from the ring, paired with the y component.
    const float2 offset = fill2_vec2f(planarLen - ringRadius, pos.y);

    return length_vec2f(offset) - tubeRadius;
}
/*
 * Diffuse term for a surface normal and a light direction.
 *
 * The dot product of the two vectors is scaled by LIGHT_INTENSITY and then
 * remapped with 0.5 * value + 0.5, because color values have to end up in
 * (0, 1) rather than (-1, 1).
 */
__device__ float get_diffuse_intensity(const float3 vertNorm, const float3 lightDir)
{
    const float cosTerm = dot_vec3f(vertNorm, lightDir);
    const float scaled  = cosTerm * LIGHT_INTENSITY;

    return 0.5f * scaled + 0.5f;
}
/*
 * Estimates a unit direction perpendicular to the scene surface near point,
 * using one-sided finite differences of calc_scene_distance().
 *
 * Each component is distance(point - EPSILON along that axis) minus
 * distance(point). Because the sample is taken on the negative side, the
 * resulting vector follows the direction in which the distance DEcreases.
 * shade_scene() pairs it with a light-to-surface direction, so keep the two
 * conventions in sync if either one is changed.
 */
__device__ float3 calc_scene_normal(const float3 point)
{
    const float step   = EPSILON;
    const float center = calc_scene_distance(point);

    const float3 backX = {point.x - step, point.y, point.z};
    const float3 backY = {point.x, point.y - step, point.z};
    const float3 backZ = {point.x, point.y, point.z - step};

    float3 delta;
    delta.x = calc_scene_distance(backX) - center;
    delta.y = calc_scene_distance(backY) - center;
    delta.z = calc_scene_distance(backZ) - center;

    return normalize_vec3f(delta);
}
/*
 * Computes a grey color for the location reached by travelling dist along
 * ray from eye.
 *
 * The light sits at a fixed position (3, 3, 3). The direction handed to
 * get_diffuse_intensity() is normalize(hit - lightPos), i.e. it runs from the
 * light towards the hit location. The same intensity is written to all three
 * channels.
 */
__device__ float3 shade_scene(const float3 eye, const float3 ray, const float dist)
{
    const float3 lightPos = {3.f, 3.f, 3.f}; // TODO: kill the magic number

    const float3 travel = mul_vec3f(ray, fill_vec3f(dist));
    const float3 hit    = add_vec3f(eye, travel);

    const float3 surfaceDir = calc_scene_normal(hit);
    const float3 fromLight  = normalize_vec3f(sub_vec3f(hit, lightPos));

    const float intensity = get_diffuse_intensity(surfaceDir, fromLight);
    return fill_vec3f(intensity);
}
/*
 * Basic ray-marching loop.
 *
 * Starting at origin, repeatedly advances along direction by the distance
 * reported by calc_scene_distance() at the current position.
 *
 * Returns the distance travelled so far as soon as the scene distance drops
 * below MIN_DIST. Returns MAX_DIST when the travelled distance exceeds
 * MAX_DIST or when MAX_STEPS iterations pass without such a hit.
 */
__device__ float raymarch(const float3 origin, const float3 direction)
{
    const unsigned stepLimit = MAX_STEPS;
    const float farLimit     = MAX_DIST;
    const float hitThreshold = MIN_DIST;

    float travelled = 0.f;

    for (unsigned step = 0; step < stepLimit; ++step)
    {
        const float3 offset = mul_vec3f(direction, fill_vec3f(travelled));
        const float3 probe  = add_vec3f(origin, offset);
        const float gap     = calc_scene_distance(probe);

        if (gap < hitThreshold)
        {
            // Close enough to the surface: report how far the ray went.
            return travelled;
        }

        travelled += gap;

        if (travelled > farLimit)
        {
            // Left the region of interest; fall through to the miss result.
            break;
        }
    }

    return farLimit;
}
/*
 * Kernel: renders one pixel per thread into pPixels.
 *
 * Launch layout: a 2D grid of 2D blocks; calc_pixel_index() flattens the
 * block/thread coordinates into the pixel index, and calc_pixel_pos() turns
 * that index into normalized image coordinates. Threads whose index is not
 * below w * h exit immediately, before doing any ray marching.
 *
 * w, h    : image dimensions in pixels.
 * pPixels : device buffer with room for at least w * h float3 values.
 *
 * Each in-range thread writes exactly one float3, with every channel clamped
 * to the range 0..1 so it can later be converted to an unsigned char.
 */
__global__ void render_sphere(const unsigned w, const unsigned h, float3* const pPixels)
{
    const unsigned pixelId = calc_pixel_index();

    // Guard first: surplus threads must neither march a ray nor touch memory.
    if (pixelId >= w * h)
    {
        return;
    }

    const float2 pixelXY = calc_pixel_pos(w, h, pixelId);

    // Fixed camera: positioned at (0, 1, -1), looking along (0, -1, 1).
    const float3 eye   = {0.f, 1.f, -1.f};
    const float3 fwd   = {0.f, -1.f, 1.f};
    const float3 up    = {0.f, 1.f, 0.f};
    const float3 right = cross_vec3f(fwd, up);

    // Ray through this pixel: fwd offset by the pixel position along right
    // and (negated) up, normalized afterwards.
    const float3 r   = mul_vec3f(right, fill_vec3f(pixelXY.x));
    const float3 u   = mul_vec3f(up, fill_vec3f(-pixelXY.y));
    const float3 ray = normalize_vec3f(add_vec3f(add_vec3f(r, u), fwd));

    const float dist = raymarch(eye, ray);

    float3 color;
    if (dist < MAX_DIST)
    {
        color = shade_scene(eye, ray, dist);
    }
    else
    {
        // Background color for rays that hit nothing.
        color.x = 0.1f;
        color.y = 0.2f;
        color.z = 0.3f;
    }

    /* pixel values need to remain between (0, 1) for proper conversion to unsigned chars. */
    color.x = clampf(color.x, 0.f, 1.f);
    color.y = clampf(color.y, 0.f, 1.f);
    color.z = clampf(color.z, 0.f, 1.f);

    // Single store of the finished color.
    pPixels[pixelId] = color;
}
/*
 * Converts one color channel to a byte: the value is clamped to 0..1, scaled
 * by 255 and rounded down. Clamping keeps the float-to-unsigned-char
 * conversion well defined for out-of-range input; fmaxf() also turns a NaN
 * into 0.
 */
static unsigned char color_channel_to_byte(const float value)
{
    const float unit = fminf(fmaxf(value, 0.f), 1.f);
    return (unsigned char)floorf(unit * 255.f);
}

/*
 * Converts a floating-point RGB pixel into an 8-bit-per-channel pixel.
 *
 * pPixel : pixel to convert; must not be NULL. Channels are expected in
 *          0..1; values outside that range are clamped.
 *
 * Returns the converted pixel (x, y, z each in 0..255).
 */
__host__ uchar3 float3_to_uchar3(const float3* const pPixel)
{
    uchar3 ret;
    ret.x = color_channel_to_byte(pPixel->x);
    ret.y = color_channel_to_byte(pPixel->y);
    ret.z = color_channel_to_byte(pPixel->z);
    return ret;
}
/*
 * Writes an image to a binary PPM ("P6") file with a maximum channel value
 * of 255.
 *
 * w, h      : image dimensions in pixels.
 * pPixels   : host buffer of w * h pixels, stored column by column: the pixel
 *             at column c, row r lives at index h * c + r. This matches the
 *             way calc_pixel_pos() derives the column as index / h. (The
 *             previous index, w * c + r, was only correct for square images
 *             and read out of bounds whenever w > h.)
 * pFilename : path of the file to create or overwrite.
 *
 * Returns 1 on success and 0 if an argument is NULL, the file cannot be
 * opened, or any write (including the final flush in fclose) fails.
 */
__host__ int save_pixels_to_file(const unsigned w, const unsigned h, const float3* const pPixels, const char* const pFilename)
{
    unsigned row, col;
    int ok = 1;
    FILE* pFile;

    if (!pPixels || !pFilename)
    {
        return 0;
    }

    pFile = fopen(pFilename, "wb");
    if (!pFile)
    {
        return 0;
    }

    if (fprintf(pFile, "P6\n%u %u\n255\n", w, h) < 0)
    {
        ok = 0;
    }

    // PPM stores rows top to bottom, so rows form the outer loop.
    for (row = 0; ok && row < h; ++row)
    {
        for (col = 0; col < w; ++col)
        {
            const size_t index = (size_t)h * col + row;
            const uchar3 pixel = float3_to_uchar3(&pPixels[index]);
            const unsigned char rgb[3] = {pixel.x, pixel.y, pixel.z};

            if (fwrite(rgb, sizeof(unsigned char), sizeof(rgb), pFile) != sizeof(rgb))
            {
                ok = 0;
                break;
            }
        }
    }

    // fclose flushes buffered data, so its result is part of the outcome.
    if (fclose(pFile) != 0)
    {
        ok = 0;
    }

    return ok;
}
/*
 * Terminates the program with a diagnostic if a CUDA runtime call failed.
 *
 * This replaces assert(cudaSuccess == call): with NDEBUG defined an assert
 * removes the call itself, so the allocation / copy / sync would silently
 * never happen in release builds.
 */
static void require_cuda_success(const cudaError_t status, const char* const action)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "\nCUDA error while %s: %s\n", action, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Renders a 1024x1024 image on the GPU and writes it to "cuda_sphere.ppm".
 *
 * Steps: allocate and zero host and device buffers, launch render_sphere
 * with 32x32 blocks over a grid covering the image, wait for the kernel,
 * copy the pixels back, save them, and release both buffers.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any failure.
 */
int main()
{
    const unsigned w = 1024;
    const unsigned h = 1024;
    const unsigned numBytes = (unsigned)(w * h * sizeof(float3));
    float3* hostPixels = NULL;
    float3* devicePixels = NULL;
    dim3 gridSize, blockSize;

    printf("\nAllocating %u bytes for host pixel storage...", numBytes);
    hostPixels = (float3*)malloc(numBytes);
    if (hostPixels == NULL)
    {
        fprintf(stderr, "\nUnable to allocate host pixel storage.\n");
        return EXIT_FAILURE;
    }
    memset(hostPixels, 0, numBytes);
    printf("Success!\n");

    printf("\nAllocating %u bytes for device processing storage...", numBytes);
    require_cuda_success(cudaMalloc(&devicePixels, numBytes), "allocating device memory");
    require_cuda_success(cudaMemset(devicePixels, 0, numBytes), "clearing device memory");
    printf("Success!\n");

    printf("\nProcessing pixel data on the GPU...");
    blockSize.x = 32;
    blockSize.y = 32;
    blockSize.z = 1;
    // Round up so the grid covers the image even if a dimension is not a
    // multiple of the block size; the kernel discards the surplus threads.
    gridSize.x = (w + blockSize.x - 1) / blockSize.x;
    gridSize.y = (h + blockSize.y - 1) / blockSize.y;
    gridSize.z = 1;
    render_sphere<<< gridSize, blockSize >>>(w, h, devicePixels);
    // A launch does not return a status; configuration errors are reported
    // through cudaGetLastError().
    require_cuda_success(cudaGetLastError(), "launching the render kernel");
    printf("Done!\n");

    printf("\nWaiting for the GPU to finish its thang...");
    require_cuda_success(cudaDeviceSynchronize(), "waiting for the render kernel");
    printf("Done!\n");

    printf("\nCopying pixels from the GPU to the host...");
    require_cuda_success(
        cudaMemcpy(hostPixels, devicePixels, numBytes, cudaMemcpyDeviceToHost),
        "copying pixels to the host");
    printf("Done!\n");

    printf("\nSaving pixels to a PPM image file...");
    if (0 == save_pixels_to_file(w, h, hostPixels, "cuda_sphere.ppm"))
    {
        fprintf(stderr, "\nUnable to write the image file.\n");
        free(hostPixels);
        cudaFree(devicePixels);
        return EXIT_FAILURE;
    }
    printf("Done!\n");

    printf("\nReleasing host and device memory...");
    free(hostPixels);
    require_cuda_success(cudaFree(devicePixels), "releasing device memory");
    printf("Done.\n\n");

    return 0;
}
Author
hamsham
commented
Aug 18, 2015
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment