Skip to content

Instantly share code, notes, and snippets.

@BrentFarris
Last active October 16, 2023 15:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BrentFarris/062e974457932c49f19e901805e632fd to your computer and use it in GitHub Desktop.
Save BrentFarris/062e974457932c49f19e901805e632fd to your computer and use it in GitHub Desktop.
ffmpeg -r 30 -f image2 -c:v ppm -s 640x480 -start_number 0 -i frame-%%d.ppm -vcodec libx264 -crf 25 -pix_fmt yuv420p baked.mp4
#include "cuda_runtime.h"
#include "curand_kernel.h"
#include "sm_20_atomic_functions.h"
#include "device_launch_parameters.h"
#define _CRT_SECURE_NO_DEPRECATE
#define _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#include <math.h>
#include <time.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
//#define WIDTH 200
//#define HEIGHT 100
//#define WIDTH 256
//#define HEIGHT 240
//#define WIDTH 320
//#define HEIGHT 240
//#define WIDTH 640
//#define HEIGHT 480
#define WIDTH 720
#define HEIGHT 480
//#define WIDTH 858
//#define HEIGHT 480
//#define WIDTH 1920
//#define HEIGHT 1080
#define PIXEL_DIM_SPLIT 2
#define PIXEL_SIZE sizeof(struct v3) * WIDTH * HEIGHT
#define SAMPLES 16
#define MAX_DEPTH 50
#define RAND_COUNT_THREAD 8 * MAX_DEPTH
#define RANDS_COUNT RAND_COUNT_THREAD * SAMPLES * PIXEL_DIM_SPLIT
#define RANDS_SIZE sizeof(float) * RANDS_COUNT
#define MEM_LENGTH WIDTH * HEIGHT * 3
#define SPHERE_COUNT 175
#define PPM_HEADER_FORMAT "P3\n%d %d\n255\n"
#define FRAMES 120
#ifdef __cplusplus
extern "C" {
#endif
/************************************************************************/
/************************************************************************/
/* MTwister */
/************************************************************************/
/************************************************************************/
// https://github.com/ESultanik/mtwister
/* An implementation of the MT19937 Algorithm for the Mersenne Twister
* by Evan Sultanik. Based upon the pseudocode in: M. Matsumoto and
* T. Nishimura, "Mersenne Twister: A 623-dimensionally
* equidistributed uniform pseudorandom number generator," ACM
* Transactions on Modeling and Computer Simulation Vol. 8, No. 1,
* January pp.3-30 1998.
*
* http://www.sultanik.com/Mersenne_twister
*/
#define STATE_VECTOR_LENGTH 624
#define STATE_VECTOR_SIZE sizeof(uint64_t) * STATE_VECTOR_LENGTH
#define STATE_VECTOR_M 397 /* changes to STATE_VECTOR_LENGTH also require changes to this */
#define UPPER_MASK 0x80000000
#define LOWER_MASK 0x7fffffff
#define TEMPERING_MASK_B 0x9d2c5680
#define TEMPERING_MASK_C 0xefc60000
typedef struct tagMTRand {
uint64_t* mt;
int32_t index;
} MTRand;
__device__ __host__ static void m_seedRand(MTRand* rnd, uint64_t seed)
{
/* set initial seeds to mt[STATE_VECTOR_LENGTH] using the generator
* from Line 25 of Table 1 in: Donald Knuth, "The Art of Computer
* Programming," Vol. 2 (2nd Ed.) pp.102.
*/
rnd->mt = (uint64_t*)malloc(STATE_VECTOR_SIZE);
rnd->mt[0] = seed & 0xffffffff;
for (rnd->index = 1; rnd->index < STATE_VECTOR_LENGTH; rnd->index++)
rnd->mt[rnd->index] = (6069 * rnd->mt[rnd->index - 1]) & 0xffffffff;
}
/**
* Creates a new random number generator from a given seed.
*/
__device__ __host__ MTRand seed_rand(uint64_t seed)
{
MTRand rnd;
m_seedRand(&rnd, seed);
return rnd;
}
__device__ __host__ void mt_rand_free(MTRand* rnd)
{
free(rnd->mt);
}
MTRand make_rand_seed()
{
time_t now;
time(&now);
return seed_rand(now);
}
/**
* Generates a pseudo-randomly generated long.
*/
__device__ __host__ uint64_t gen_rand_long(MTRand* rnd)
{
uint64_t y;
static uint64_t mag[2] = { 0x0, 0x9908b0df }; /* mag[x] = x * 0x9908b0df for x = 0,1 */
if (rnd->index >= STATE_VECTOR_LENGTH || rnd->index < 0)
{
/* generate STATE_VECTOR_LENGTH words at a time */
int32_t kk;
if (rnd->index >= STATE_VECTOR_LENGTH + 1 || rnd->index < 0)
{
m_seedRand(rnd, 4357);
}
for (kk = 0; kk < STATE_VECTOR_LENGTH - STATE_VECTOR_M; kk++)
{
y = (rnd->mt[kk] & UPPER_MASK) | (rnd->mt[kk + 1] & LOWER_MASK);
rnd->mt[kk] = rnd->mt[kk + STATE_VECTOR_M] ^ (y >> 1) ^ mag[y & 0x1];
}
for (; kk < STATE_VECTOR_LENGTH - 1; kk++)
{
y = (rnd->mt[kk] & UPPER_MASK) | (rnd->mt[kk + 1] & LOWER_MASK);
rnd->mt[kk] = rnd->mt[kk + (STATE_VECTOR_M - STATE_VECTOR_LENGTH)] ^ (y >> 1) ^ mag[y & 0x1];
}
y = (rnd->mt[STATE_VECTOR_LENGTH - 1] & UPPER_MASK) | (rnd->mt[0] & LOWER_MASK);
rnd->mt[STATE_VECTOR_LENGTH - 1] = rnd->mt[STATE_VECTOR_M - 1] ^ (y >> 1) ^ mag[y & 0x1];
rnd->index = 0;
}
y = rnd->mt[rnd->index++];
y ^= (y >> 11);
y ^= (y << 7) & TEMPERING_MASK_B;
y ^= (y << 15) & TEMPERING_MASK_C;
y ^= (y >> 18);
return y;
}
/**
* Generates a pseudo-randomly generated double in the range [0..1].
*/
__device__ __host__ double gen_rand(MTRand* rnd)
{
double val = 0.0 + gen_rand_long(rnd);
return(val / (double)0xffffffff);
}
__device__ __host__ int32_t rand_int32(MTRand* rnd, int32_t min, int32_t max)
{
uint64_t r = gen_rand_long(rnd);
int32_t q = r % INT32_MAX;
if (min < 0)
{
if (r <= UINT64_MAX / 2)
q *= -1;
}
return q % (max - min) + min;
}
__device__ __host__ float rand_float(MTRand* rnd, float min, float max)
{
return (float)gen_rand(rnd) * (max - min) + min;
}
/************************************************************************/
/************************************************************************/
/* V3 */
/************************************************************************/
/************************************************************************/
typedef struct v3 {
union {
float x;
float r;
};
union {
float y;
float g;
};
union {
float z;
float b;
};
} v3;
__device__ __host__ static v3 v3_zero()
{
v3 v;
v.x = 0.0F;
v.y = 0.0F;
v.z = 0.0F;
return v;
}
__device__ __host__ static v3 v3_one()
{
v3 v;
v.x = 1.0F;
v.y = 1.0F;
v.z = 1.0F;
return v;
}
__device__ __host__ static v3 v3_up()
{
v3 v;
v.x = 0.0F;
v.y = 1.0F;
v.z = 0.0F;
return v;
}
__device__ __host__ static v3 v3_get(const float x, const float y, const float z)
{
v3 v;
v.x = x;
v.y = y;
v.z = z;
return v;
}
__device__ __host__ void v3_load(const v3 from, v3* to)
{
to->x = from.x;
to->y = from.y;
to->z = from.z;
}
__device__ __host__ void v3_copy(const v3 from, v3* to)
{
to->x = from.x;
to->y = from.y;
to->z = from.z;
}
__device__ __host__ void v3_add(v3* lhs, const v3 rhs)
{
lhs->x += rhs.x;
lhs->y += rhs.y;
lhs->z += rhs.z;
}
__device__ __host__ void v3_subtract(v3* lhs, const v3 rhs)
{
lhs->x -= rhs.x;
lhs->y -= rhs.y;
lhs->z -= rhs.z;
}
__device__ __host__ void v3_multiply(v3* lhs, const v3 rhs)
{
lhs->x *= rhs.x;
lhs->y *= rhs.y;
lhs->z *= rhs.z;
}
__device__ __host__ void v3_divide(v3* lhs, const v3 rhs)
{
lhs->x /= rhs.x;
lhs->y /= rhs.y;
lhs->z /= rhs.z;
}
__device__ __host__ void v3_add_using(v3* lhs, const float x, const float y, const float z)
{
lhs->x += x;
lhs->y += y;
lhs->z += z;
}
__device__ __host__ void v3_subtract_using(v3* lhs, const float x, const float y, const float z)
{
lhs->x -= x;
lhs->y -= y;
lhs->z -= z;
}
__device__ __host__ v3 v3_add_to(const v3 a, const v3 b)
{
v3 v;
v.x = a.x + b.x;
v.y = a.y + b.y;
v.z = a.z + b.z;
return v;
}
__device__ __host__ v3 v3_subtract_to(const v3 a, const v3 b)
{
v3 v;
v.x = a.x - b.x;
v.y = a.y - b.y;
v.z = a.z - b.z;
return v;
}
__device__ __host__ void v3_scale(v3* from, const float scale)
{
from->x *= scale;
from->y *= scale;
from->z *= scale;
}
__device__ __host__ v3 v3_scale_from(const v3 vector, const float scale)
{
v3 v;
v.x = vector.x * scale;
v.y = vector.y * scale;
v.z = vector.z * scale;
return v;
}
__device__ __host__ v3 v3_scale_to(const v3 vector, const float scale)
{
v3 v;
v.x = vector.x * scale;
v.y = vector.y * scale;
v.z = vector.z * scale;
return v;
}
__device__ __host__ float v3_magnitude(const v3 vector)
{
return sqrtf((vector.x * vector.x) + (vector.y * vector.y) + (vector.z * vector.z));
}
__device__ __host__ float v3_distance(const v3 a, const v3 b)
{
return v3_magnitude(v3_subtract_to(a, b));
}
__device__ __host__ void v3_normalize(v3* vector)
{
float mag = v3_magnitude(*vector);
vector->x /= mag;
vector->y /= mag;
vector->z /= mag;
}
__device__ __host__ v3 v3_normalized(const v3 vector)
{
float mag = v3_magnitude(vector);
if (mag != 0)
{
v3 copy = vector;
copy.x /= mag;
copy.y /= mag;
copy.z /= mag;
return copy;
}
return v3_zero();
}
__device__ __host__ float v3_dot(const v3 a, const v3 b)
{
return (a.x * b.x) + (a.y * b.y) + (a.z * b.z);
}
__device__ __host__ float v3_dot_from(const v3 a, const v3 b)
{
return (a.x * b.x) + (a.y * b.y) + (a.z * b.z);
}
__device__ __host__ v3 v3_cross(const v3 a, const v3 b)
{
v3 cross;
cross.x = (a.y * b.z) - (a.z * b.y);
cross.y = (a.z * b.x) - (a.x * b.z);
cross.z = (a.x * b.y) - (a.y * b.x);
return cross;
}
__device__ __host__ v3 v3_unit(const v3 vec)
{
v3 out = vec;
float len = 1.0F / v3_magnitude(vec);
v3_scale(&out, len);
return out;
}
/************************************************************************/
/************************************************************************/
/* Ray */
/************************************************************************/
/************************************************************************/
typedef struct Ray {
v3 ori;
v3 dir;
} Ray;
__device__ __host__ static Ray ray_get(const v3 origin, const v3 direction)
{
Ray r;
r.ori = origin;
r.dir = direction;
return r;
}
__device__ __host__ static v3 ray_origin(const Ray* ray)
{
return ray->ori;
}
__device__ __host__ static v3 ray_direction(const Ray* ray)
{
return ray->dir;
}
__device__ __host__ v3 ray_point(const Ray* ray, const float len)
{
/*--- origin + length * direction -------------------------------------*/
v3 p = ray->dir;
v3_scale(&p, len);
v3_add(&p, ray->ori);
return p;
}
/************************************************************************/
/************************************************************************/
/* Communication structures */
/************************************************************************/
/************************************************************************/
struct ColorPick {
v3 color;
Ray scatter;
uint16_t depth;
bool done;
};
struct DeviceRands {
const float* nums;
size_t idx;
};
/************************************************************************/
/************************************************************************/
/* Camera */
/************************************************************************/
/************************************************************************/
struct Camera {
v3 origin;
v3 lowerLeft;
v3 horizontal;
v3 vertical;
v3 u;
v3 v;
v3 w;
float lensRadius;
};
void Camera_set_view(struct Camera* cam, const v3 origin, const v3 lookAt, const v3 up,
const float fov, const float aspect, const float focalDistance)
{
const float theta = (float)(fov * M_PI / 180.0F);
const float halfHeight = tanf(theta / 2.0F);
const float halfWidth = aspect * halfHeight;
cam->origin = origin;
cam->w = v3_unit(v3_subtract_to(origin, lookAt));
cam->u = v3_unit(v3_cross(up, cam->w));
cam->v = v3_cross(cam->w, cam->u);
//cam->lowerLeft = v3_get(-halfWidth, -halfHeight, -1.0F);
cam->lowerLeft = cam->origin;
v3_subtract(&cam->lowerLeft, v3_scale_to(cam->u, halfWidth * focalDistance));
v3_subtract(&cam->lowerLeft, v3_scale_to(cam->v, halfHeight * focalDistance));
v3_subtract(&cam->lowerLeft, v3_scale_to(cam->w, focalDistance));
cam->horizontal = cam->u;
v3_scale(&cam->horizontal, halfWidth * 2.0F * focalDistance);
cam->vertical = cam->v;
v3_scale(&cam->vertical, halfHeight * 2.0F * focalDistance);
}
struct Camera* Camera_new(const v3 origin, const v3 lookAt, const v3 up, const float fov, const float aspect,
const float aperture, const float focalDistance)
{
struct Camera* cam = (struct Camera*)calloc(1, sizeof(struct Camera));
cam->lensRadius = aperture / 2.0F;
Camera_set_view(cam, origin, lookAt, up, fov, aspect, focalDistance);
return cam;
}
void Camera_free(struct Camera* cam)
{
free(cam);
}
__device__ static v3 local_random_unit_disk(struct DeviceRands* rng)
{
v3 p = v3_get(rng->nums[rng->idx++], rng->nums[rng->idx++], 0.0F);
v3_scale(&p, 2.0F);
v3_subtract(&p, v3_get(1.0F, 1.0F, 0.0F));
if (v3_dot(p, p) >= 1.0F)
v3_scale(&p, 0.5F);
return p;
}
__device__ Ray Camera_ray(struct Camera* cam, const float u, const float v, struct DeviceRands* rng)
{
v3 r = v3_scale_to(local_random_unit_disk(rng), cam->lensRadius);
v3 offset = v3_scale_to(cam->u, r.x);
v3_add(&offset, v3_scale_to(cam->v, r.y));
v3 dir = cam->lowerLeft;
v3_add(&dir, v3_scale_to(cam->horizontal, u));
v3_add(&dir, v3_scale_to(cam->vertical, v));
v3 oo = v3_add_to(cam->origin, offset);
v3_subtract(&dir, oo);
return ray_get(oo, dir);
}
/************************************************************************/
/************************************************************************/
/* Hit */
/************************************************************************/
/************************************************************************/
struct Material;
typedef struct Hit {
v3 p; // Point
v3 nml; // Normal
float t; // Time (for like motion blur or w/e)
const struct Material* mat;
} Hit;
/************************************************************************/
/************************************************************************/
/* Material */
/************************************************************************/
/************************************************************************/
struct Material {
v3 albedo;
float fuzz;
float refractIdx;
int32_t scatterType;
};
__device__ static v3 rnd_unit_sphere_point(struct DeviceRands* rng)
{
v3 p = v3_get(rng->nums[rng->idx++], rng->nums[rng->idx++], rng->nums[rng->idx++]);
if (v3_magnitude(p) > 0.5F)
v3_scale(&p, 0.5F);
return p;
}
__device__ static float schlick(const float cosine, const float refractIdx)
{
// https://en.wikipedia.org/wiki/Schlick%27s_approximation
float r0 = (1 - refractIdx) / (1 + refractIdx);
r0 = r0 * r0;
return r0 + (1.0F - r0) * powf((1.0F - cosine), 5.0f);
}
__device__ static v3 reflect(const v3 v, const v3 n)
{
const float dot = v3_dot(v, n);
v3 r = v3_scale_to(n, dot * 2.0F);
return v3_subtract_to(v, r);
}
__device__ static bool refract(const v3 v, const v3 n, const float niOverNt, v3* outRefracted)
{
v3 uv = v3_unit(v);
const float dt = v3_dot(uv, n);
const float discriminant = 1.0F - niOverNt * niOverNt * (1.0F - dt * dt);
if (discriminant > 0.0F)
{
/*--- niOverNt * (uv - n * dt) -----------------------------------------*/
v3 l = v3_scale_to(v3_subtract_to(uv, v3_scale_to(n, dt)), niOverNt);
/*--- n * sqrt(discriminant) -------------------------------------------*/
v3 r = v3_scale_to(n, sqrtf(discriminant));
*outRefracted = v3_subtract_to(l, r);
return true;
}
return false;
}
__device__ bool mat_scatter_lambert(const Hit* hit, v3* outAttenuation, Ray* outRay, struct DeviceRands* rng)
{
// TODO: Possibly can scatter with some probability and then attenuation could be albedo/probability
v3 target = v3_add_to(hit->p, hit->nml);
v3_add(&target, rnd_unit_sphere_point(rng));
*outRay = ray_get(hit->p, v3_subtract_to(target, hit->p));
*outAttenuation = hit->mat->albedo;
return true;
}
__device__ bool mat_scatter_metal(const Hit* hit, const Ray* ray, v3* outAttenuation, Ray* outRay, struct DeviceRands* rng)
{
v3 reflected = reflect(v3_unit(ray_direction(ray)), hit->nml);
v3 dir = v3_scale_to(rnd_unit_sphere_point(rng), hit->mat->fuzz);
v3_add(&dir, reflected);
*outRay = ray_get(hit->p, dir);
*outAttenuation = hit->mat->albedo;
return v3_dot(ray_direction(outRay), hit->nml) > 0.0F;
}
__device__ bool mat_scatter_dielectric(const Hit* hit, const Ray* ray, v3* outAttenuation, Ray* outRay, struct DeviceRands* rng)
{
v3 oNorm;
float niOverNt;
float reflectProb;
float cosine;
/*--- This const value because glass doesn't absorb --------------------*/
*outAttenuation = v3_get(1.0F, 1.0F, 1.0F);
if (v3_dot(ray_direction(ray), hit->nml) > 0.0F)
{
oNorm = v3_scale_to(hit->nml, -1.0F);
niOverNt = hit->mat->refractIdx;
//cosine = mat->refractIdx * v3_dot(ray_direction(ray), hit->nml) / v3_magnitude(ray_direction(ray));
cosine = v3_dot(ray_direction(ray), hit->nml) / v3_magnitude(ray_direction(ray));
cosine = sqrtf(1.0F - hit->mat->refractIdx * hit->mat->refractIdx * (1.0F - cosine * cosine));
}
else
{
oNorm = hit->nml;
niOverNt = 1.0F / hit->mat->refractIdx;
cosine = -v3_dot(ray_direction(ray), hit->nml) / v3_magnitude(ray_direction(ray));
}
v3 refracted;
v3 reflected = reflect(ray_direction(ray), hit->nml);
if (refract(ray_direction(ray), oNorm, niOverNt, &refracted))
reflectProb = schlick(cosine, hit->mat->refractIdx);
else
reflectProb = 1.0F;
if (rng->nums[rng->idx++] < reflectProb)
*outRay = ray_get(hit->p, reflected);
else
*outRay = ray_get(hit->p, refracted);
return true;
}
struct Material mat_lambert(const v3 albedo)
{
struct Material m;
m.albedo = albedo;
m.scatterType = 1;
return m;
}
struct Material mat_metal(const v3 albedo, const float fuzz)
{
struct Material m;
m.albedo = albedo;
m.fuzz = fuzz;
m.scatterType = 2;
return m;
}
struct Material mat_glass(const float refractIndex)
{
struct Material m;
m.refractIdx = refractIndex;
m.scatterType = 3;
return m;
}
/************************************************************************/
/************************************************************************/
/* Sphere */
/************************************************************************/
/************************************************************************/
struct Sphere {
v3 center;
float radius;
struct Material mat;
};
struct Sphere sphere_get(const v3 center, const float radius, const struct Material mat)
{
struct Sphere s;
s.center = center;
s.radius = radius;
s.mat = mat;
return s;
}
__device__ bool sphere_hit(const struct Sphere* sphere, const Ray* ray, const float tMin, float tMax, Hit* outHit)
{
v3 oc = v3_subtract_to(ray_origin(ray), sphere->center);
float a = v3_dot(ray_direction(ray), ray_direction(ray));
float b = v3_dot(oc, ray_direction(ray));
float c = v3_dot(oc, oc) - sphere->radius * sphere->radius;
float discriminant = b * b - a * c;
if (discriminant > 0.0F)
{
float t = 0.0F;
float t0 = (-b - sqrtf(discriminant)) / a;
float t1 = (-b + sqrtf(discriminant)) / a;
if (t0 < tMax && t0 > tMin)
t = t0;
else if (t1 < tMax && t1 > tMin)
t = t1;
else
return false;
outHit->t = t;
outHit->p = ray_point(ray, t);
outHit->nml = outHit->p;
v3_subtract(&outHit->nml, sphere->center);
v3_scale(&outHit->nml, 1.0F / sphere->radius);
outHit->mat = &sphere->mat;
return true;
}
return false;
}
#ifdef __cplusplus
}
#endif
/************************************************************************/
/************************************************************************/
/* Global */
/************************************************************************/
/************************************************************************/
__device__ static void color(Ray* ray, struct Sphere* spheres, const size_t sphereCount, struct DeviceRands* rng, struct ColorPick* pick)
{
Hit hit;
hit.t = FLT_MAX;
for (size_t i = 0; i < sphereCount; ++i)
{
Hit check;
check.t = FLT_MAX;
sphere_hit(spheres + i, ray, 0.001F, FLT_MAX, &check);
/*--- Near 0 should be ignored, thus 0.001F ----------------------------*/
if (check.t < hit.t)
hit = check;
}
if (hit.t < FLT_MAX)
{
Ray scattered;
v3 attenuation;
bool valid = false;
switch (hit.mat->scatterType)
{
case 1:
valid = mat_scatter_lambert(&hit, &attenuation, &scattered, rng);
break;
case 2:
valid = mat_scatter_metal(&hit, ray, &attenuation, &scattered, rng);
break;
case 3:
valid = mat_scatter_dielectric(&hit, ray, &attenuation, &scattered, rng);
break;
}
if (valid)
{
pick->scatter = scattered;
v3_multiply(&pick->color, attenuation);
return;
}
else
{
pick->color = v3_zero();
pick->done = true;
return;
}
}
// 1 = blue & 0 = white
v3 unit = v3_unit(ray_direction(ray)); // -1 < y < 1
float t = 0.5F * (unit.y + 1.0F); // Scale above to 0 < y < 1
/*--- ((1.0 - t) * <1.0,1.0,1.0>) + (t * <0.5,0.7,0.1>) ----------------*/
v3 start = v3_one();
v3 end = v3_get(0.5F, 0.7F, 1.0F);
v3_scale(&end, t);
v3_scale(&start, 1.0F - t);
v3_add(&start, end);
v3_multiply(&pick->color, start);
pick->done = true;
}
static void write_image(const uint8_t* buff, const size_t len, const int32_t width, const int32_t height, const int32_t frameIdx)
{
// TODO: The buf should be from float[] to uint8_t[]
assert(len > 0);
assert(width > 0);
assert(height > 0);
assert(len % 3 == 0);
char name[128];
snprintf(name, 128, "frame-%d.ppm", frameIdx);
FILE* fp = fopen(name, "w");
if (fp == NULL)
return;
char header[sizeof(PPM_HEADER_FORMAT) + 20];
snprintf(header, sizeof(header), PPM_HEADER_FORMAT, width, height);
fwrite(header, strlen(header), 1, fp);
char rgb[4];
const size_t colCount = (size_t)width * 3;
for (size_t i = 0; i < len;)
{
for (size_t c = 0; c < colCount; ++c, ++i)
{
_itoa(buff[i], rgb, 10);
fwrite(rgb, strlen(rgb), 1, fp);
fwrite(" ", 1, 1, fp);
}
fwrite("\n", 1, 1, fp);
}
fclose(fp);
}
static uint8_t fltoui8(const float val)
{
return (uint8_t)(val * 255.99F);
}
static struct Sphere* get_spheres(const size_t count, size_t* outCount)
{
MTRand r = make_rand_seed();
MTRand* rnd = &r;
struct Sphere* spheres = (struct Sphere*)malloc(sizeof(struct Sphere) * (count + 1));
size_t i = 0;
spheres[i++] = sphere_get(v3_get(0.0F, -1000.0F, 0.0F), 1000.0F, mat_lambert(v3_get(0.5F, 0.5F, 0.5F)));
const v3 m = v3_get(4.0F, 0.2F, 0.0F);
for (int32_t a = -5; a < 5; ++a)
{
for (int32_t b = -5; b < 5; ++b)
{
v3 center = v3_get(a + 0.9F * rand_float(rnd, 0.0F, 1.0F), 0.2F, b + 0.9F * rand_float(rnd, 0.0F, 1.0F));
if (v3_magnitude(v3_subtract_to(center, m)) > 0.9F)
{
float matChoice = rand_float(rnd, 0.0F, 1.0F);
if (matChoice < 0.8F)
spheres[i++] = sphere_get(center, 0.2F, mat_lambert(v3_get(rand_float(rnd, 0.0F, 1.0F), rand_float(rnd, 0.0F, 1.0F), rand_float(rnd, 0.0F, 1.0F))));
else if (matChoice < 0.95F)
spheres[i++] = sphere_get(center, 0.2F, mat_metal(v3_get((1 + rand_float(rnd, 0.0F, 1.0F)) * 0.5F, (1 + rand_float(rnd, 0.0F, 1.0F)) * 0.5F, (1 + rand_float(rnd, 0.0F, 1.0F)) * 0.5F), rand_float(rnd, 0.0F, 1.0F) * 0.5F));
else
spheres[i++] = sphere_get(center, 0.2F, mat_glass(1.5F));
}
if (i == count - 3)
break;
}
if (i == count - 3)
break;
}
spheres[i++] = sphere_get(v3_get(0.0F, 1.0F, 0.0F), 1.0F, mat_glass(1.5F));
spheres[i++] = sphere_get(v3_get(-4.0F, 1.0F, 0.0F), 1.0F, mat_lambert(v3_get(0.4F, 0.2F, 0.1F)));
spheres[i++] = sphere_get(v3_get(4.0F, 1.0F, 0.0F), 1.0F, mat_metal(v3_get(0.7F, 0.6F, 0.5F), 0.0F));
*outCount = i;
mt_rand_free(rnd);
return spheres;
}
__constant__ struct Sphere cuda_Spheres[500];
__global__ void dummy(struct Camera* cam, struct Sphere* spheres, size_t sphereCount, struct v3* rgb, const float* dRands)
{
//extern __shared__ v3 samples[];
int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
int blocksPerGrid = gridDim.x * gridDim.y * gridDim.z;
int threadPositionInBlock = threadIdx.x +
blockDim.x * threadIdx.y +
blockDim.x * blockDim.y * threadIdx.z;
int blockPosInGrid = blockIdx.x +
gridDim.x * blockIdx.y +
gridDim.x * gridDim.y * blockIdx.z;
//int idx = threadPositionInBlock * blocksPerGrid + blockPosInGrid;
//int idx = blockPosInGrid * threadsPerBlock + threadPositionInBlock;
//int idx = blockIdx.y * gridDim.x + blockIdx.x;
int h = (blockIdx.x * blockDim.y) + threadIdx.y;
int idx = (blockIdx.y * gridDim.x * blockDim.y) + h;
rgb[idx] = v3_zero();
struct ColorPick pick;
struct DeviceRands rng;
rng.idx = 0;
//int rndOffset = threadPositionInBlock * RAND_COUNT_THREAD;
int rndOffset = threadIdx.x * RAND_COUNT_THREAD;
//int rndOffset = (idx + threadIdx.x * RAND_COUNT_THREAD) % (RANDS_COUNT - RAND_COUNT_THREAD);
rng.nums = dRands + rndOffset;
const float u = ((float)h + rng.nums[rng.idx++]) / WIDTH;
//const float u = ((float)blockIdx.x + rng.nums[rng.idx++]) / WIDTH;
const float v = ((float)HEIGHT - blockIdx.y + rng.nums[rng.idx++]) / HEIGHT;
pick.scatter = Camera_ray(cam, u, v, &rng);
pick.done = false;
pick.depth = 0;
pick.color = v3_one();
while (!pick.done && pick.depth < MAX_DEPTH)
{
color(&pick.scatter, cuda_Spheres, sphereCount, &rng, &pick);
pick.depth++;
}
//samples[threadPositionInBlock] = pick.color;
// Block for all threads and sum up shared values
//__syncthreads();
//if (threadPositionInBlock == 0)
//{
// v3 c = v3_zero();
// for (int i = 0; i < SAMPLES; ++i)
// v3_add(&c, samples[i]);
// rgb[idx] = c;
// //v3_scale(&c, 1.0F / SAMPLES);
// //rgb[idx] = v3_get(sqrtf(c.r), sqrtf(c.g), sqrtf(c.b));
//}
// Say hello to atomics
v3 c = pick.color;
atomicAdd(&rgb[idx].r, c.x);
atomicAdd(&rgb[idx].g, c.g);
atomicAdd(&rgb[idx].b, c.b);
}
void render_frame(struct Sphere* spheres, size_t sphereCount)
{
float scale = 1.0F / FRAMES;
float cx = 8.0F;
float cz = 10.0F;
const float circle = (float)(M_PI * 2.0F);
float angleDelta = circle * scale;
float angle = 0.0F;
struct Sphere* dSpheres;
cudaMalloc(&dSpheres, sizeof(struct Sphere) * sphereCount);
cudaMemcpy(dSpheres, spheres, sizeof(struct Sphere) * sphereCount, cudaMemcpyHostToDevice);
struct Camera* dCamera;
cudaMalloc(&dCamera, sizeof(struct Camera));
struct v3* dCp;
cudaMalloc(&dCp, PIXEL_SIZE);
//cudaMallocManaged(&dCp, PIXEL_SIZE, cudaMemAttachGlobal);
struct v3* cp;
cudaMallocHost(&cp, PIXEL_SIZE);
float* dRands;
cudaMalloc(&dRands, RANDS_SIZE);
curandGenerator_t gen;
curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_MT19937);
dim3 block;
block.x = SAMPLES;
block.y = PIXEL_DIM_SPLIT;
block.z = 1;
dim3 grid;
grid.x = WIDTH / PIXEL_DIM_SPLIT;
grid.y = HEIGHT;
grid.z = 1;
const size_t sharedMemSize = sizeof(v3) * SAMPLES;
const float fov = 20.0F;
const float aspect = (float)WIDTH / (float)HEIGHT;
const v3 lookAt = v3_zero();
const float aperture = 0.1F;
const float focalDist = 10.0F;
uint8_t* data = (uint8_t*)malloc(MEM_LENGTH);
struct Camera* cam = Camera_new(v3_get(cx, 2.0F, cz), lookAt, v3_up(), fov, aspect, aperture, focalDist);
cudaMemcpyToSymbol(cuda_Spheres, spheres, sphereCount * sizeof(struct Sphere), 0, cudaMemcpyHostToDevice);
curandGenerateUniform(gen, dRands, RANDS_COUNT);
for (int32_t f = 0; f < FRAMES; ++f)
{
cam->origin.x = cx * cosf(angle) - cz * sinf(angle);
cam->origin.z = cz * cosf(angle) + cx * sinf(angle);
Camera_set_view(cam, cam->origin, lookAt, v3_up(), fov, aspect, focalDist);
angle += angleDelta;
cudaMemcpy(dCamera, cam, sizeof(struct Camera), cudaMemcpyHostToDevice);
//curandGenerateNormal(gen, dRands, RANDS_COUNT, 0.5F, 0.5F);
//dummy<<<grid, block, sharedMemSize>>>(dCamera, dSpheres, sphereCount, dCp, dRands, WIDTH * HEIGHT);
dummy<<<grid, block>>>(dCamera, dSpheres, sphereCount, dCp, dRands);
cudaDeviceSynchronize();
cudaMemcpy(cp, dCp, PIXEL_SIZE, cudaMemcpyDeviceToHost);
size_t writeIdx = 0;
for (int32_t i = 0; i < WIDTH * HEIGHT; ++i)
{
v3 c = cp[i];
v3_scale(&c, 1.0F / SAMPLES);
c = v3_get(sqrtf(c.r), sqrtf(c.g), sqrtf(c.b));
data[writeIdx++] = fltoui8(c.r);
data[writeIdx++] = fltoui8(c.g);
data[writeIdx++] = fltoui8(c.b);
}
write_image(data, MEM_LENGTH, WIDTH, HEIGHT, f);
}
curandDestroyGenerator(gen);
cudaFree(dSpheres);
cudaFree(dCamera);
cudaFree(dCp);
cudaFree(dRands);
cudaFreeHost(cp);
Camera_free(cam);
}
int main(void)
{
size_t sphereCount = 0;
struct Sphere* spheres = get_spheres(SPHERE_COUNT, &sphereCount);
render_frame(spheres, sphereCount);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment