// Simple ray tracer in a single file, multi-threaded version.
#include <memory>
#include <stdio.h>
#include <math.h>
#include <vector>
#include <algorithm>
#include <chrono>
#include <thread>
#include <future>
// Render configuration shared by the whole file.
static constexpr int img_width = 3840;   // output width in pixels (4K image)
static constexpr int img_height = 2160;  // output height in pixels
constexpr int max_scene_size = 16;       // capacity used for the Light / Objects containers
static constexpr float eps = 1e-5f;      // minimum shadow-ray distance counted as a hit
// basic vector math stuff
/// Three-component float vector used for positions, directions and, through
/// the Color alias, RGB colors.
///
/// The same three floats can be reached by index through `data` or by name
/// through `x`, `y`, `z`.
union Vec3
{
    float data[3];
    // x, y and z have to sit together inside one struct: declared directly as
    // union members they would all share the storage of data[0]. Anonymous
    // structs are a compiler extension accepted by GCC, Clang and MSVC.
    struct
    {
        float x, y, z;
    };

    /// Euclidean length of the vector.
    float length() const
    {
        return sqrtf(x * x + y * y + z * z);
    }

    /// Historical (misspelled) name of length(), kept for existing callers.
    float lenght() const
    {
        return length();
    }

    /// Component-wise division by a scalar.
    Vec3 operator/(float f) const
    {
        return Vec3{ x / f, y / f, z / f };
    }

    /// Component-wise multiplication by a scalar.
    Vec3 operator*(float f) const
    {
        return Vec3{ x * f, y * f, z * f };
    }

    /// Component-wise sum.
    Vec3 operator+(const Vec3 &other) const
    {
        return Vec3{ x + other.x, y + other.y, z + other.z };
    }

    /// Component-wise difference.
    Vec3 operator-(const Vec3 &other) const
    {
        return Vec3{ x - other.x, y - other.y, z - other.z };
    }

    /// Negation of every component.
    Vec3 operator-() const
    {
        return Vec3{ -x, -y, -z };
    }

    /// Component-wise (Hadamard) product; used to modulate one color by another.
    Vec3 operator*(const Vec3 &other) const
    {
        return Vec3{ x * other.x, y * other.y, z * other.z };
    }

    /// This vector divided by its length. The length is not checked, so a
    /// zero-length vector yields non-finite components.
    Vec3 normalize() const
    {
        return *this / length();
    }

    /// Dot product with v.
    float dot(const Vec3 &v) const
    {
        return (x * v.x + y * v.y + z * v.z);
    }

    /// Component-wise mean of this vector and v.
    Vec3 average(const Vec3 &v) const
    {
        return { (x + v.x) * 0.5f, (y + v.y) * 0.5f, (z + v.z) * 0.5f };
    }

    /// Cross product (this x v).
    Vec3 cross(const Vec3 &v) const
    {
        return Vec3{ (y * v.z) - (z * v.y),
                     (z * v.x) - (x * v.z),
                     (x * v.y) - (y * v.x) };
    }

    // todo: this really makes sense only for colors
    /// Limits every component to at most 1.0. When the component sum exceeds
    /// 3.0, each component is first increased by its proportional share of the
    /// excess. Components are never raised to a lower bound.
    Vec3 saturate() const
    {
        const float sum = x + y + z;
        const float excess = sum - 3.0f;
        Vec3 res = { x, y, z };
        if (excess > 0.0f)
        {
            res.x = x + excess * (x / sum);
            res.y = y + excess * (y / sum);
            res.z = z + excess * (z / sum);
        }
        if (res.x > 1.0f) res.x = 1.0f;
        if (res.y > 1.0f) res.y = 1.0f;
        if (res.z > 1.0f) res.z = 1.0f;
        return res;
    }
};
/// A ray described by a start point and a direction.
struct Ray
{
    Vec3 origin;     // point the ray starts from
    Vec3 direction;  // direction the ray travels in
};
/// Camera description: where it is, where it looks, and the two vectors that
/// span its image plane.
struct Camera
{
    Vec3 position;   // eye position
    Vec3 direction;  // viewing direction
    Vec3 right;      // horizontal axis of the image plane
    Vec3 up;         // vertical axis of the image plane
};
// A color is stored exactly like a Vec3, so the type is simply aliased;
// x, y and z are used as the red, green and blue channels.
using Color = Vec3;
template <int SIZE>
struct Light
Ray light_world[SIZE];
Color color[SIZE];
int current_size;
Light() : current_size{ 0 }{}
void add_light(const Ray& pos, const Color& col)
if (current_size < SIZE)
light_world[current_size] = pos;
color[current_size] = col;
/// One 24-bit pixel: three bytes reachable by index (`data`) or by channel
/// name (`r`, `g`, `b`).
union ColorBMP
{
    unsigned char data[3];
    // The channels need their own struct: declared directly as union members
    // r, g and b would all alias data[0]. Anonymous structs are a compiler
    // extension accepted by GCC, Clang and MSVC.
    struct
    {
        unsigned char r, g, b;
    };
};
//data oriented structure
template <int SIZE>
struct Objects
enum object_types
Vec3 center[SIZE];
Color color[SIZE];
float radius[SIZE];
object_types types[SIZE];
int current_size ;
void add_sphere(const Vec3 &cen, const Color &col, float rad)
if (current_size < SIZE)
center[current_size] = cen;
color[current_size] = col;
radius[current_size] = rad;
types[current_size] = SPHERE;
void add_Plane(const Vec3 &normal, const Color &col, float distance)
if (current_size < SIZE)
center[current_size] = normal;
color[current_size] = col;
radius[current_size] = distance;
types[current_size] = PLANE;
Color GetColor(int idx) const
return color[idx];
Vec3 GetNormal(const Vec3 &vec, int idx) const
if (types[idx] == SPHERE)
return (vec - center[idx]).normalize();
else if(types[idx] == PLANE)
return center[idx];
return {.0f,.0f,.0f};
float ray_intersection(const Ray & ray, int idx) const
if (types[idx] == SPHERE)
Vec3 m(ray.origin - center[idx]);
float b = (m).dot(ray.direction);
float c = - radius[idx] * radius[idx];
if (c > 0.0f && b > 0.0f)
return -1.0;
float discr = b*b - c;
if (discr < 0.0)
return -1.0;
float t = -b - sqrt(discr);
if (t < 0.0f)
return -1.0f;
return t;
else if (types[idx] == PLANE)
float t = (radius[idx] - center[idx].dot(ray.origin)) / center[idx].dot(ray.direction);
if (t >= 0.0)
return t;
return -1.0;
return 0.0f;
// BMP output. `header` holds the 14-byte file header followed by the 40-byte
// info header; InitHeader fills it in.
unsigned char header[54];

// Store `value` as four little-endian bytes starting at `dst`.
static void put_le32(unsigned char *dst, unsigned int value)
{
    dst[0] = static_cast<unsigned char>(value);
    dst[1] = static_cast<unsigned char>(value >> 8);
    dst[2] = static_cast<unsigned char>(value >> 16);
    dst[3] = static_cast<unsigned char>(value >> 24);
}

/// Fill `header` for an uncompressed 24-bit image of w x h pixels.
///
/// Every field is written byte by byte in little-endian order, so the result
/// does not depend on the host byte order. The size fields account for the
/// padding that rounds each pixel row up to a multiple of four bytes.
void InitHeader(int w, int h)
{
    const unsigned int width = static_cast<unsigned int>(w);
    const unsigned int rows = static_cast<unsigned int>(h < 0 ? -h : h);
    const unsigned int stride = (width * 3u + 3u) & ~3u;  // padded bytes per row
    const unsigned int sizedata = stride * rows;
    const unsigned int size = sizedata + static_cast<unsigned int>(sizeof(header));

    // Start from a clean slate so reserved and unused fields are always zero.
    for (unsigned char &byte : header)
    {
        byte = 0;
    }

    // File header.
    header[0] = 'B';
    header[1] = 'M';
    put_le32(&header[2], size);  // total file size
    header[10] = 54;             // offset of the pixel data

    // Info header.
    header[14] = 40;             // size of the info header itself
    put_le32(&header[18], width);
    put_le32(&header[22], static_cast<unsigned int>(h));
    header[26] = 1;              // color planes
    header[28] = 24;             // bits per pixel
    put_le32(&header[34], sizedata);

    // NOTE(review): these two fields are pixels per metre; 72 looks like a DPI
    // value (72 DPI is about 2835 px/m). Kept unchanged - confirm the intent.
    const unsigned int ppm = 72;
    put_le32(&header[38], ppm);
    put_le32(&header[42], ppm);
}
void writebmp(const char * filename, ColorBMP *data, int w, int h)
FILE *f = fopen(filename, "wb");
InitHeader(w, h);
fwrite(header, sizeof(header), 1, f);
fwrite(data, w*h*sizeof(ColorBMP), 1, f);
/// Shade the surface point `intersection` on object `closest`.
///
/// The result is the object color scaled by `ambientfactor`, plus one diffuse
/// term per light that faces the surface and is not blocked by any object,
/// passed through Vec3::saturate().
///
/// `intersections` is a caller-provided scratch buffer. It is cleared before
/// each shadow test, so whatever it held on entry is discarded; on return it
/// holds the hits of the last shadow ray tested (if any).
Color compute_color(const Vec3 &intersection, int closest, const Light<max_scene_size> &lights, const Objects<max_scene_size> &objects, std::vector<std::pair<float, int>> &intersections, float ambientfactor)
{
    const Vec3 normal = objects.GetNormal(intersection, closest);
    const Color color = objects.GetColor(closest);
    Color finalcolor = color * ambientfactor;

    for (int i = 0; i < lights.current_size; ++i)
    {
        const Vec3 to_light = lights.light_world[i].origin - intersection;
        const float distance = to_light.lenght();
        const Vec3 light_dir = to_light.normalize();

        // Cosine of the angle between the surface normal and the light.
        const float dot = normal.dot(light_dir);
        if (dot <= 0.0f)
        {
            continue;  // the light is behind the surface
        }

        // Fire a ray from the surface point towards the light to find out
        // whether another object casts a shadow on it.
        const Ray shadow_ray = { intersection, light_dir };
        // Drop stale entries (e.g. the caller's camera-ray hits); otherwise
        // they would be mistaken for occluders.
        intersections.clear();
        bool shadow = false;
        for (int j = 0; j < objects.current_size; ++j)
        {
            const float t = objects.ray_intersection(shadow_ray, j);
            intersections.push_back(std::pair<float, int>(t, j));
            // eps skips the surface the ray starts on; hits beyond the light
            // cannot block it.
            if (t > eps && t <= distance)
            {
                shadow = true;
            }
        }

        if (!shadow)
        {
            finalcolor = finalcolor + ((color * lights.color[i]) * dot);
        }
    }
    return finalcolor.saturate();
}
// do the actual job
void process_image(int w, int h)
ColorBMP* data = new ColorBMP[w* h];
memset(data, 0, w* h * sizeof(ColorBMP));
Objects<max_scene_size> object_container;
Light<max_scene_size> light_container;
Vec3 pos = { 6.0f, 1.5f, -3.0f };
Vec3 look = { .0f, .0f, .0f };
Vec3 Y = { .0f, 1.0f, .0f };
Vec3 camdir = (look - pos).normalize();
Vec3 camright = Y.cross(camdir).normalize();
Vec3 camup = camright.cross(camdir).normalize();
//we have a camera
Camera camera = { pos, camdir, camright, camup };
//add a light
light_container.add_light({ { -7.0f, 10.0f, -10.0f }, { 0.0f, 0.0f, 0.0f } }, { 1.0f, 1.0f, 1.0f });
//add a couple of sphered and a plane
object_container.add_sphere({ 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f }, 1.0f);
object_container.add_sphere({ 3.0f, -0.5f, 0.0f }, { 1.0f, 1.0f, 0.0f }, 0.5f);
object_container.add_Plane(Y, { 0.0f, 0.0f, 1.0f }, { -1.0f });
std::chrono::time_point<std::chrono::system_clock> start, end;
start = std::chrono::system_clock::now();
float aspect = (static_cast<float>(w) / static_cast<float>(h));
int numthread = std::max<int>(2, std::thread::hardware_concurrency());
// must be even
numthread % 2 == 0 ? numthread : ++numthread;
std::vector< std::future<void> > futures;
// split w and h
int splitw = w / (numthread/2);
int splith = h/2;
int start_x = 0;
int start_y = 0;
int end_x = splitw;
int end_y = splith;
for (int thread_idx = 0; thread_idx < numthread; ++thread_idx)
//spawn numthread threads with labmdas
std::future<void> threads = std::async(std::launch::async, [=, &light_container, &object_container, &data](){
std::vector<std::pair<float, int>>intersections;
float xinc, yincr;
for (int i = start_x; i < end_x; ++i)
for (int j = start_y; j < end_y; ++j)
int pixelpos = i + j * w;
// decide how to offset the camera ray according to the screen ratio
if (aspect > 1.0)
xinc = ((i + 0.5f) / w) * aspect - (((w - h) / static_cast<float>(h)) *0.5f);
yincr = ((h - j) + 0.5f) / h;
else if (aspect < 1.0)
xinc = (i + 0.5f) / w;
yincr = ((h - j + 0.5f) / h) / aspect - (((h - w) / static_cast<float>(w)) *0.5f);
xinc = (i + 0.5f) / w;
yincr = ((h - j + 0.5f) / h);
// starts to fire rays
Vec3 camera_pos = camera.position;
Vec3 camera_dir = (camera.direction + (camera.right * (xinc - 0.5f)) + (camup *(yincr - 0.5f))).normalize();
Ray camera_ray = { camera_pos, camera_dir };
// store the intersection between the camera and all the objects in scene.
for ( int k = 0; k < object_container.current_size; ++k)
float intersection = object_container.ray_intersection(camera_ray,k);
if (intersection > 0.0f)
intersections.push_back(std::pair<float, int>(intersection, k));
// find the nearest
std::sort(intersections.begin(), intersections.end(), [](std::pair<float, int> a, std::pair<float, int> b){return b.first > a.first; });
if (!intersections.empty())
// get the intersection point
Vec3 intersection = camera_pos + (camera_dir * intersections[0].first);
Color color = compute_color(intersection, intersections[0].second, light_container, object_container, intersections, 0.2f);
data[pixelpos].r = (unsigned char)(color.x * 255);
data[pixelpos].g = (unsigned char)(color.y * 255);
data[pixelpos].b = (unsigned char)(color.z * 255);
start_x = start_x + splitw;
end_x = end_x + splitw;
if (end_x > w)
start_x = 0;
end_x = splitw;
start_y = start_y + splith;
end_y = start_y + splith;
// wait till all the threads are done and wait 2 ms
while (!futures.empty())
if (futures.back().wait_for(std::chrono::milliseconds(2)) == std::future_status::ready)
end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
printf("rendering took %fs\n", elapsed_seconds);
writebmp("test.bmp", data, img_width, img_height);
delete[] data;
void draw_scene()
process_image(img_width, img_height);
int main()
return 0;
