speedwago/multi_thead_simple_raytracer.cpp

## multi_thead_simple_raytracer.cpp
#include <memory>
#include <stdio.h>
#include <math.h>
#include <vector>
#include <algorithm>
#include <chrono>
#include <thread>
#include <future>
// some constant
static const int img_width = 3840; //4k image
static const int img_height = 2160;
const int max_scene_size = 16;
static const float eps = 0.00001f;

// basic vector math stuff
union Vec3
{
	float data[3];

	struct
	{
		float x, y, z;
	};

	float lenght() const
	{
		return sqrt(x*x + y*y + z*z);
	}

	Vec3 operator/(float f)const
	{
		return Vec3{ x / f, y / f, z / f };
	}

	Vec3 operator*(float f)const
	{
		return Vec3{ x * f, y * f, z * f };
	}

	Vec3 operator+(const Vec3 &other)const
	{
		return Vec3{ x + other.x, y + other.y, z + other.z };
	}

	Vec3 operator-(const Vec3 &other)const
	{
		return Vec3{ x - other.x, y - other.y, z - other.z };
	}

	Vec3 operator-()const
	{
		return Vec3{ -x, -y, -z };
	}

	Vec3 operator*(const Vec3 &other)const
	{
		return Vec3{ x * other.x, y * other.y, z * other.z };
	}

	Vec3 normalize() const
	{
		return *this / lenght();
	}

	float dot(const Vec3 &v) const
	{
		return (x * v.x + y * v.y + z * v.z);
	}

	Vec3 average(const Vec3 &v) const
	{
		return{ (x + v.x) * 0.5f, (y + v.y) * 0.5f, (z + v.z) * 0.5f };
	}


	Vec3 cross(const Vec3 &v)const
	{
		//xyzzy
		Vec3 res;
		res.x = (y * v.z) - (z * v.y);
		res.y = (z * v.x) - (x * v.z);
		res.z = (x * v.y) - (y * v.x);

		return res;
	}

	// todo this really make sense only for color
	Vec3 saturate()const
	{
		float sum = x + y + z;
		float excess = sum - 3.0f;
		Vec3 res = { x, y, z };
		if (excess > 0.0f)
		{
			res.x = x + excess * (x / sum);
			res.y = y + excess * (y / sum);
			res.z = z + excess * (z / sum);
		}

		if (res.x > 1.0) res.x = 1.0f;
		if (res.y > 1.0) res.y = 1.0f;
		if (res.z > 1.0) res.z = 1.0f;

		return res;
	}

};

struct Ray
{
	Vec3 origin;
	Vec3 direction;
};

struct Camera
{
	Vec3 position;
	Vec3 direction;
	Vec3 right;
	Vec3 up;
};

// a color is similar to a vec3
using Color = Vec3;

template <int SIZE>
struct Light
{
	Ray light_world[SIZE];
	Color color[SIZE];
	int current_size;

	Light() : current_size{ 0 }{}

	void add_light(const Ray& pos, const Color& col)
	{
		if (current_size < SIZE)
		{
			light_world[current_size] = pos;
			color[current_size] = col;
			++current_size;
		}
	}
};


union ColorBMP
{
	unsigned char  data[3];
	struct
	{
		unsigned char r, g, b;
	};

};


//data oriented structure
template <int SIZE>
struct Objects
{
	enum object_types
	{
		SPHERE,
		PLANE,
	};
	Vec3 center[SIZE];
	Color color[SIZE];
	float radius[SIZE];
	object_types types[SIZE];
	int current_size ;
	Objects()
		:current_size{0}{}

	void add_sphere(const Vec3 &cen, const Color &col, float rad)
	{
		if (current_size < SIZE)
		{
			center[current_size] = cen;
			color[current_size] = col;
			radius[current_size] = rad;
			types[current_size] = SPHERE;
			++current_size;
		}
	}

	void add_Plane(const Vec3 &normal, const Color &col, float distance)
	{
		if (current_size < SIZE)
		{
			center[current_size] = normal;
			color[current_size] = col;
			radius[current_size] = distance;
			types[current_size] = PLANE;
			++current_size;
		}
	}

	Color GetColor(int idx) const
	{
		return color[idx];
	}

	Vec3 GetNormal(const Vec3 &vec, int idx) const
	{
		if (types[idx] == SPHERE)
		{
			return (vec - center[idx]).normalize();
		}
		else if(types[idx] == PLANE)
		{
			return center[idx];
		}
		return {.0f,.0f,.0f};
	}

	float ray_intersection(const Ray & ray, int idx) const
	{
		if (types[idx] == SPHERE)
		{
			Vec3 m(ray.origin - center[idx]);
			float b = (m).dot(ray.direction);
			float c = m.dot(m) - radius[idx] * radius[idx];
			if (c > 0.0f &&  b > 0.0f)
			{
				return -1.0;
			}
			float discr = b*b - c;
			if (discr < 0.0)
			{
				return -1.0;
			}
			float t = -b - sqrt(discr);
			if (t < 0.0f)
			{
				return -1.0f;
			}
			return t;
		}
		else if (types[idx] == PLANE)
		{
			float t = (radius[idx] - center[idx].dot(ray.origin)) / center[idx].dot(ray.direction);
			if (t >= 0.0)
			{
				return t;
			}
			return -1.0;
		}

		return 0.0f;
	}

};

//bmp stuff
unsigned char header[54];
void InitHeader(int w, int h)
{

	// todo check paddign and if the way to write these bites is platform dependend.
	unsigned int size = (w * h * 3) + sizeof(header);
	unsigned  int sizedata = (w * h * 3);
	header[0] = 'B';
	header[1] = 'M';
	header[10] = 54;
	unsigned char *pos = &header[2];
	unsigned char *info = &header[14];
	*info = 40;

	//24bpp
	info[12] = 1;
	info[14] = 24;
	info += 4;

	// write the size  header
	pos[0] = (unsigned char)(size);
	pos[1] = (unsigned char)(size >> 8);
	pos[2] = (unsigned char)(size >> 16);
	pos[3] = (unsigned char)(size >> 24);
	pos += sizeof(int);
	// write the h and h in the info  header
	info[0] = (unsigned char)(w);
	info[1] = (unsigned char)(w >> 8);
	info[2] = (unsigned char)(w >> 16);
	info[3] = (unsigned char)(w >> 24);

	info += sizeof(int);
	info[0] = (unsigned char)(h);
	info[1] = (unsigned char)(h >> 8);
	info[2] = (unsigned char)(h >> 16);
	info[3] = (unsigned char)(h >> 24);

	info += sizeof(int);
	info += 8;

	info[0] = (unsigned char)(sizedata);
	info[1] = (unsigned char)(sizedata >> 8);
	info[2] = (unsigned char)(sizedata >> 16);
	info[3] = (unsigned char)(sizedata >> 24);
	info += sizeof(int);
	unsigned int ppm = 72;

	info[0] = (unsigned char)(ppm);
	info[1] = (unsigned char)(ppm >> 8);
	info[2] = (unsigned char)(ppm >> 16);
	info[3] = (unsigned char)(ppm >> 24);

	info += sizeof(ppm);
	info[0] = (unsigned char)(ppm);
	info[1] = (unsigned char)(ppm >> 8);
	info[2] = (unsigned char)(ppm >> 16);
	info[3] = (unsigned char)(ppm >> 24);
	info += sizeof(ppm);
}

void writebmp(const char * filename, ColorBMP *data, int w, int h)
{
	FILE *f = fopen(filename, "wb");
	InitHeader(w, h);
	fwrite(header, sizeof(header), 1, f);
	fwrite(data, w*h*sizeof(ColorBMP), 1, f);
	fclose(f);
}

Color compute_color(const Vec3 &intersection, int closest, const Light<max_scene_size> &lights, const Objects<max_scene_size> &objects, std::vector<std::pair<float, int>> &intersections, float ambientfactor)
{

	Vec3 normal = objects.GetNormal(intersection,closest);
	Color color = objects.GetColor(closest);
	intersections.clear();
	Color finalcolor = color * ambientfactor;
	for ( int i = 0; i < lights.current_size; ++i)
	{
		Vec3 light_dir = (lights.light_world[i].origin - intersection).normalize();
		float dot = light_dir.dot(normal);
		if (dot > 0.0f)
		{
			bool shadow = false;
			Vec3 ligth_distance = (lights.light_world[i].origin - intersection);
			float distance = ligth_distance.lenght();

			//fire a ray from intersection position to the light to check to check if the object is shadowed
			Ray shadow_ray = { intersection, (lights.light_world[i].origin - intersection).normalize() };

			for ( int j = 0; j < objects.current_size; ++j)
			{
				intersections.push_back(std::pair<float, int>(objects.ray_intersection(shadow_ray,j), j));
			}

			for (unsigned int k = 0; k < intersections.size(); ++k)
			{
				if (intersections[k].first> eps && intersections[k].first <= distance)
				{
					shadow = true;
					break;
				}
			}
			if (!shadow)
			{
				finalcolor = finalcolor + ((color * lights.color[i]) * dot);
			}
		}

	}

	return finalcolor.saturate();
}

// do the actual job
void process_image(int w, int h)
{
	ColorBMP* data = new ColorBMP[w* h];
	memset(data, 0, w* h * sizeof(ColorBMP));
	Objects<max_scene_size> object_container;
	Light<max_scene_size> light_container;
	Vec3 pos = { 6.0f, 1.5f, -3.0f };
	Vec3 look = { .0f, .0f, .0f };
	Vec3 Y = { .0f, 1.0f, .0f };
	Vec3 camdir = (look - pos).normalize();
	Vec3 camright = Y.cross(camdir).normalize();
	Vec3 camup = camright.cross(camdir).normalize();
	//we have a camera
	Camera camera = { pos, camdir, camright, camup };

	//add a  light
	light_container.add_light({ { -7.0f, 10.0f, -10.0f }, { 0.0f, 0.0f, 0.0f } }, { 1.0f, 1.0f, 1.0f });
	//add a couple of sphered and a plane
	object_container.add_sphere({ 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f }, 1.0f);
	object_container.add_sphere({ 3.0f, -0.5f, 0.0f }, { 1.0f, 1.0f, 0.0f }, 0.5f);
	object_container.add_Plane(Y, { 0.0f, 0.0f, 1.0f }, { -1.0f });
	std::chrono::time_point<std::chrono::system_clock> start, end;

	start = std::chrono::system_clock::now();

	float aspect = (static_cast<float>(w) / static_cast<float>(h));
	int  numthread = std::max<int>(2, std::thread::hardware_concurrency());
	// must be even
	numthread % 2 == 0 ? numthread : ++numthread;
	std::vector< std::future<void> > futures;
	futures.reserve(numthread);

	// split w and h
	int splitw = w / (numthread/2);
	int splith =  h/2;
	int start_x = 0;
	int start_y = 0;
	int end_x = splitw;
	int end_y = splith;
	for (int thread_idx = 0; thread_idx < numthread; ++thread_idx)
	{
		//spawn numthread threads with labmdas
		std::future<void> threads = std::async(std::launch::async, [=, &light_container, &object_container, &data](){
			std::vector<std::pair<float, int>>intersections;
			intersections.reserve(1024);
			float xinc, yincr;


			for (int i = start_x; i < end_x; ++i)
			{
				for (int j = start_y; j < end_y; ++j)
				{

					int pixelpos = i + j * w;

					// decide how to offset the camera ray according to the screen ratio

					if (aspect > 1.0)
					{
						xinc = ((i + 0.5f) / w) * aspect - (((w - h) / static_cast<float>(h)) *0.5f);
						yincr = ((h - j) + 0.5f) / h;
					}
					else if (aspect < 1.0)
					{
						xinc = (i + 0.5f) / w;
						yincr = ((h - j + 0.5f) / h) / aspect - (((h - w) / static_cast<float>(w)) *0.5f);
					}
					else
					{
						xinc = (i + 0.5f) / w;
						yincr = ((h - j + 0.5f) / h);
					}

					// starts to fire rays
					Vec3 camera_pos = camera.position;
					Vec3 camera_dir = (camera.direction + (camera.right * (xinc - 0.5f)) + (camup *(yincr - 0.5f))).normalize();
					Ray camera_ray = { camera_pos, camera_dir };

					// store the intersection between the camera and all the objects in scene.
					for ( int k = 0; k < object_container.current_size; ++k)
					{
						float intersection = object_container.ray_intersection(camera_ray,k);

						if (intersection > 0.0f)
							intersections.push_back(std::pair<float, int>(intersection, k));
					}

					// find the nearest
					std::sort(intersections.begin(), intersections.end(), [](std::pair<float, int> a, std::pair<float, int> b){return b.first > a.first; });

					if (!intersections.empty())
					{
						// get the intersection point
						Vec3 intersection = camera_pos + (camera_dir * intersections[0].first);
						Color color = compute_color(intersection, intersections[0].second, light_container, object_container, intersections, 0.2f);
						data[pixelpos].r = (unsigned char)(color.x * 255);
						data[pixelpos].g = (unsigned char)(color.y * 255);
						data[pixelpos].b = (unsigned char)(color.z * 255);

					}

					intersections.clear();

				}
			}
		});

		futures.push_back(std::move(threads));
		start_x = start_x + splitw;
		end_x = end_x + splitw;
		if (end_x > w)
		{
			start_x = 0;
			end_x = splitw;
			start_y = start_y + splith;
			end_y = start_y + splith;
		}

	}

	// wait till all the threads are done and wait 2 ms
	while (!futures.empty())
	{
		if (futures.back().wait_for(std::chrono::milliseconds(2)) == std::future_status::ready)
		{
			futures.pop_back();
		}

	}

	end = std::chrono::system_clock::now();
	std::chrono::duration<double> elapsed_seconds = end - start;
	printf("rendering took %fs\n", elapsed_seconds);
	writebmp("test.bmp", data, img_width, img_height);
	delete[] data;

	system("pause");
}

void draw_scene()
{
	process_image(img_width, img_height);

}

int main()
{
	draw_scene();
	return 0;
}
	#include <memory>
	#include <stdio.h>
	#include <math.h>
	#include <vector>
	#include <algorithm>
	#include <chrono>
	#include <thread>
	#include <future>
	// some constant
	static const int img_width = 3840; //4k image
	static const int img_height = 2160;
	const int max_scene_size = 16;
	static const float eps = 0.00001f;

	// basic vector math stuff
	union Vec3
	{
	float data[3];

	struct
	{
	float x, y, z;
	};

	float lenght() const
	{
	return sqrt(xx + yy + z*z);
	}

	Vec3 operator/(float f)const
	{
	return Vec3{ x / f, y / f, z / f };
	}

	Vec3 operator*(float f)const
	{
	return Vec3{ x * f, y * f, z * f };
	}

	Vec3 operator+(const Vec3 &other)const
	{
	return Vec3{ x + other.x, y + other.y, z + other.z };
	}

	Vec3 operator-(const Vec3 &other)const
	{
	return Vec3{ x - other.x, y - other.y, z - other.z };
	}

	Vec3 operator-()const
	{
	return Vec3{ -x, -y, -z };
	}

	Vec3 operator*(const Vec3 &other)const
	{
	return Vec3{ x * other.x, y * other.y, z * other.z };
	}

	Vec3 normalize() const
	{
	return *this / lenght();
	}

	float dot(const Vec3 &v) const
	{
	return (x * v.x + y * v.y + z * v.z);
	}

	Vec3 average(const Vec3 &v) const
	{
	return{ (x + v.x) * 0.5f, (y + v.y) * 0.5f, (z + v.z) * 0.5f };
	}


	Vec3 cross(const Vec3 &v)const
	{
	//xyzzy
	Vec3 res;
	res.x = (y * v.z) - (z * v.y);
	res.y = (z * v.x) - (x * v.z);
	res.z = (x * v.y) - (y * v.x);

	return res;
	}

	// todo this really make sense only for color
	Vec3 saturate()const
	{
	float sum = x + y + z;
	float excess = sum - 3.0f;
	Vec3 res = { x, y, z };
	if (excess > 0.0f)
	{
	res.x = x + excess * (x / sum);
	res.y = y + excess * (y / sum);
	res.z = z + excess * (z / sum);
	}

	if (res.x > 1.0) res.x = 1.0f;
	if (res.y > 1.0) res.y = 1.0f;
	if (res.z > 1.0) res.z = 1.0f;

	return res;
	}

	};

	struct Ray
	{
	Vec3 origin;
	Vec3 direction;
	};

	struct Camera
	{
	Vec3 position;
	Vec3 direction;
	Vec3 right;
	Vec3 up;
	};

	// a color is similar to a vec3
	using Color = Vec3;

	template <int SIZE>
	struct Light
	{
	Ray light_world[SIZE];
	Color color[SIZE];
	int current_size;

	Light() : current_size{ 0 }{}

	void add_light(const Ray& pos, const Color& col)
	{
	if (current_size < SIZE)
	{
	light_world[current_size] = pos;
	color[current_size] = col;
	++current_size;
	}
	}
	};


	union ColorBMP
	{
	unsigned char data[3];
	struct
	{
	unsigned char r, g, b;
	};

	};


	//data oriented structure
	template <int SIZE>
	struct Objects
	{
	enum object_types
	{
	SPHERE,
	PLANE,
	};
	Vec3 center[SIZE];
	Color color[SIZE];
	float radius[SIZE];
	object_types types[SIZE];
	int current_size ;
	Objects()
	:current_size{0}{}

	void add_sphere(const Vec3 &cen, const Color &col, float rad)
	{
	if (current_size < SIZE)
	{
	center[current_size] = cen;
	color[current_size] = col;
	radius[current_size] = rad;
	types[current_size] = SPHERE;
	++current_size;
	}
	}

	void add_Plane(const Vec3 &normal, const Color &col, float distance)
	{
	if (current_size < SIZE)
	{
	center[current_size] = normal;
	color[current_size] = col;
	radius[current_size] = distance;
	types[current_size] = PLANE;
	++current_size;
	}
	}

	Color GetColor(int idx) const
	{
	return color[idx];
	}

	Vec3 GetNormal(const Vec3 &vec, int idx) const
	{
	if (types[idx] == SPHERE)
	{
	return (vec - center[idx]).normalize();
	}
	else if(types[idx] == PLANE)
	{
	return center[idx];
	}
	return {.0f,.0f,.0f};
	}

	float ray_intersection(const Ray & ray, int idx) const
	{
	if (types[idx] == SPHERE)
	{
	Vec3 m(ray.origin - center[idx]);
	float b = (m).dot(ray.direction);
	float c = m.dot(m) - radius[idx] * radius[idx];
	if (c > 0.0f && b > 0.0f)
	{
	return -1.0;
	}
	float discr = b*b - c;
	if (discr < 0.0)
	{
	return -1.0;
	}
	float t = -b - sqrt(discr);
	if (t < 0.0f)
	{
	return -1.0f;
	}
	return t;
	}
	else if (types[idx] == PLANE)
	{
	float t = (radius[idx] - center[idx].dot(ray.origin)) / center[idx].dot(ray.direction);
	if (t >= 0.0)
	{
	return t;
	}
	return -1.0;
	}

	return 0.0f;
	}

	};

	//bmp stuff
	unsigned char header[54];
	void InitHeader(int w, int h)
	{

	// todo check paddign and if the way to write these bites is platform dependend.
	unsigned int size = (w * h * 3) + sizeof(header);
	unsigned int sizedata = (w * h * 3);
	header[0] = 'B';
	header[1] = 'M';
	header[10] = 54;
	unsigned char *pos = &header[2];
	unsigned char *info = &header[14];
	*info = 40;

	//24bpp
	info[12] = 1;
	info[14] = 24;
	info += 4;

	// write the size header
	pos[0] = (unsigned char)(size);
	pos[1] = (unsigned char)(size >> 8);
	pos[2] = (unsigned char)(size >> 16);
	pos[3] = (unsigned char)(size >> 24);
	pos += sizeof(int);
	// write the h and h in the info header
	info[0] = (unsigned char)(w);
	info[1] = (unsigned char)(w >> 8);
	info[2] = (unsigned char)(w >> 16);
	info[3] = (unsigned char)(w >> 24);

	info += sizeof(int);
	info[0] = (unsigned char)(h);
	info[1] = (unsigned char)(h >> 8);
	info[2] = (unsigned char)(h >> 16);
	info[3] = (unsigned char)(h >> 24);

	info += sizeof(int);
	info += 8;

	info[0] = (unsigned char)(sizedata);
	info[1] = (unsigned char)(sizedata >> 8);
	info[2] = (unsigned char)(sizedata >> 16);
	info[3] = (unsigned char)(sizedata >> 24);
	info += sizeof(int);
	unsigned int ppm = 72;

	info[0] = (unsigned char)(ppm);
	info[1] = (unsigned char)(ppm >> 8);
	info[2] = (unsigned char)(ppm >> 16);
	info[3] = (unsigned char)(ppm >> 24);

	info += sizeof(ppm);
	info[0] = (unsigned char)(ppm);
	info[1] = (unsigned char)(ppm >> 8);
	info[2] = (unsigned char)(ppm >> 16);
	info[3] = (unsigned char)(ppm >> 24);
	info += sizeof(ppm);
	}

	void writebmp(const char * filename, ColorBMP *data, int w, int h)
	{
	FILE *f = fopen(filename, "wb");
	InitHeader(w, h);
	fwrite(header, sizeof(header), 1, f);
	fwrite(data, whsizeof(ColorBMP), 1, f);
	fclose(f);
	}

	Color compute_color(const Vec3 &intersection, int closest, const Light<max_scene_size> &lights, const Objects<max_scene_size> &objects, std::vector<std::pair<float, int>> &intersections, float ambientfactor)
	{

	Vec3 normal = objects.GetNormal(intersection,closest);
	Color color = objects.GetColor(closest);
	intersections.clear();
	Color finalcolor = color * ambientfactor;
	for ( int i = 0; i < lights.current_size; ++i)
	{
	Vec3 light_dir = (lights.light_world[i].origin - intersection).normalize();
	float dot = light_dir.dot(normal);
	if (dot > 0.0f)
	{
	bool shadow = false;
	Vec3 ligth_distance = (lights.light_world[i].origin - intersection);
	float distance = ligth_distance.lenght();

	//fire a ray from intersection position to the light to check to check if the object is shadowed
	Ray shadow_ray = { intersection, (lights.light_world[i].origin - intersection).normalize() };

	for ( int j = 0; j < objects.current_size; ++j)
	{
	intersections.push_back(std::pair<float, int>(objects.ray_intersection(shadow_ray,j), j));
	}

	for (unsigned int k = 0; k < intersections.size(); ++k)
	{
	if (intersections[k].first> eps && intersections[k].first <= distance)
	{
	shadow = true;
	break;
	}
	}
	if (!shadow)
	{
	finalcolor = finalcolor + ((color * lights.color[i]) * dot);
	}
	}

	}

	return finalcolor.saturate();
	}

	// do the actual job
	void process_image(int w, int h)
	{
	ColorBMP* data = new ColorBMP[w* h];
	memset(data, 0, w* h * sizeof(ColorBMP));
	Objects<max_scene_size> object_container;
	Light<max_scene_size> light_container;
	Vec3 pos = { 6.0f, 1.5f, -3.0f };
	Vec3 look = { .0f, .0f, .0f };
	Vec3 Y = { .0f, 1.0f, .0f };
	Vec3 camdir = (look - pos).normalize();
	Vec3 camright = Y.cross(camdir).normalize();
	Vec3 camup = camright.cross(camdir).normalize();
	//we have a camera
	Camera camera = { pos, camdir, camright, camup };

	//add a light
	light_container.add_light({ { -7.0f, 10.0f, -10.0f }, { 0.0f, 0.0f, 0.0f } }, { 1.0f, 1.0f, 1.0f });
	//add a couple of sphered and a plane
	object_container.add_sphere({ 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f }, 1.0f);
	object_container.add_sphere({ 3.0f, -0.5f, 0.0f }, { 1.0f, 1.0f, 0.0f }, 0.5f);
	object_container.add_Plane(Y, { 0.0f, 0.0f, 1.0f }, { -1.0f });
	std::chrono::time_point<std::chrono::system_clock> start, end;

	start = std::chrono::system_clock::now();

	float aspect = (static_cast<float>(w) / static_cast<float>(h));
	int numthread = std::max<int>(2, std::thread::hardware_concurrency());
	// must be even
	numthread % 2 == 0 ? numthread : ++numthread;
	std::vector< std::future<void> > futures;
	futures.reserve(numthread);

	// split w and h
	int splitw = w / (numthread/2);
	int splith = h/2;
	int start_x = 0;
	int start_y = 0;
	int end_x = splitw;
	int end_y = splith;
	for (int thread_idx = 0; thread_idx < numthread; ++thread_idx)
	{
	//spawn numthread threads with labmdas
	std::future<void> threads = std::async(std::launch::async, [=, &light_container, &object_container, &data](){
	std::vector<std::pair<float, int>>intersections;
	intersections.reserve(1024);
	float xinc, yincr;


	for (int i = start_x; i < end_x; ++i)
	{
	for (int j = start_y; j < end_y; ++j)
	{

	int pixelpos = i + j * w;

	// decide how to offset the camera ray according to the screen ratio

	if (aspect > 1.0)
	{
	xinc = ((i + 0.5f) / w) * aspect - (((w - h) / static_cast<float>(h)) *0.5f);
	yincr = ((h - j) + 0.5f) / h;
	}
	else if (aspect < 1.0)
	{
	xinc = (i + 0.5f) / w;
	yincr = ((h - j + 0.5f) / h) / aspect - (((h - w) / static_cast<float>(w)) *0.5f);
	}
	else
	{
	xinc = (i + 0.5f) / w;
	yincr = ((h - j + 0.5f) / h);
	}

	// starts to fire rays
	Vec3 camera_pos = camera.position;
	Vec3 camera_dir = (camera.direction + (camera.right * (xinc - 0.5f)) + (camup *(yincr - 0.5f))).normalize();
	Ray camera_ray = { camera_pos, camera_dir };

	// store the intersection between the camera and all the objects in scene.
	for ( int k = 0; k < object_container.current_size; ++k)
	{
	float intersection = object_container.ray_intersection(camera_ray,k);

	if (intersection > 0.0f)
	intersections.push_back(std::pair<float, int>(intersection, k));
	}

	// find the nearest
	std::sort(intersections.begin(), intersections.end(), [](std::pair<float, int> a, std::pair<float, int> b){return b.first > a.first; });

	if (!intersections.empty())
	{
	// get the intersection point
	Vec3 intersection = camera_pos + (camera_dir * intersections[0].first);
	Color color = compute_color(intersection, intersections[0].second, light_container, object_container, intersections, 0.2f);
	data[pixelpos].r = (unsigned char)(color.x * 255);
	data[pixelpos].g = (unsigned char)(color.y * 255);
	data[pixelpos].b = (unsigned char)(color.z * 255);

	}

	intersections.clear();

	}
	}
	});

	futures.push_back(std::move(threads));
	start_x = start_x + splitw;
	end_x = end_x + splitw;
	if (end_x > w)
	{
	start_x = 0;
	end_x = splitw;
	start_y = start_y + splith;
	end_y = start_y + splith;
	}

	}

	// wait till all the threads are done and wait 2 ms
	while (!futures.empty())
	{
	if (futures.back().wait_for(std::chrono::milliseconds(2)) == std::future_status::ready)
	{
	futures.pop_back();
	}

	}

	end = std::chrono::system_clock::now();
	std::chrono::duration<double> elapsed_seconds = end - start;
	printf("rendering took %fs\n", elapsed_seconds);
	writebmp("test.bmp", data, img_width, img_height);
	delete[] data;

	system("pause");
	}

	void draw_scene()
	{
	process_image(img_width, img_height);

	}

	int main()
	{
	draw_scene();
	return 0;
	}