xor-shift/hello.zig

## hello.zig
const std = @import("std");

fn vsop(v: [3]f32, s: f32, comptime op: anytype) [3]f32 {
    var ret: [3]f32 = undefined;
    for (0.., v) |i, vv| ret[i] = op.aufruf(vv, s);
    return ret;
}

fn vmuls(v: [3]f32, s: f32) [3]f32 {
    return vsop(v, s, struct {
        fn aufruf(a: f32, b: f32) f32 {
            return a * b;
        }
    });
}

fn vdivs(v: [3]f32, s: f32) [3]f32 {
    return vsop(v, s, struct {
        fn aufruf(a: f32, b: f32) f32 {
            return a / b;
        }
    });
}

fn vvop(a: [3]f32, b: [3]f32, comptime op: anytype) [3]f32 {
    var ret: [3]f32 = undefined;
    for (0.., a, b) |i, av, bv| ret[i] = op.aufruf(av, bv);
    return ret;
}

fn vaddv(a: [3]f32, b: [3]f32) [3]f32 {
    return vvop(a, b, struct {
        fn aufruf(av: f32, bv: f32) f32 {
            return av + bv;
        }
    });
}

fn vsubv(a: [3]f32, b: [3]f32) [3]f32 {
    return vvop(a, b, struct {
        fn aufruf(av: f32, bv: f32) f32 {
            return av - bv;
        }
    });
}

fn vmulv(a: [3]f32, b: [3]f32) [3]f32 {
    return vvop(a, b, struct {
        fn aufruf(av: f32, bv: f32) f32 {
            return av * bv;
        }
    });
}

fn vneg(vec: [3]f32) [3]f32 {
    var ret: [3]f32 = undefined;
    for (0.., vec) |i, v| ret[i] = -v;
    return ret;
}

fn vabs(vec: [3]f32) [3]f32 {
    var ret: [3]f32 = undefined;
    for (0.., vec) |i, v| ret[i] = @abs(v);
    return ret;
}

fn ssplat(s: f32) [3]f32 {
    return [_]f32{ s, s, s };
}

fn dot(a: [3]f32, b: [3]f32) f32 {
    var sum: f32 = 0.0;
    for (a, b) |va, vb| sum += va * vb;
    return sum;
}

fn length(vec: [3]f32) f32 {
    return std.math.sqrt(dot(vec, vec));
}

fn normalized(vec: [3]f32) [3]f32 {
    return vdivs(vec, length(vec));
}

const ray = struct {
    origin: [3]f32,
    direction: [3]f32,
};

const material = union(enum) {
    lambertian: [3]f32,
    light: [3]f32,
    mirror: f32,
    dielectric: f32,
};

const sphere = struct {
    center: [3]f32,
    radius: f32,
    mat: material,
};

const intersection = struct {
    t: f32,
    point: [3]f32,
};

fn raysphere(r: ray, s: sphere) ?f32 {
    const dir_sphere = vsubv(r.origin, s.center);
    const temp = dot(r.direction, dir_sphere);

    const delta = temp * temp - (dot(dir_sphere, dir_sphere) - s.radius * s.radius);

    if (delta < 0) return null;

    const sq_delta = std.math.sqrt(delta);

    const s0 = -temp + sq_delta;
    const s1 = -temp - sq_delta;

    if (s0 < 0 and s1 < 0) return null;
    if (s0 < 0) return s1;
    if (s1 < 0) return s0;
    return @min(s0, s1);
}

fn vreflect(r: [3]f32, on: [3]f32) [3]f32 {
    return vsubv(r, vmuls(on, 2 * dot(r, on)));
}

fn schlick(cos_theta_i: f32, n1: f32, n2: f32) f32 {
    const sqrt_r0 = (n1 - n2) / (n1 + n2);
    const r0 = sqrt_r0 * sqrt_r0;
    const r = r0 + (1 - r0) * std.math.pow(f32, 1 - cos_theta_i, 5);

    return r;
}

fn vrefract(r: [3]f32, on: [3]f32, n1: f32, n2: f32) ?struct { ray: [3]f32, refl_prob: f32 } {
    const index_ratio = n1 / n2;

    const cos_theta_i = dot(vneg(r), on);
    const sin_sq_theta_i = 1 - cos_theta_i * cos_theta_i;
    const sin_sq_theta_t = index_ratio * index_ratio * sin_sq_theta_i;

    if (sin_sq_theta_t > 1) {
        return null;
    }

    const cos_theta_t = std.math.sqrt(1 - sin_sq_theta_t);

    return .{
        .ray = vaddv(vmuls(r, index_ratio), vmuls(on, index_ratio * cos_theta_i - cos_theta_t)),
        .refl_prob = schlick(cos_theta_i, n1, n2),
    };
}

fn trace_one(first_ray: ray, bounces: usize, prng: *const std.Random) [3]f32 {
    const box_base = 1000.0;
    const box_dims = vdivs([_]f32{ 5.0, 4.0, 17.5 }, 3.0);
    const sphere_rad = 0.28;
    const sphere_offset = 0.1;
    const sphere_depth = 0.6;

    const spheres = [_]sphere{
        .{
            .center = vaddv(vaddv( //
                [_]f32{ -box_dims[0] / 2, -box_dims[1] / 2, box_dims[2] / 2 }, //
                [_]f32{ sphere_rad, sphere_rad, -sphere_rad } //
            ), [_]f32{ sphere_offset, 0, -sphere_offset }),
            .radius = sphere_rad,
            //.mat = .{ .lambertian = [3]f32{ 1, 1, 1 } },
            .mat = .{ .mirror = 0.999 },
        },
        .{
            .center = vaddv(vaddv( //
                [_]f32{ box_dims[0] / 2, -box_dims[1] / 2, box_dims[2] / 2 - sphere_depth }, //
                [_]f32{ -sphere_rad, sphere_rad, -sphere_rad } //
            ), [_]f32{ -sphere_offset, 0, sphere_offset }),
            .radius = sphere_rad,
            //.mat = .{ .lambertian = [3]f32{ 1, 1, 1 } },
            .mat = .{ .dielectric = 1.5 },
        },

        .{
            .center = [_]f32{ 0, box_base / 50 + box_dims[1] / 2 - 0.0001, box_dims[2] / 2 - sphere_offset - sphere_depth },
            .radius = box_base / 50,
            .mat = .{ .light = [3]f32{ 12, 12, 12 } },
        },

        .{
            .center = [_]f32{ box_base + box_dims[0] / 2, 0, 0 },
            .radius = box_base,
            .mat = .{ .lambertian = [3]f32{ 0.25, 0.25, 0.75 } },
        },
        .{
            .center = [_]f32{ 0, box_base + box_dims[1] / 2, 0 },
            .radius = box_base,
            .mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } },
        },
        .{
            .center = [_]f32{ box_base - box_dims[0] / 2, 0, 0 },
            .radius = box_base,
            .mat = .{ .lambertian = [3]f32{ 0.75, 0.25, 0.25 } },
        },
        .{
            .center = [_]f32{ 0, box_base - box_dims[1] / 2, 0 },
            .radius = box_base,
            .mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } },
        },
        .{
            .center = [_]f32{ 0, 0, box_base + box_dims[2] / 2 },
            .radius = box_base,
            .mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } },
        },
    };

    var attenuation = [_]f32{ 1, 1, 1 };
    var accumulation = [_]f32{ 0, 0, 0 };
    var cur_ray = first_ray;
    for (0..bounces) |_| {
        var closest_t: ?f32 = null;
        var closest_sphere: sphere = undefined;

        for (spheres) |s| {
            const res = raysphere(cur_ray, s) orelse continue;

            if (res > closest_t orelse std.math.inf(f32)) continue;

            closest_t = res;
            closest_sphere = s;
        }

        const t = closest_t orelse break;
        const pt = vaddv(cur_ray.origin, vmuls(cur_ray.direction, t));
        const n = vdivs(vsubv(pt, closest_sphere.center), closest_sphere.radius);
        const going_in = dot(n, cur_ray.direction) < 0;
        const on = if (going_in) n else vneg(n);

        if (true) {
            //return vabs(on);
        }

        switch (closest_sphere.mat) {
            .light => |brightness| {
                accumulation = vaddv(accumulation, vmulv(attenuation, brightness));
            },
            else => {},
        }

        // Dot product factor Times BRDF Over PDF
        var dtbop: f32 = undefined;
        const new_dir = switch (closest_sphere.mat) {
            .light, .lambertian => ret: {
                const theta = prng.float(f32) * 2 * std.math.pi;
                const cos_phi = 2 * prng.float(f32) - 1;

                const sin_phi = std.math.sqrt(1 - cos_phi * cos_phi);
                const sin_theta = std.math.sin(theta);
                const cos_theta = std.math.cos(theta);

                const rand_vec = [_]f32{ sin_theta * sin_phi, cos_theta * sin_phi, cos_phi };

                const oriented_vec = if (dot(rand_vec, on) < 0) vneg(rand_vec) else rand_vec;

                dtbop = dot(oriented_vec, on);
                break :ret oriented_vec;
            },

            .dielectric => |ior| ret: {
                dtbop = 1;

                const n_i = if (going_in) 1.0 else ior;
                const n_t = if (going_in) ior else 1.0;

                const dice_roll = prng.float(f32);
                const refract_res = vrefract(cur_ray.direction, on, n_i, n_t) orelse {
                    break :ret vreflect(cur_ray.direction, on);
                };

                //if (true) return refract_res.ray;
                //if (true) return ssplat(refract_res.refl_prob);

                if (dice_roll < refract_res.refl_prob) {
                    //if (true) @panic("asd");
                    break :ret vreflect(cur_ray.direction, on);
                }

                break :ret refract_res.ray;
            },

            .mirror => vreflect(cur_ray.direction, on),
        };
        cur_ray.direction = new_dir;
        cur_ray.origin = vaddv(pt, vmuls(if (dot(new_dir, on) < 0) vneg(on) else on, 0.001));

        switch (closest_sphere.mat) {
            .lambertian => |color| {
                attenuation = vmulv(attenuation, vmuls(color, dtbop));
            },
            .light => {
                attenuation = vmuls(attenuation, dtbop);
            },
            .mirror => |reflectivity| {
                attenuation = vmuls(attenuation, reflectivity);
            },
            .dielectric => {
                attenuation = vmuls(attenuation, dtbop);
            },
        }
    }

    return accumulation;
}

const pixel = [3]f32;
const config = struct {
    no_threads: usize,
    dims: [2]usize,
    samples: usize,
    bounces: usize,
};

fn lienar_to_srgb(v: f32) f32 {
    return if (v <= 0.00031308) 12.92 * v else 1.055 * std.math.pow(f32, v, (1.0 / 2.4)) - 0.055;
}

fn thread(id: usize, pixels: []pixel, cfg: *const config) void {
    const float_dims = [_]f32{
        @as(f32, @floatFromInt(cfg.dims[0])),
        @as(f32, @floatFromInt(cfg.dims[1])),
    };

    var prng = std.rand.Xoshiro256.init(seed_val: {
        var seed: u64 = undefined;
        std.posix.getrandom(std.mem.asBytes(&seed)) catch unreachable;
        break :seed_val seed;
    });
    const rand = prng.random();

    var row = id;
    while (row < cfg.dims[1]) : (row += cfg.no_threads) {
        if (row >= cfg.dims[1]) break;

        for (0..cfg.dims[0]) |col| {
            const idx = col + row * cfg.dims[0];

            var sum = [_]f32{ 0, 0, 0 };

            for (0..cfg.samples) |_| sum = vaddv(sum, trace_one(ray{
                .origin = [_]f32{ 0, 0.1, 0 },
                .direction = normalized([_]f32{
                    ((@as(f32, @floatFromInt(col)) + rand.float(f32)) / float_dims[0] - 0.5) * float_dims[0] / float_dims[1],
                    1 - (@as(f32, @floatFromInt(row)) + rand.float(f32)) / float_dims[1] - 0.5,
                    1.0,
                }),
            }, cfg.bounces, &rand));

            const linear = vdivs(sum, @floatFromInt(cfg.samples)); // magick doesn't respect the linear channel declaration

            const srgb = [_]f32{
                lienar_to_srgb(linear[0]),
                lienar_to_srgb(linear[1]),
                lienar_to_srgb(linear[2]),
            };

            pixels[idx] = srgb;
        }
    }
}

fn qoi_hash(pix: [4]u8) u8 {
    return (pix[0] *% 3 +% pix[1] *% 5 +% pix[2] *% 7 +% pix[3] *% 11) % 64;
}

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const cfg: config = .{
        .no_threads = 7,
        .dims = [_]usize{ 600, 400 },
        .samples = 1024,
        .bounces = 5,
    };

    const pixels: []pixel = try allocator.alloc(pixel, cfg.dims[0] * cfg.dims[1]);

    var threads: []std.Thread = try allocator.alloc(std.Thread, cfg.no_threads);
    for (0..cfg.no_threads) |thread_id| {
        threads[thread_id] = try std.Thread.spawn(.{}, thread, .{ thread_id, pixels, &cfg });
        //thread(thread_id, pixels, &cfg);
    }

    for (threads) |t| t.join();

    const stdout_file = std.io.getStdOut().writer();
    var bw = std.io.bufferedWriter(stdout_file);
    const stdout = bw.writer();

    _ = stdout.write(&[_]u8{ 'q', 'o', 'i', 'f' }) catch unreachable;
    _ = stdout.write(&std.mem.toBytes(std.mem.nativeToBig(u32, cfg.dims[0]))) catch unreachable;
    _ = stdout.write(&std.mem.toBytes(std.mem.nativeToBig(u32, cfg.dims[1]))) catch unreachable;
    _ = stdout.write(&[_]u8{ 4, 0 }) catch unreachable;

    var pixel_table: [64][4]u8 = .{[_]u8{ 0, 0, 0, 0 }} ** 64;
    var last_pixel: [4]u8 = .{ 0, 0, 0, 255 };
    var cur_run: u8 = 0;
    for (pixels) |pix| {
        const cur_pix = [_]u8{
            @as(u8, @intFromFloat((std.math.clamp(pix[0], 0, 1)) * 255.0)),
            @as(u8, @intFromFloat((std.math.clamp(pix[1], 0, 1)) * 255.0)),
            @as(u8, @intFromFloat((std.math.clamp(pix[2], 0, 1)) * 255.0)),
            255,
        };
        defer last_pixel = cur_pix;
        defer pixel_table[qoi_hash(cur_pix)] = cur_pix;

        if (cur_run == 62) {
            _ = stdout.writeByte(0xC0 | (cur_run - 1)) catch unreachable;
            cur_run = 0;
        }

        if (std.mem.eql(u8, &cur_pix, &last_pixel)) {
            cur_run += 1;
            continue;
        }

        if (cur_run != 0) {
            _ = stdout.writeByte(0xC0 | (cur_run - 1)) catch unreachable;
            cur_run = 0;
        }

        if (std.mem.eql(u8, &cur_pix, &pixel_table[qoi_hash(cur_pix)])) {
            _ = stdout.writeByte(qoi_hash(cur_pix)) catch unreachable;
            continue;
        }

        const dr = cur_pix[0] -% last_pixel[0];
        const dg = cur_pix[1] -% last_pixel[1];
        const db = cur_pix[2] -% last_pixel[2];

        if ((dr +% 2) < 4 and (dg +% 2) < 4 and (db +% 2) < 4) {
            const delta = ((dr +% 2) << 4) | ((dg +% 2) << 2) | (db +% 2);
            _ = stdout.writeByte(0x40 | delta) catch unreachable;
            continue;
        }

        const drdg = dr -% dg;
        const dbdg = db -% dg;

        if ((dg +% 32) < 64 and (drdg +% 8) < 16 and (dbdg +% 8) < 16) {
            _ = stdout.writeByte(0x80 | (dg +% 32)) catch unreachable;
            _ = stdout.writeByte(((drdg +% 8) << 4) | (dbdg +% 8)) catch unreachable;
            continue;
        }

        if (last_pixel[3] == cur_pix[3]) {
            _ = stdout.write(&[_]u8{ 0xFE, cur_pix[0], cur_pix[1], cur_pix[2] }) catch unreachable;
        } else {
            _ = stdout.write(&[_]u8{ 0xFF, cur_pix[0], cur_pix[1], cur_pix[2], cur_pix[3] }) catch unreachable;
        }
    }
    if (cur_run != 0) {
        _ = stdout.writeByte(0xC0 | (cur_run - 1)) catch unreachable;
        cur_run = 0;
    }

    _ = stdout.write(&[_]u8{ 0, 0, 0, 0, 0, 0, 0, 1 }) catch unreachable;

    try bw.flush();
}
	const std = @import("std");

	fn vsop(v: [3]f32, s: f32, comptime op: anytype) [3]f32 {
	var ret: [3]f32 = undefined;
	for (0.., v) \|i, vv\| ret[i] = op.aufruf(vv, s);
	return ret;
	}

	fn vmuls(v: [3]f32, s: f32) [3]f32 {
	return vsop(v, s, struct {
	fn aufruf(a: f32, b: f32) f32 {
	return a * b;
	}
	});
	}

	fn vdivs(v: [3]f32, s: f32) [3]f32 {
	return vsop(v, s, struct {
	fn aufruf(a: f32, b: f32) f32 {
	return a / b;
	}
	});
	}

	fn vvop(a: [3]f32, b: [3]f32, comptime op: anytype) [3]f32 {
	var ret: [3]f32 = undefined;
	for (0.., a, b) \|i, av, bv\| ret[i] = op.aufruf(av, bv);
	return ret;
	}

	fn vaddv(a: [3]f32, b: [3]f32) [3]f32 {
	return vvop(a, b, struct {
	fn aufruf(av: f32, bv: f32) f32 {
	return av + bv;
	}
	});
	}

	fn vsubv(a: [3]f32, b: [3]f32) [3]f32 {
	return vvop(a, b, struct {
	fn aufruf(av: f32, bv: f32) f32 {
	return av - bv;
	}
	});
	}

	fn vmulv(a: [3]f32, b: [3]f32) [3]f32 {
	return vvop(a, b, struct {
	fn aufruf(av: f32, bv: f32) f32 {
	return av * bv;
	}
	});
	}

	fn vneg(vec: [3]f32) [3]f32 {
	var ret: [3]f32 = undefined;
	for (0.., vec) \|i, v\| ret[i] = -v;
	return ret;
	}

	fn vabs(vec: [3]f32) [3]f32 {
	var ret: [3]f32 = undefined;
	for (0.., vec) \|i, v\| ret[i] = @abs(v);
	return ret;
	}

	fn ssplat(s: f32) [3]f32 {
	return [_]f32{ s, s, s };
	}

	fn dot(a: [3]f32, b: [3]f32) f32 {
	var sum: f32 = 0.0;
	for (a, b) \|va, vb\| sum += va * vb;
	return sum;
	}

	fn length(vec: [3]f32) f32 {
	return std.math.sqrt(dot(vec, vec));
	}

	fn normalized(vec: [3]f32) [3]f32 {
	return vdivs(vec, length(vec));
	}

	const ray = struct {
	origin: [3]f32,
	direction: [3]f32,
	};

	const material = union(enum) {
	lambertian: [3]f32,
	light: [3]f32,
	mirror: f32,
	dielectric: f32,
	};

	const sphere = struct {
	center: [3]f32,
	radius: f32,
	mat: material,
	};

	const intersection = struct {
	t: f32,
	point: [3]f32,
	};

	fn raysphere(r: ray, s: sphere) ?f32 {
	const dir_sphere = vsubv(r.origin, s.center);
	const temp = dot(r.direction, dir_sphere);

	const delta = temp * temp - (dot(dir_sphere, dir_sphere) - s.radius * s.radius);

	if (delta < 0) return null;

	const sq_delta = std.math.sqrt(delta);

	const s0 = -temp + sq_delta;
	const s1 = -temp - sq_delta;

	if (s0 < 0 and s1 < 0) return null;
	if (s0 < 0) return s1;
	if (s1 < 0) return s0;
	return @min(s0, s1);
	}

	fn vreflect(r: [3]f32, on: [3]f32) [3]f32 {
	return vsubv(r, vmuls(on, 2 * dot(r, on)));
	}

	fn schlick(cos_theta_i: f32, n1: f32, n2: f32) f32 {
	const sqrt_r0 = (n1 - n2) / (n1 + n2);
	const r0 = sqrt_r0 * sqrt_r0;
	const r = r0 + (1 - r0) * std.math.pow(f32, 1 - cos_theta_i, 5);

	return r;
	}

	fn vrefract(r: [3]f32, on: [3]f32, n1: f32, n2: f32) ?struct { ray: [3]f32, refl_prob: f32 } {
	const index_ratio = n1 / n2;

	const cos_theta_i = dot(vneg(r), on);
	const sin_sq_theta_i = 1 - cos_theta_i * cos_theta_i;
	const sin_sq_theta_t = index_ratio * index_ratio * sin_sq_theta_i;

	if (sin_sq_theta_t > 1) {
	return null;
	}

	const cos_theta_t = std.math.sqrt(1 - sin_sq_theta_t);

	return .{
	.ray = vaddv(vmuls(r, index_ratio), vmuls(on, index_ratio * cos_theta_i - cos_theta_t)),
	.refl_prob = schlick(cos_theta_i, n1, n2),
	};
	}

	fn trace_one(first_ray: ray, bounces: usize, prng: *const std.Random) [3]f32 {
	const box_base = 1000.0;
	const box_dims = vdivs([_]f32{ 5.0, 4.0, 17.5 }, 3.0);
	const sphere_rad = 0.28;
	const sphere_offset = 0.1;
	const sphere_depth = 0.6;

	const spheres = [_]sphere{
	.{
	.center = vaddv(vaddv( //
	[_]f32{ -box_dims[0] / 2, -box_dims[1] / 2, box_dims[2] / 2 }, //
	[_]f32{ sphere_rad, sphere_rad, -sphere_rad } //
	), [_]f32{ sphere_offset, 0, -sphere_offset }),
	.radius = sphere_rad,
	//.mat = .{ .lambertian = [3]f32{ 1, 1, 1 } },
	.mat = .{ .mirror = 0.999 },
	},
	.{
	.center = vaddv(vaddv( //
	[_]f32{ box_dims[0] / 2, -box_dims[1] / 2, box_dims[2] / 2 - sphere_depth }, //
	[_]f32{ -sphere_rad, sphere_rad, -sphere_rad } //
	), [_]f32{ -sphere_offset, 0, sphere_offset }),
	.radius = sphere_rad,
	//.mat = .{ .lambertian = [3]f32{ 1, 1, 1 } },
	.mat = .{ .dielectric = 1.5 },
	},

	.{
	.center = [_]f32{ 0, box_base / 50 + box_dims[1] / 2 - 0.0001, box_dims[2] / 2 - sphere_offset - sphere_depth },
	.radius = box_base / 50,
	.mat = .{ .light = [3]f32{ 12, 12, 12 } },
	},

	.{
	.center = [_]f32{ box_base + box_dims[0] / 2, 0, 0 },
	.radius = box_base,
	.mat = .{ .lambertian = [3]f32{ 0.25, 0.25, 0.75 } },
	},
	.{
	.center = [_]f32{ 0, box_base + box_dims[1] / 2, 0 },
	.radius = box_base,
	.mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } },
	},
	.{
	.center = [_]f32{ box_base - box_dims[0] / 2, 0, 0 },
	.radius = box_base,
	.mat = .{ .lambertian = [3]f32{ 0.75, 0.25, 0.25 } },
	},
	.{
	.center = [_]f32{ 0, box_base - box_dims[1] / 2, 0 },
	.radius = box_base,
	.mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } },
	},
	.{
	.center = [_]f32{ 0, 0, box_base + box_dims[2] / 2 },
	.radius = box_base,
	.mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } },
	},
	};

	var attenuation = [_]f32{ 1, 1, 1 };
	var accumulation = [_]f32{ 0, 0, 0 };
	var cur_ray = first_ray;
	for (0..bounces) \|_\| {
	var closest_t: ?f32 = null;
	var closest_sphere: sphere = undefined;

	for (spheres) \|s\| {
	const res = raysphere(cur_ray, s) orelse continue;

	if (res > closest_t orelse std.math.inf(f32)) continue;

	closest_t = res;
	closest_sphere = s;
	}

	const t = closest_t orelse break;
	const pt = vaddv(cur_ray.origin, vmuls(cur_ray.direction, t));
	const n = vdivs(vsubv(pt, closest_sphere.center), closest_sphere.radius);
	const going_in = dot(n, cur_ray.direction) < 0;
	const on = if (going_in) n else vneg(n);

	if (true) {
	//return vabs(on);
	}

	switch (closest_sphere.mat) {
	.light => \|brightness\| {
	accumulation = vaddv(accumulation, vmulv(attenuation, brightness));
	},
	else => {},
	}

	// Dot product factor Times BRDF Over PDF
	var dtbop: f32 = undefined;
	const new_dir = switch (closest_sphere.mat) {
	.light, .lambertian => ret: {
	const theta = prng.float(f32) * 2 * std.math.pi;
	const cos_phi = 2 * prng.float(f32) - 1;

	const sin_phi = std.math.sqrt(1 - cos_phi * cos_phi);
	const sin_theta = std.math.sin(theta);
	const cos_theta = std.math.cos(theta);

	const rand_vec = [_]f32{ sin_theta * sin_phi, cos_theta * sin_phi, cos_phi };

	const oriented_vec = if (dot(rand_vec, on) < 0) vneg(rand_vec) else rand_vec;

	dtbop = dot(oriented_vec, on);
	break :ret oriented_vec;
	},

	.dielectric => \|ior\| ret: {
	dtbop = 1;

	const n_i = if (going_in) 1.0 else ior;
	const n_t = if (going_in) ior else 1.0;

	const dice_roll = prng.float(f32);
	const refract_res = vrefract(cur_ray.direction, on, n_i, n_t) orelse {
	break :ret vreflect(cur_ray.direction, on);
	};

	//if (true) return refract_res.ray;
	//if (true) return ssplat(refract_res.refl_prob);

	if (dice_roll < refract_res.refl_prob) {
	//if (true) @panic("asd");
	break :ret vreflect(cur_ray.direction, on);
	}

	break :ret refract_res.ray;
	},

	.mirror => vreflect(cur_ray.direction, on),
	};
	cur_ray.direction = new_dir;
	cur_ray.origin = vaddv(pt, vmuls(if (dot(new_dir, on) < 0) vneg(on) else on, 0.001));

	switch (closest_sphere.mat) {
	.lambertian => \|color\| {
	attenuation = vmulv(attenuation, vmuls(color, dtbop));
	},
	.light => {
	attenuation = vmuls(attenuation, dtbop);
	},
	.mirror => \|reflectivity\| {
	attenuation = vmuls(attenuation, reflectivity);
	},
	.dielectric => {
	attenuation = vmuls(attenuation, dtbop);
	},
	}
	}

	return accumulation;
	}

	const pixel = [3]f32;
	const config = struct {
	no_threads: usize,
	dims: [2]usize,
	samples: usize,
	bounces: usize,
	};

	fn lienar_to_srgb(v: f32) f32 {
	return if (v <= 0.00031308) 12.92 * v else 1.055 * std.math.pow(f32, v, (1.0 / 2.4)) - 0.055;
	}

	fn thread(id: usize, pixels: []pixel, cfg: *const config) void {
	const float_dims = [_]f32{
	@as(f32, @floatFromInt(cfg.dims[0])),
	@as(f32, @floatFromInt(cfg.dims[1])),
	};

	var prng = std.rand.Xoshiro256.init(seed_val: {
	var seed: u64 = undefined;
	std.posix.getrandom(std.mem.asBytes(&seed)) catch unreachable;
	break :seed_val seed;
	});
	const rand = prng.random();

	var row = id;
	while (row < cfg.dims[1]) : (row += cfg.no_threads) {
	if (row >= cfg.dims[1]) break;

	for (0..cfg.dims[0]) \|col\| {
	const idx = col + row * cfg.dims[0];

	var sum = [_]f32{ 0, 0, 0 };

	for (0..cfg.samples) \|_\| sum = vaddv(sum, trace_one(ray{
	.origin = [_]f32{ 0, 0.1, 0 },
	.direction = normalized([_]f32{
	((@as(f32, @floatFromInt(col)) + rand.float(f32)) / float_dims[0] - 0.5) * float_dims[0] / float_dims[1],
	1 - (@as(f32, @floatFromInt(row)) + rand.float(f32)) / float_dims[1] - 0.5,
	1.0,
	}),
	}, cfg.bounces, &rand));

	const linear = vdivs(sum, @floatFromInt(cfg.samples)); // magick doesn't respect the linear channel declaration

	const srgb = [_]f32{
	lienar_to_srgb(linear[0]),
	lienar_to_srgb(linear[1]),
	lienar_to_srgb(linear[2]),
	};

	pixels[idx] = srgb;
	}
	}
	}

	fn qoi_hash(pix: [4]u8) u8 {
	return (pix[0] % 3 +% pix[1] % 5 +% pix[2] % 7 +% pix[3] % 11) % 64;
	}

	pub fn main() !void {
	var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
	defer arena.deinit();
	const allocator = arena.allocator();

	const cfg: config = .{
	.no_threads = 7,
	.dims = [_]usize{ 600, 400 },
	.samples = 1024,
	.bounces = 5,
	};

	const pixels: []pixel = try allocator.alloc(pixel, cfg.dims[0] * cfg.dims[1]);

	var threads: []std.Thread = try allocator.alloc(std.Thread, cfg.no_threads);
	for (0..cfg.no_threads) \|thread_id\| {
	threads[thread_id] = try std.Thread.spawn(.{}, thread, .{ thread_id, pixels, &cfg });
	//thread(thread_id, pixels, &cfg);
	}

	for (threads) \|t\| t.join();

	const stdout_file = std.io.getStdOut().writer();
	var bw = std.io.bufferedWriter(stdout_file);
	const stdout = bw.writer();

	_ = stdout.write(&[_]u8{ 'q', 'o', 'i', 'f' }) catch unreachable;
	_ = stdout.write(&std.mem.toBytes(std.mem.nativeToBig(u32, cfg.dims[0]))) catch unreachable;
	_ = stdout.write(&std.mem.toBytes(std.mem.nativeToBig(u32, cfg.dims[1]))) catch unreachable;
	_ = stdout.write(&[_]u8{ 4, 0 }) catch unreachable;

	var pixel_table: [64][4]u8 = .{[_]u8{ 0, 0, 0, 0 }} ** 64;
	var last_pixel: [4]u8 = .{ 0, 0, 0, 255 };
	var cur_run: u8 = 0;
	for (pixels) \|pix\| {
	const cur_pix = [_]u8{
	@as(u8, @intFromFloat((std.math.clamp(pix[0], 0, 1)) * 255.0)),
	@as(u8, @intFromFloat((std.math.clamp(pix[1], 0, 1)) * 255.0)),
	@as(u8, @intFromFloat((std.math.clamp(pix[2], 0, 1)) * 255.0)),
	255,
	};
	defer last_pixel = cur_pix;
	defer pixel_table[qoi_hash(cur_pix)] = cur_pix;

	if (cur_run == 62) {
	_ = stdout.writeByte(0xC0 \| (cur_run - 1)) catch unreachable;
	cur_run = 0;
	}

	if (std.mem.eql(u8, &cur_pix, &last_pixel)) {
	cur_run += 1;
	continue;
	}

	if (cur_run != 0) {
	_ = stdout.writeByte(0xC0 \| (cur_run - 1)) catch unreachable;
	cur_run = 0;
	}

	if (std.mem.eql(u8, &cur_pix, &pixel_table[qoi_hash(cur_pix)])) {
	_ = stdout.writeByte(qoi_hash(cur_pix)) catch unreachable;
	continue;
	}

	const dr = cur_pix[0] -% last_pixel[0];
	const dg = cur_pix[1] -% last_pixel[1];
	const db = cur_pix[2] -% last_pixel[2];

	if ((dr +% 2) < 4 and (dg +% 2) < 4 and (db +% 2) < 4) {
	const delta = ((dr +% 2) << 4) \| ((dg +% 2) << 2) \| (db +% 2);
	_ = stdout.writeByte(0x40 \| delta) catch unreachable;
	continue;
	}

	const drdg = dr -% dg;
	const dbdg = db -% dg;

	if ((dg +% 32) < 64 and (drdg +% 8) < 16 and (dbdg +% 8) < 16) {
	_ = stdout.writeByte(0x80 \| (dg +% 32)) catch unreachable;
	_ = stdout.writeByte(((drdg +% 8) << 4) \| (dbdg +% 8)) catch unreachable;
	continue;
	}

	if (last_pixel[3] == cur_pix[3]) {
	_ = stdout.write(&[_]u8{ 0xFE, cur_pix[0], cur_pix[1], cur_pix[2] }) catch unreachable;
	} else {
	_ = stdout.write(&[_]u8{ 0xFF, cur_pix[0], cur_pix[1], cur_pix[2], cur_pix[3] }) catch unreachable;
	}
	}
	if (cur_run != 0) {
	_ = stdout.writeByte(0xC0 \| (cur_run - 1)) catch unreachable;
	cur_run = 0;
	}

	_ = stdout.write(&[_]u8{ 0, 0, 0, 0, 0, 0, 0, 1 }) catch unreachable;

	try bw.flush();
	}