Created
September 26, 2024 06:55
-
-
Save xor-shift/2699115fc3b0f47350cf623a1edc824c to your computer and use it in GitHub Desktop.
Smallpt with QoI output as a Zig "hello world".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
fn vsop(v: [3]f32, s: f32, comptime op: anytype) [3]f32 { | |
var ret: [3]f32 = undefined; | |
for (0.., v) |i, vv| ret[i] = op.aufruf(vv, s); | |
return ret; | |
} | |
fn vmuls(v: [3]f32, s: f32) [3]f32 { | |
return vsop(v, s, struct { | |
fn aufruf(a: f32, b: f32) f32 { | |
return a * b; | |
} | |
}); | |
} | |
fn vdivs(v: [3]f32, s: f32) [3]f32 { | |
return vsop(v, s, struct { | |
fn aufruf(a: f32, b: f32) f32 { | |
return a / b; | |
} | |
}); | |
} | |
fn vvop(a: [3]f32, b: [3]f32, comptime op: anytype) [3]f32 { | |
var ret: [3]f32 = undefined; | |
for (0.., a, b) |i, av, bv| ret[i] = op.aufruf(av, bv); | |
return ret; | |
} | |
fn vaddv(a: [3]f32, b: [3]f32) [3]f32 { | |
return vvop(a, b, struct { | |
fn aufruf(av: f32, bv: f32) f32 { | |
return av + bv; | |
} | |
}); | |
} | |
fn vsubv(a: [3]f32, b: [3]f32) [3]f32 { | |
return vvop(a, b, struct { | |
fn aufruf(av: f32, bv: f32) f32 { | |
return av - bv; | |
} | |
}); | |
} | |
fn vmulv(a: [3]f32, b: [3]f32) [3]f32 { | |
return vvop(a, b, struct { | |
fn aufruf(av: f32, bv: f32) f32 { | |
return av * bv; | |
} | |
}); | |
} | |
fn vneg(vec: [3]f32) [3]f32 { | |
var ret: [3]f32 = undefined; | |
for (0.., vec) |i, v| ret[i] = -v; | |
return ret; | |
} | |
fn vabs(vec: [3]f32) [3]f32 { | |
var ret: [3]f32 = undefined; | |
for (0.., vec) |i, v| ret[i] = @abs(v); | |
return ret; | |
} | |
fn ssplat(s: f32) [3]f32 { | |
return [_]f32{ s, s, s }; | |
} | |
fn dot(a: [3]f32, b: [3]f32) f32 { | |
var sum: f32 = 0.0; | |
for (a, b) |va, vb| sum += va * vb; | |
return sum; | |
} | |
fn length(vec: [3]f32) f32 { | |
return std.math.sqrt(dot(vec, vec)); | |
} | |
fn normalized(vec: [3]f32) [3]f32 { | |
return vdivs(vec, length(vec)); | |
} | |
const ray = struct { | |
origin: [3]f32, | |
direction: [3]f32, | |
}; | |
const material = union(enum) { | |
lambertian: [3]f32, | |
light: [3]f32, | |
mirror: f32, | |
dielectric: f32, | |
}; | |
const sphere = struct { | |
center: [3]f32, | |
radius: f32, | |
mat: material, | |
}; | |
const intersection = struct { | |
t: f32, | |
point: [3]f32, | |
}; | |
fn raysphere(r: ray, s: sphere) ?f32 { | |
const dir_sphere = vsubv(r.origin, s.center); | |
const temp = dot(r.direction, dir_sphere); | |
const delta = temp * temp - (dot(dir_sphere, dir_sphere) - s.radius * s.radius); | |
if (delta < 0) return null; | |
const sq_delta = std.math.sqrt(delta); | |
const s0 = -temp + sq_delta; | |
const s1 = -temp - sq_delta; | |
if (s0 < 0 and s1 < 0) return null; | |
if (s0 < 0) return s1; | |
if (s1 < 0) return s0; | |
return @min(s0, s1); | |
} | |
fn vreflect(r: [3]f32, on: [3]f32) [3]f32 { | |
return vsubv(r, vmuls(on, 2 * dot(r, on))); | |
} | |
fn schlick(cos_theta_i: f32, n1: f32, n2: f32) f32 { | |
const sqrt_r0 = (n1 - n2) / (n1 + n2); | |
const r0 = sqrt_r0 * sqrt_r0; | |
const r = r0 + (1 - r0) * std.math.pow(f32, 1 - cos_theta_i, 5); | |
return r; | |
} | |
fn vrefract(r: [3]f32, on: [3]f32, n1: f32, n2: f32) ?struct { ray: [3]f32, refl_prob: f32 } { | |
const index_ratio = n1 / n2; | |
const cos_theta_i = dot(vneg(r), on); | |
const sin_sq_theta_i = 1 - cos_theta_i * cos_theta_i; | |
const sin_sq_theta_t = index_ratio * index_ratio * sin_sq_theta_i; | |
if (sin_sq_theta_t > 1) { | |
return null; | |
} | |
const cos_theta_t = std.math.sqrt(1 - sin_sq_theta_t); | |
return .{ | |
.ray = vaddv(vmuls(r, index_ratio), vmuls(on, index_ratio * cos_theta_i - cos_theta_t)), | |
.refl_prob = schlick(cos_theta_i, n1, n2), | |
}; | |
} | |
fn trace_one(first_ray: ray, bounces: usize, prng: *const std.Random) [3]f32 { | |
const box_base = 1000.0; | |
const box_dims = vdivs([_]f32{ 5.0, 4.0, 17.5 }, 3.0); | |
const sphere_rad = 0.28; | |
const sphere_offset = 0.1; | |
const sphere_depth = 0.6; | |
const spheres = [_]sphere{ | |
.{ | |
.center = vaddv(vaddv( // | |
[_]f32{ -box_dims[0] / 2, -box_dims[1] / 2, box_dims[2] / 2 }, // | |
[_]f32{ sphere_rad, sphere_rad, -sphere_rad } // | |
), [_]f32{ sphere_offset, 0, -sphere_offset }), | |
.radius = sphere_rad, | |
//.mat = .{ .lambertian = [3]f32{ 1, 1, 1 } }, | |
.mat = .{ .mirror = 0.999 }, | |
}, | |
.{ | |
.center = vaddv(vaddv( // | |
[_]f32{ box_dims[0] / 2, -box_dims[1] / 2, box_dims[2] / 2 - sphere_depth }, // | |
[_]f32{ -sphere_rad, sphere_rad, -sphere_rad } // | |
), [_]f32{ -sphere_offset, 0, sphere_offset }), | |
.radius = sphere_rad, | |
//.mat = .{ .lambertian = [3]f32{ 1, 1, 1 } }, | |
.mat = .{ .dielectric = 1.5 }, | |
}, | |
.{ | |
.center = [_]f32{ 0, box_base / 50 + box_dims[1] / 2 - 0.0001, box_dims[2] / 2 - sphere_offset - sphere_depth }, | |
.radius = box_base / 50, | |
.mat = .{ .light = [3]f32{ 12, 12, 12 } }, | |
}, | |
.{ | |
.center = [_]f32{ box_base + box_dims[0] / 2, 0, 0 }, | |
.radius = box_base, | |
.mat = .{ .lambertian = [3]f32{ 0.25, 0.25, 0.75 } }, | |
}, | |
.{ | |
.center = [_]f32{ 0, box_base + box_dims[1] / 2, 0 }, | |
.radius = box_base, | |
.mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } }, | |
}, | |
.{ | |
.center = [_]f32{ box_base - box_dims[0] / 2, 0, 0 }, | |
.radius = box_base, | |
.mat = .{ .lambertian = [3]f32{ 0.75, 0.25, 0.25 } }, | |
}, | |
.{ | |
.center = [_]f32{ 0, box_base - box_dims[1] / 2, 0 }, | |
.radius = box_base, | |
.mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } }, | |
}, | |
.{ | |
.center = [_]f32{ 0, 0, box_base + box_dims[2] / 2 }, | |
.radius = box_base, | |
.mat = .{ .lambertian = [3]f32{ 0.75, 0.75, 0.75 } }, | |
}, | |
}; | |
var attenuation = [_]f32{ 1, 1, 1 }; | |
var accumulation = [_]f32{ 0, 0, 0 }; | |
var cur_ray = first_ray; | |
for (0..bounces) |_| { | |
var closest_t: ?f32 = null; | |
var closest_sphere: sphere = undefined; | |
for (spheres) |s| { | |
const res = raysphere(cur_ray, s) orelse continue; | |
if (res > closest_t orelse std.math.inf(f32)) continue; | |
closest_t = res; | |
closest_sphere = s; | |
} | |
const t = closest_t orelse break; | |
const pt = vaddv(cur_ray.origin, vmuls(cur_ray.direction, t)); | |
const n = vdivs(vsubv(pt, closest_sphere.center), closest_sphere.radius); | |
const going_in = dot(n, cur_ray.direction) < 0; | |
const on = if (going_in) n else vneg(n); | |
if (true) { | |
//return vabs(on); | |
} | |
switch (closest_sphere.mat) { | |
.light => |brightness| { | |
accumulation = vaddv(accumulation, vmulv(attenuation, brightness)); | |
}, | |
else => {}, | |
} | |
// Dot product factor Times BRDF Over PDF | |
var dtbop: f32 = undefined; | |
const new_dir = switch (closest_sphere.mat) { | |
.light, .lambertian => ret: { | |
const theta = prng.float(f32) * 2 * std.math.pi; | |
const cos_phi = 2 * prng.float(f32) - 1; | |
const sin_phi = std.math.sqrt(1 - cos_phi * cos_phi); | |
const sin_theta = std.math.sin(theta); | |
const cos_theta = std.math.cos(theta); | |
const rand_vec = [_]f32{ sin_theta * sin_phi, cos_theta * sin_phi, cos_phi }; | |
const oriented_vec = if (dot(rand_vec, on) < 0) vneg(rand_vec) else rand_vec; | |
dtbop = dot(oriented_vec, on); | |
break :ret oriented_vec; | |
}, | |
.dielectric => |ior| ret: { | |
dtbop = 1; | |
const n_i = if (going_in) 1.0 else ior; | |
const n_t = if (going_in) ior else 1.0; | |
const dice_roll = prng.float(f32); | |
const refract_res = vrefract(cur_ray.direction, on, n_i, n_t) orelse { | |
break :ret vreflect(cur_ray.direction, on); | |
}; | |
//if (true) return refract_res.ray; | |
//if (true) return ssplat(refract_res.refl_prob); | |
if (dice_roll < refract_res.refl_prob) { | |
//if (true) @panic("asd"); | |
break :ret vreflect(cur_ray.direction, on); | |
} | |
break :ret refract_res.ray; | |
}, | |
.mirror => vreflect(cur_ray.direction, on), | |
}; | |
cur_ray.direction = new_dir; | |
cur_ray.origin = vaddv(pt, vmuls(if (dot(new_dir, on) < 0) vneg(on) else on, 0.001)); | |
switch (closest_sphere.mat) { | |
.lambertian => |color| { | |
attenuation = vmulv(attenuation, vmuls(color, dtbop)); | |
}, | |
.light => { | |
attenuation = vmuls(attenuation, dtbop); | |
}, | |
.mirror => |reflectivity| { | |
attenuation = vmuls(attenuation, reflectivity); | |
}, | |
.dielectric => { | |
attenuation = vmuls(attenuation, dtbop); | |
}, | |
} | |
} | |
return accumulation; | |
} | |
const pixel = [3]f32; | |
const config = struct { | |
no_threads: usize, | |
dims: [2]usize, | |
samples: usize, | |
bounces: usize, | |
}; | |
fn lienar_to_srgb(v: f32) f32 { | |
return if (v <= 0.00031308) 12.92 * v else 1.055 * std.math.pow(f32, v, (1.0 / 2.4)) - 0.055; | |
} | |
fn thread(id: usize, pixels: []pixel, cfg: *const config) void { | |
const float_dims = [_]f32{ | |
@as(f32, @floatFromInt(cfg.dims[0])), | |
@as(f32, @floatFromInt(cfg.dims[1])), | |
}; | |
var prng = std.rand.Xoshiro256.init(seed_val: { | |
var seed: u64 = undefined; | |
std.posix.getrandom(std.mem.asBytes(&seed)) catch unreachable; | |
break :seed_val seed; | |
}); | |
const rand = prng.random(); | |
var row = id; | |
while (row < cfg.dims[1]) : (row += cfg.no_threads) { | |
if (row >= cfg.dims[1]) break; | |
for (0..cfg.dims[0]) |col| { | |
const idx = col + row * cfg.dims[0]; | |
var sum = [_]f32{ 0, 0, 0 }; | |
for (0..cfg.samples) |_| sum = vaddv(sum, trace_one(ray{ | |
.origin = [_]f32{ 0, 0.1, 0 }, | |
.direction = normalized([_]f32{ | |
((@as(f32, @floatFromInt(col)) + rand.float(f32)) / float_dims[0] - 0.5) * float_dims[0] / float_dims[1], | |
1 - (@as(f32, @floatFromInt(row)) + rand.float(f32)) / float_dims[1] - 0.5, | |
1.0, | |
}), | |
}, cfg.bounces, &rand)); | |
const linear = vdivs(sum, @floatFromInt(cfg.samples)); // magick doesn't respect the linear channel declaration | |
const srgb = [_]f32{ | |
lienar_to_srgb(linear[0]), | |
lienar_to_srgb(linear[1]), | |
lienar_to_srgb(linear[2]), | |
}; | |
pixels[idx] = srgb; | |
} | |
} | |
} | |
fn qoi_hash(pix: [4]u8) u8 { | |
return (pix[0] *% 3 +% pix[1] *% 5 +% pix[2] *% 7 +% pix[3] *% 11) % 64; | |
} | |
pub fn main() !void { | |
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | |
defer arena.deinit(); | |
const allocator = arena.allocator(); | |
const cfg: config = .{ | |
.no_threads = 7, | |
.dims = [_]usize{ 600, 400 }, | |
.samples = 1024, | |
.bounces = 5, | |
}; | |
const pixels: []pixel = try allocator.alloc(pixel, cfg.dims[0] * cfg.dims[1]); | |
var threads: []std.Thread = try allocator.alloc(std.Thread, cfg.no_threads); | |
for (0..cfg.no_threads) |thread_id| { | |
threads[thread_id] = try std.Thread.spawn(.{}, thread, .{ thread_id, pixels, &cfg }); | |
//thread(thread_id, pixels, &cfg); | |
} | |
for (threads) |t| t.join(); | |
const stdout_file = std.io.getStdOut().writer(); | |
var bw = std.io.bufferedWriter(stdout_file); | |
const stdout = bw.writer(); | |
_ = stdout.write(&[_]u8{ 'q', 'o', 'i', 'f' }) catch unreachable; | |
_ = stdout.write(&std.mem.toBytes(std.mem.nativeToBig(u32, cfg.dims[0]))) catch unreachable; | |
_ = stdout.write(&std.mem.toBytes(std.mem.nativeToBig(u32, cfg.dims[1]))) catch unreachable; | |
_ = stdout.write(&[_]u8{ 4, 0 }) catch unreachable; | |
var pixel_table: [64][4]u8 = .{[_]u8{ 0, 0, 0, 0 }} ** 64; | |
var last_pixel: [4]u8 = .{ 0, 0, 0, 255 }; | |
var cur_run: u8 = 0; | |
for (pixels) |pix| { | |
const cur_pix = [_]u8{ | |
@as(u8, @intFromFloat((std.math.clamp(pix[0], 0, 1)) * 255.0)), | |
@as(u8, @intFromFloat((std.math.clamp(pix[1], 0, 1)) * 255.0)), | |
@as(u8, @intFromFloat((std.math.clamp(pix[2], 0, 1)) * 255.0)), | |
255, | |
}; | |
defer last_pixel = cur_pix; | |
defer pixel_table[qoi_hash(cur_pix)] = cur_pix; | |
if (cur_run == 62) { | |
_ = stdout.writeByte(0xC0 | (cur_run - 1)) catch unreachable; | |
cur_run = 0; | |
} | |
if (std.mem.eql(u8, &cur_pix, &last_pixel)) { | |
cur_run += 1; | |
continue; | |
} | |
if (cur_run != 0) { | |
_ = stdout.writeByte(0xC0 | (cur_run - 1)) catch unreachable; | |
cur_run = 0; | |
} | |
if (std.mem.eql(u8, &cur_pix, &pixel_table[qoi_hash(cur_pix)])) { | |
_ = stdout.writeByte(qoi_hash(cur_pix)) catch unreachable; | |
continue; | |
} | |
const dr = cur_pix[0] -% last_pixel[0]; | |
const dg = cur_pix[1] -% last_pixel[1]; | |
const db = cur_pix[2] -% last_pixel[2]; | |
if ((dr +% 2) < 4 and (dg +% 2) < 4 and (db +% 2) < 4) { | |
const delta = ((dr +% 2) << 4) | ((dg +% 2) << 2) | (db +% 2); | |
_ = stdout.writeByte(0x40 | delta) catch unreachable; | |
continue; | |
} | |
const drdg = dr -% dg; | |
const dbdg = db -% dg; | |
if ((dg +% 32) < 64 and (drdg +% 8) < 16 and (dbdg +% 8) < 16) { | |
_ = stdout.writeByte(0x80 | (dg +% 32)) catch unreachable; | |
_ = stdout.writeByte(((drdg +% 8) << 4) | (dbdg +% 8)) catch unreachable; | |
continue; | |
} | |
if (last_pixel[3] == cur_pix[3]) { | |
_ = stdout.write(&[_]u8{ 0xFE, cur_pix[0], cur_pix[1], cur_pix[2] }) catch unreachable; | |
} else { | |
_ = stdout.write(&[_]u8{ 0xFF, cur_pix[0], cur_pix[1], cur_pix[2], cur_pix[3] }) catch unreachable; | |
} | |
} | |
if (cur_run != 0) { | |
_ = stdout.writeByte(0xC0 | (cur_run - 1)) catch unreachable; | |
cur_run = 0; | |
} | |
_ = stdout.write(&[_]u8{ 0, 0, 0, 0, 0, 0, 0, 1 }) catch unreachable; | |
try bw.flush(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment