Last active
August 21, 2024 17:26
-
-
Save kprotty/9f45dde0eaea94a9a8d13097ee44b3cf to your computer and use it in GitHub Desktop.
Updated to Zig 0.13
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const builtin = @import("builtin"); | |
/// Entry point: benchmarks three ways of incrementing one shared atomic
/// counter, one `bench` run per strategy.
///
/// Every strategy is an anonymous struct exposing `name` and
/// `inc(counter, current, rng)`. `inc` returns the counter value it just
/// installed; `bench` feeds that value back in as `current` on the next call.
pub fn main() !void {
    // Strategy 1: one unconditional atomic fetch-and-add.
    try bench(struct {
        pub const name = "FAA";

        pub fn inc(counter: *std.atomic.Value(usize), _: usize, _: *u32) usize {
            const previous = counter.fetchAdd(1, .monotonic);
            return previous + 1;
        }
    });

    // Strategy 2: compare-and-swap, retried immediately on failure.
    try bench(struct {
        pub const name = "CAS";

        pub fn inc(counter: *std.atomic.Value(usize), current: usize, _: *u32) usize {
            var expected = current;
            // cmpxchgWeak yields null on success, otherwise the value it
            // observed; that observed value is the expectation for the retry.
            while (counter.cmpxchgWeak(expected, expected + 1, .monotonic, .monotonic)) |observed| {
                expected = observed;
            }
            return expected + 1;
        }
    });

    // Strategy 3: compare-and-swap that backs off before every retry.
    try bench(struct {
        pub const name = "CAS backoff";

        pub fn inc(counter: *std.atomic.Value(usize), current: usize, rng: *u32) usize {
            var expected = current;
            while (true) {
                const desired = expected + 1;
                const swapped = counter.cmpxchgWeak(expected, desired, .monotonic, .monotonic) == null;
                if (swapped) return desired;

                if (comptime builtin.target.cpu.arch == .aarch64) {
                    // On M1 & similar, WFE acts like an efficient pause yield when backing off.
                    asm volatile ("wfe" ::: "memory");
                } else {
                    // Spin a randomized amount of time to avoid threads resonating
                    // at the same frequency and causing permanent contention.
                    // https://github.com/apple/swift-corelibs-libdispatch/blob/542b7f32311680b11b6fc8fcb2576955460ba7da/src/shims/yield.h#L91-L120
                    const spin_min = 2;
                    const spin_max = 1024;
                    // `rng >> 24` keeps only the top 8 bits of the state, so the
                    // resulting count is always an odd number in 1..255.
                    const spins = ((rng.* >> 24) & (spin_max - 1)) | (spin_min - 1);
                    // Advance the per-thread linear congruential generator.
                    rng.* = (rng.* *% 1103515245) +% 12345;
                    for (0..spins) |_| std.atomic.spinLoopHint();
                }

                // Reload after backing off; the failed CAS result is ignored.
                expected = counter.load(.monotonic);
            }
        }
    });
}
/// Runs one benchmark and prints its result to stderr.
///
/// Spawns one worker thread per CPU reported by `std.Thread.getCpuCount`.
/// Each worker repeatedly calls `Config.inc` on a single shared counter until
/// it is told to stop, roughly one second later. The report shows the
/// aggregate throughput (increments per millisecond) and the population
/// standard deviation of the per-worker increment counts.
///
/// `Config` must declare:
///   - `pub const name`: a string used as the report heading.
///   - `pub fn inc(counter: *std.atomic.Value(usize), current: usize, rng: *u32) usize`:
///     performs one increment and returns the value used as `current` on the
///     next call.
///
/// Returns an error if the CPU count cannot be determined, the worker array
/// cannot be allocated, or a thread cannot be spawned. Threads that were
/// already started are stopped and joined before the error propagates.
fn bench(comptime Config: type) !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const num_cpus = try std.Thread.getCpuCount();

    // The contended counter, padded by a cache line on both sides so it does
    // not share a line with neighbouring data.
    const Shared = extern struct {
        front_padding: [std.atomic.cache_line]u8 = undefined,
        counter: std.atomic.Value(usize) = .{ .raw = 0 },
        back_padding: [std.atomic.cache_line]u8 = undefined,
    };

    const Worker = struct {
        shutdown: std.atomic.Value(bool) = .{ .raw = false },
        thread: std.Thread = undefined,
        increments: usize = 0,
        shared: *Shared,

        /// Thread body: increments the shared counter until `shutdown` is set.
        fn run(worker: *@This(), seed: usize) void {
            // Count locally and publish once on exit; the spawner reads
            // `increments` only after joining this thread.
            var local_inc: usize = 0;
            defer worker.increments = local_inc;

            const shared = worker.shared;
            // `seed + 1` so that worker 0 does not start from an all-zero state.
            var rng: u32 = @intCast(seed + 1);
            var current = shared.counter.load(.monotonic);
            while (!worker.shutdown.load(.monotonic)) {
                current = Config.inc(&shared.counter, current, &rng);
                local_inc += 1;
            }
        }
    };

    const workers = try allocator.alloc(Worker, num_cpus);
    defer allocator.free(workers);

    var shared = Shared{};
    var spawned: usize = 0;
    // If a spawn fails part-way, the threads already running still reference
    // `shared` and `workers`; stop and join them before unwinding frees both.
    errdefer {
        for (workers[0..spawned]) |*w| w.shutdown.store(true, .monotonic);
        for (workers[0..spawned]) |*w| w.thread.join();
    }
    for (workers, 0..) |*w, i| {
        w.* = .{ .shared = &shared };
        w.thread = try std.Thread.spawn(.{}, Worker.run, .{ w, i });
        spawned += 1;
    }

    const measure_duration_ns = 1000 * std.time.ns_per_ms;
    std.time.sleep(measure_duration_ns);

    // Signal every worker first, then join, so they all wind down in parallel.
    for (workers) |*w| w.shutdown.store(true, .monotonic);
    for (workers) |*w| w.thread.join();

    var total: u128 = 0;
    for (workers) |*w| total += w.increments;

    const measure_duration_ms = measure_duration_ns / std.time.ns_per_ms;
    const throughput_ms = total / measure_duration_ms;

    // Population standard deviation: sqrt(mean((x - mean)^2)).
    const worker_count: f64 = @floatFromInt(workers.len);
    const mean = @as(f64, @floatFromInt(total)) / worker_count;
    var squared_deviations: f64 = 0;
    for (workers) |*w| {
        const deviation = @as(f64, @floatFromInt(w.increments)) - mean;
        squared_deviations += deviation * deviation;
    }
    // Truncated to a whole number of increments for display.
    const stdev: u64 = @intFromFloat(@sqrt(squared_deviations / worker_count));

    std.debug.print(
        \\-------------------------
        \\| {s}
        \\-------------------------
        \\| {s:>10} | {s:>8} |
        \\| {d:>7}/ms | {d:>8} |
        \\
        \\
    , .{
        Config.name,
        "throughput",
        "stdev",
        throughput_ms,
        stdev,
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment