-
-
Save kprotty/9f45dde0eaea94a9a8d13097ee44b3cf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const builtin = @import("builtin"); | |
const Random = std.rand.Random; | |
const Atomic = std.atomic.Atomic; | |
pub fn main() !void { | |
try bench(struct { | |
pub const name = "FAA"; | |
pub fn inc(counter: *Atomic(usize), current: usize, rng: *Random) usize { | |
_ = rng; | |
_ = current; | |
return counter.fetchAdd(1, .Monotonic) + 1; | |
} | |
}); | |
try bench(struct { | |
pub const name = "CAS"; | |
pub fn inc(counter: *Atomic(usize), current: usize, rng: *Random) usize { | |
_ = rng; | |
var value = current; | |
while (true) { | |
const new_value = value + 1; | |
value = counter.tryCompareAndSwap( | |
value, | |
new_value, | |
.Monotonic, | |
.Monotonic, | |
) orelse return new_value; | |
} | |
} | |
}); | |
try bench(struct { | |
pub const name = "CAS backoff"; | |
pub fn inc(counter: *Atomic(usize), current: usize, rng: *Random) usize { | |
var value = current; | |
while (true) { | |
const new_value = value + 1; | |
value = counter.compareAndSwap( | |
value, | |
new_value, | |
.Monotonic, | |
.Monotonic, | |
) orelse return new_value; | |
// reload the value after spinning | |
defer value = counter.load(.Monotonic); | |
// On M1, it's better to spin with a single WFE instead | |
if (comptime builtin.target.os.tag.isDarwin() and builtin.target.cpu.arch == .aarch64) { | |
asm volatile("wfe" ::: "memory"); | |
continue; | |
} | |
// Spin a randomized amount of time to avoid threads resonating | |
// at the same frequency and causing permanent contention. | |
// https://github.com/apple/swift-corelibs-libdispatch/blob/main/src/shims/yield.h#L102-L125 | |
const spin_max = 128; | |
const spin_min = 32; | |
var spin = (rng.int(u32) & (spin_max - 1)) | (spin_min - 1); | |
while (spin > 0) : (spin -= 1) std.atomic.spinLoopHint(); | |
} | |
} | |
}); | |
} | |
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; | |
fn bench(comptime Config: type) !void { | |
var arena = std.heap.ArenaAllocator.init(gpa.allocator()); | |
defer arena.deinit(); | |
const allocator = arena.allocator(); | |
const num_cpus = try std.Thread.getCpuCount(); | |
const Shared = extern struct { | |
front_padding: [std.atomic.cache_line]u8 = undefined, | |
counter: Atomic(usize) = Atomic(usize).init(0), | |
back_padding: [std.atomic.cache_line]u8 = undefined, | |
}; | |
const Worker = struct { | |
shutdown: Atomic(bool) = Atomic(bool).init(false), | |
thread: std.Thread = undefined, | |
increments: usize = 0, | |
shared: *Shared, | |
fn run(worker: *@This(), seed: usize) void { | |
var local_inc: usize = 0; | |
defer worker.increments = local_inc; | |
var rng = std.rand.DefaultPrng.init(seed + 1); | |
var random = rng.random(); | |
const shared = worker.shared; | |
var current = shared.counter.load(.Monotonic); | |
while (!worker.shutdown.load(.Monotonic)) { | |
current = Config.inc(&shared.counter, current, &random); | |
local_inc += 1; | |
} | |
} | |
}; | |
const workers = try allocator.alloc(Worker, num_cpus); | |
defer allocator.free(workers); | |
var shared = Shared{}; | |
for (workers) |*w, i| { | |
w.* = .{ .shared = &shared }; | |
w.thread = try std.Thread.spawn(.{}, Worker.run, .{w, i}); | |
} | |
const measure_duration_ns = 1000 * std.time.ns_per_ms; | |
std.time.sleep(measure_duration_ns); | |
for (workers) |*w| w.shutdown.store(true, .Monotonic); | |
for (workers) |w| w.thread.join(); | |
var min: i128 = std.math.maxInt(i128); | |
var max: i128 = 0; | |
var sum: i128 = 0; | |
for (workers) |w| { | |
sum += w.increments; | |
min = std.math.min(min, w.increments); | |
max = std.math.max(max, w.increments); | |
} | |
const avg = @divFloor(sum, workers.len); | |
const throughput_ms = @divFloor(sum, measure_duration_ns / std.time.ns_per_ms); | |
var stdev: i128 = 0; | |
for (workers) |w| stdev += (w.increments - avg) * 2; | |
stdev = @divFloor(stdev, workers.len); | |
std.debug.print( | |
\\------------------------- | |
\\| {s} | |
\\------------------------- | |
\\| {s:>10} | {s:>8} | | |
\\| {d:>7}/ms | {d:>8} | | |
\\ | |
\\ | |
, .{ | |
Config.name, | |
"throughput", "stdev", | |
@intCast(u128, throughput_ms), | |
@intCast(u128, stdev), | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment