Last active
February 6, 2022 22:06
benchmarks addo including the suggested layout change
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
All together: | |
[user@pc tryzig]$ hyperfine ./addo_* | |
Benchmark 1: ./addo_fast | |
Time (mean ± σ): 14.682 s ± 0.337 s [User: 14.640 s, System: 0.006 s] | |
Range (min … max): 14.315 s … 15.149 s 10 runs | |
Benchmark 2: ./addo_noptr | |
Time (mean ± σ): 13.789 s ± 0.290 s [User: 13.755 s, System: 0.001 s] | |
Range (min … max): 13.299 s … 14.120 s 10 runs | |
Benchmark 3: ./addo_simple | |
Time (mean ± σ): 15.858 s ± 0.352 s [User: 15.840 s, System: 0.001 s] | |
Range (min … max): 15.323 s … 16.190 s 10 runs | |
Summary | |
'./addo_noptr' ran | |
1.06 ± 0.03 times faster than './addo_fast' | |
1.15 ± 0.04 times faster than './addo_simple' | |
Only addo_fast and addo_simple: | |
[user@pc tryzig]$ hyperfine ./addo_* | |
Benchmark 1: ./addo_fast | |
Time (mean ± σ): 13.945 s ± 0.308 s [User: 13.907 s, System: 0.002 s] | |
Range (min … max): 13.537 s … 14.400 s 10 runs | |
Benchmark 2: ./addo_simple | |
Time (mean ± σ): 15.281 s ± 0.413 s [User: 15.241 s, System: 0.001 s] | |
Range (min … max): 14.730 s … 15.745 s 10 runs | |
Summary | |
'./addo_fast' ran | |
1.10 ± 0.04 times faster than './addo_simple' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const builtin = @import("builtin"); | |
const math = std.math; | |
/// Adds `a` and `b` with two's-complement wraparound and reports signed overflow.
///
/// `ST` is the signed integer type to operate on.
/// `overflow` is an out-parameter: it is set to 1 when the mathematical sum
/// does not fit in `ST`, and to 0 otherwise. It is always written.
/// Returns the wrapped sum `a +% b`.
inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    @setRuntimeSafety(builtin.is_test);
    const sum: ST = a +% b;
    // Hacker's Delight, section "Overflow Detection", subsection
    // "Signed Add/Subtract":
    // Signed overflow occurs iff a and b have the same sign and the sign of
    // the wrapped sum differs from it.
    //   Slower routine: res = ~(a ^ b) & (sum ^ a)
    //   Faster routine: res = (sum ^ a) & (sum ^ b)
    // Overflow occurred iff the sign bit of res is set, i.e. (res < 0).
    const sign_mismatch = (sum ^ a) & (sum ^ b);
    overflow.* = if (sign_mismatch < 0) @as(c_int, 1) else 0;
    return sum;
}
/// 128-bit signed add with overflow reporting, using the C calling convention.
///
/// Forwards to `addoXi4_generic` instantiated for `i128`; `overflow` is passed
/// through unchanged as the out-parameter and the helper's result is returned.
pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const wrapped_sum = addoXi4_generic(i128, a, b, overflow);
    return wrapped_sum;
}
/// Reference implementation of 128-bit signed add with overflow reporting,
/// using range comparisons instead of bit tricks.
///
/// `overflow` is set to 1 when `a + b` falls outside the `i128` range and to 0
/// otherwise. Returns the wrapped sum `a +% b`.
fn simple_addosi4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const lowest: i128 = math.minInt(i128);
    const highest: i128 = math.maxInt(i128);
    // The sign test on `a` comes first in each conjunction so that the
    // subtraction is only evaluated when it cannot itself overflow.
    const too_high = (a > 0) and (b > highest - a);
    const too_low = (a < 0) and (b < lowest - a);
    overflow.* = if (too_high or too_low) @as(c_int, 1) else 0;
    return a +% b;
}
/// Result of an overflow-reporting 128-bit addition, returned by value
/// instead of writing the overflow flag through a pointer.
const Res = struct {
    /// The wrapped (two's-complement) sum.
    sum: i128,
    /// 1 when the addition overflowed, 0 otherwise.
    overflow: u8,
};
/// 128-bit signed add that returns both the wrapped sum and an overflow flag
/// in a `Res` value (no out-pointer).
///
/// `Res.sum` is `a +% b`; `Res.overflow` is 1 when the mathematical sum does
/// not fit in `i128`, else 0.
fn addoti4(a: i128, b: i128) Res {
    @setRuntimeSafety(builtin.is_test);
    const wrapped: i128 = a +% b;
    // Hacker's Delight, section "Overflow Detection", subsection
    // "Signed Add/Subtract":
    // Signed overflow occurs iff a and b share a sign and the wrapped sum has
    // the other sign. (wrapped ^ a) & (wrapped ^ b) has its sign bit set in
    // exactly that case, so a negative value means overflow.
    const sign_mismatch = (wrapped ^ a) & (wrapped ^ b);
    return Res{
        .sum = wrapped,
        .overflow = if (sign_mismatch < 0) @as(u8, 1) else 0,
    };
}
//pub fn main() !void { | |
// var x: i128 = 0; | |
// var y: i128 = 0; | |
// var ov: c_int = 0; | |
// var res: i128 = 0; | |
// var sum: i128 = 0; | |
// var sum2: i128 = 0; | |
// const stdout = std.io.getStdOut(); | |
// | |
// stdout.writeAll("starting\n") catch unreachable; | |
// //while (x < 50_000_000) { | |
// while (x < 1_000_000_000) { | |
// //res = simple_addosi4(x, y, &ov); | |
// res = __addoti4(x, y, &ov); | |
// x += 1; | |
// y += 1; | |
// sum += res; | |
// if (sum > 1_000_000) { | |
// sum2 += 1; | |
// sum = 0; | |
// } | |
// //std.debug.assert(ov != 1); | |
// } | |
// if (ov == 1) stdout.writeAll("error: overflow happened\n") catch unreachable; | |
// //std.debug.print("sum2: {d}\n", .{sum2}); | |
// if (sum2 > 0) stdout.writeAll("finished\n") catch unreachable; | |
// std.process.exit(0); | |
//} | |
/// Benchmark driver: calls `addoti4` one billion times with increasing
/// operands and folds the results into counters so the calls cannot be
/// optimized away.
///
/// Prints "starting" before the loop and "finished" afterwards (when the
/// counter is non-zero). Stdout write failures are returned to the caller
/// instead of being asserted impossible.
pub fn main() !void {
    var x: i128 = 0;
    var y: i128 = 0;
    var res = Res{
        .sum = 0,
        .overflow = 0,
    };
    var sum: i128 = 0;
    var sum2: i128 = 0;
    const stdout = std.io.getStdOut();

    // Writing to stdout can really fail (e.g. closed pipe), so propagate the
    // error rather than using `catch unreachable`.
    try stdout.writeAll("starting\n");

    // Use 50_000_000 iterations instead for a quicker run.
    while (x < 1_000_000_000) {
        res = addoti4(x, y);
        x += 1;
        y += 1;
        sum += res.sum;
        // Periodically reset the accumulator and count the resets.
        if (sum > 1_000_000) {
            sum2 += 1;
            sum = 0;
        }
    }

    // Note: `res` holds only the last iteration's result, so this reports
    // overflow of the final addition only.
    if (res.overflow == 1) try stdout.writeAll("error: overflow happened\n");
    if (sum2 > 0) try stdout.writeAll("finished\n");
    std.process.exit(0);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment