Last active
February 6, 2022 22:06
Revisions
-
matu3ba revised this gist
Feb 6, 2022 . 1 changed file with 15 additions and 0 deletions.There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ All together: [user@pc tryzig]$ hyperfine ./addo_* Benchmark 1: ./addo_fast Time (mean ± σ): 14.682 s ± 0.337 s [User: 14.640 s, System: 0.006 s] @@ -15,3 +16,17 @@ Summary './addo_noptr' ran 1.06 ± 0.03 times faster than './addo_fast' 1.15 ± 0.04 times faster than './addo_simple' Only addo_fast and addo_simple: [user@pc tryzig]$ hyperfine ./addo_* Benchmark 1: ./addo_fast Time (mean ± σ): 13.945 s ± 0.308 s [User: 13.907 s, System: 0.002 s] Range (min … max): 13.537 s … 14.400 s 10 runs Benchmark 2: ./addo_simple Time (mean ± σ): 15.281 s ± 0.413 s [User: 15.241 s, System: 0.001 s] Range (min … max): 14.730 s … 15.745 s 10 runs Summary './addo_fast' ran 1.10 ± 0.04 times faster than './addo_simple' -
matu3ba created this gist
Feb 6, 2022 .There are no files selected for viewing
// Benchmark results recorded with the three variants in this file.
// Presumably addo_fast = __addoti4, addo_noptr = addoti4 and
// addo_simple = simple_addosi4 -- TODO confirm against the build commands.
//
// [user@pc tryzig]$ hyperfine ./addo_*
// Benchmark 1: ./addo_fast
//   Time (mean ± σ):     14.682 s ±  0.337 s    [User: 14.640 s, System: 0.006 s]
//   Range (min … max):   14.315 s … 15.149 s    10 runs
// Benchmark 2: ./addo_noptr
//   Time (mean ± σ):     13.789 s ±  0.290 s    [User: 13.755 s, System: 0.001 s]
//   Range (min … max):   13.299 s … 14.120 s    10 runs
// Benchmark 3: ./addo_simple
//   Time (mean ± σ):     15.858 s ±  0.352 s    [User: 15.840 s, System: 0.001 s]
//   Range (min … max):   15.323 s … 16.190 s    10 runs
// Summary
//   './addo_noptr' ran
//     1.06 ± 0.03 times faster than './addo_fast'
//     1.15 ± 0.04 times faster than './addo_simple'

const std = @import("std");
const builtin = @import("builtin");
const math = std.math;

/// Number of additions performed by the benchmark loop in `main`.
const iterations: i128 = 1_000_000_000;

/// Adds `a` and `b` with wraparound and reports signed overflow via `overflow`.
///
/// `overflow.*` is set to 1 if the exact sum does not fit in `ST`, else to 0.
/// The wrapped sum is returned in both cases.
inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    @setRuntimeSafety(builtin.is_test);
    const sum: ST = a +% b;
    // Hacker's Delight: section Overflow Detection, subsection Signed Add/Subtract.
    // Let sum = a +% b (wraparound addition).
    // Overflow occurs iff a and b have the same sign and the sign of sum
    // differs from it.
    // Slower routine: res = ~(a ^ b) & (sum ^ a)
    // Faster routine: res = (sum ^ a) & (sum ^ b)
    // Overflow occurred iff res < 0 (sign bit of res is set).
    const overflowed = ((sum ^ a) & (sum ^ b)) < 0;
    overflow.* = if (overflowed) @as(c_int, 1) else 0;
    return sum;
}

/// 128-bit signed add with overflow flag, pointer-out variant.
/// Thin C-ABI wrapper around `addoXi4_generic`.
pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    return addoXi4_generic(i128, a, b, overflow);
}

/// Reference implementation using range comparisons instead of bit tricks.
///
/// Despite the `si4` in its name it operates on `i128`; the name is kept
/// unchanged for existing callers. Sets `overflow.*` to 1 on signed overflow,
/// else 0, and returns the wrapped sum.
fn simple_addosi4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const min: i128 = math.minInt(i128);
    const max: i128 = math.maxInt(i128);
    // `max - a` cannot overflow when a > 0, and `min - a` cannot when a < 0.
    const overflowed = (a > 0 and b > max - a) or (a < 0 and b < min - a);
    overflow.* = if (overflowed) @as(c_int, 1) else 0;
    return a +% b;
}

/// Result of `addoti4`: the wrapped sum plus an overflow flag (1 or 0).
const Res = struct {
    sum: i128,
    overflow: u8,
};

/// 128-bit signed add with overflow flag, returned by value instead of
/// through an out-pointer.
fn addoti4(a: i128, b: i128) Res {
    @setRuntimeSafety(builtin.is_test);
    const sum = a +% b;
    // Same detection as in `addoXi4_generic`: overflow occurred iff the sign
    // of sum differs from the sign of both operands, i.e.
    // (sum ^ a) & (sum ^ b) is negative.
    const overflowed = ((sum ^ a) & (sum ^ b)) < 0;
    return Res{
        .sum = sum,
        .overflow = if (overflowed) @as(u8, 1) else 0,
    };
}

// Driver for the pointer-based variants (`__addoti4` / `simple_addosi4`),
// kept for reference:
//
//pub fn main() !void {
//    var x: i128 = 0;
//    var y: i128 = 0;
//    var ov: c_int = 0;
//    var res: i128 = 0;
//    var sum: i128 = 0;
//    var sum2: i128 = 0;
//    const stdout = std.io.getStdOut();
//
//    stdout.writeAll("starting\n") catch unreachable;
//    //while (x < 50_000_000) {
//    while (x < 1_000_000_000) {
//        //res = simple_addosi4(x, y, &ov);
//        res = __addoti4(x, y, &ov);
//        x += 1;
//        y += 1;
//        sum += res;
//        if (sum > 1_000_000) {
//            sum2 += 1;
//            sum = 0;
//        }
//        //std.debug.assert(ov != 1);
//    }
//    if (ov == 1) stdout.writeAll("error: overflow happened\n") catch unreachable;
//    //std.debug.print("sum2: {d}\n", .{sum2});
//    if (sum2 > 0) stdout.writeAll("finished\n") catch unreachable;
//    std.process.exit(0);
//}

/// Benchmark driver for the by-value variant `addoti4`.
///
/// Runs `iterations` additions, folding the results into `sum`/`sum2` so the
/// work is observable, and reports whether any addition overflowed.
pub fn main() !void {
    const stdout = std.io.getStdOut();
    try stdout.writeAll("starting\n");

    var x: i128 = 0;
    var y: i128 = 0;
    var sum: i128 = 0;
    var sum2: i128 = 0;
    // Sticky flag: remembers an overflow from any iteration, not just the last.
    var any_overflow: u8 = 0;

    while (x < iterations) {
        const res = addoti4(x, y);
        any_overflow |= res.overflow;
        x += 1;
        y += 1;
        sum += res.sum;
        if (sum > 1_000_000) {
            sum2 += 1;
            sum = 0;
        }
    }

    if (any_overflow != 0) try stdout.writeAll("error: overflow happened\n");
    if (sum2 > 0) try stdout.writeAll("finished\n");
}