Skip to content

Instantly share code, notes, and snippets.

@matu3ba
Created February 1, 2023 21:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matu3ba/3c9cbefc8907d45543dffd40133791d4 to your computer and use it in GitHub Desktop.
Save matu3ba/3c9cbefc8907d45543dffd40133791d4 to your computer and use it in GitHub Desktop.
Returning a C struct (sum plus overflow flag) performs the same as returning the sum and writing the overflow flag through a pointer.
128bit
(ins)[misterspoon@pc tryzig]$ hyperfine ./addo_crt ./addo_cstruct
Benchmark 1: ./addo_crt
Time (mean ± σ): 1.083 s ± 0.006 s [User: 1.082 s, System: 0.001 s]
Range (min … max): 1.072 s … 1.094 s 10 runs
Benchmark 2: ./addo_cstruct
Time (mean ± σ): 1.077 s ± 0.003 s [User: 1.075 s, System: 0.001 s]
Range (min … max): 1.073 s … 1.082 s 10 runs
Summary
'./addo_cstruct' ran
1.01 ± 0.01 times faster than './addo_crt'
64bit
(ins)[misterspoon@pc tryzig]$ hyperfine --warmup 5 ./addo64_crt ./addo64_cstruct
Benchmark 1: ./addo64_crt
Time (mean ± σ): 701.9 ms ± 4.9 ms [User: 700.3 ms, System: 0.6 ms]
Range (min … max): 696.9 ms … 710.8 ms 10 runs
Benchmark 2: ./addo64_cstruct
Time (mean ± σ): 707.8 ms ± 4.3 ms [User: 705.8 ms, System: 1.1 ms]
Range (min … max): 701.1 ms … 715.5 ms 10 runs
Summary
'./addo64_crt' ran
1.01 ± 0.01 times faster than './addo64_cstruct'
const std = @import("std");
const builtin = @import("builtin");
const math = std.math;
/// Adds `a` and `b` with wraparound and reports signed overflow through `overflow`.
///
/// `ST` is the integer type of both operands; the sign test used below is only
/// meaningful for a signed type. `overflow.*` is set to 1 when the mathematical
/// sum does not fit in `ST` and to 0 otherwise. The wrapped sum is returned in
/// both cases.
inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    // Runtime safety checks are only enabled in test builds.
    @setRuntimeSafety(builtin.is_test);
    const sum: ST = a +% b;
    // Hacker's Delight, section "Overflow Detection", subsection "Signed Add/Subtract":
    // signed addition overflows iff `a` and `b` have the same sign and the
    // sign of the wrapped sum differs from theirs.
    //   Slower test: ~(a ^ b) & (sum ^ a)
    //   Faster test: (sum ^ a) & (sum ^ b)
    // Either expression is negative (sign bit set) iff overflow occurred.
    const flag: c_int = if (((sum ^ a) & (sum ^ b)) < 0) 1 else 0;
    overflow.* = flag;
    return sum;
}
/// Builds the return type used by the struct-returning overflow-add routines:
/// a `result` of type `ST` paired with a one-byte `overflow` flag.
///
/// The struct is declared `extern` so that its layout follows the C ABI.
fn AddoXi5T(comptime ST: type) type {
    return extern struct {
        result: ST,
        overflow: u8,
    };
}
/// Adds `a` and `b` with wraparound and returns the wrapped sum together with
/// an overflow flag (1 when signed overflow occurred, 0 otherwise).
inline fn addoXi5_generic(comptime ST: type, a: ST, b: ST) AddoXi5T(ST) {
    // Runtime safety checks are only enabled in test builds.
    @setRuntimeSafety(builtin.is_test);
    const wrapped: ST = a +% b;
    // Hacker's Delight, section "Overflow Detection", subsection "Signed Add/Subtract":
    // the sign bit of (sum ^ a) & (sum ^ b) is set exactly when both operands
    // share a sign that the wrapped sum does not have, i.e. on overflow.
    const sign_probe = (wrapped ^ a) & (wrapped ^ b);
    const flag: u8 = if (sign_probe < 0) 1 else 0;
    return .{ .result = wrapped, .overflow = flag };
}
/// C-calling-convention entry point: 128-bit signed add that returns the
/// wrapped sum and the overflow flag together in one struct.
pub fn __addoti5(a: i128, b: i128) callconv(.C) AddoXi5T(i128) {
    return addoXi5_generic(i128, a, b);
}
/// C-calling-convention entry point: 64-bit signed add that returns the
/// wrapped sum and the overflow flag together in one struct.
pub fn __addodi5(a: i64, b: i64) callconv(.C) AddoXi5T(i64) {
    return addoXi5_generic(i64, a, b);
}
/// C-calling-convention entry point: 128-bit signed add that returns the
/// wrapped sum and writes the overflow flag (0 or 1) through `overflow`.
pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    return addoXi4_generic(i128, a, b, overflow);
}
/// C-calling-convention entry point: 64-bit signed add that returns the
/// wrapped sum and writes the overflow flag (0 or 1) through `overflow`.
pub fn __addodi4(a: i64, b: i64, overflow: *c_int) callconv(.C) i64 {
    return addoXi4_generic(i64, a, b, overflow);
}
/// Straightforward 128-bit signed add with overflow detection done by range
/// comparison rather than bit tricks.
///
/// Writes 1 to `overflow` when `a + b` falls outside the `i128` range and 0
/// otherwise, then returns the wrapped sum.
fn simple_addosi4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const lowest: i128 = math.minInt(i128);
    const highest: i128 = math.maxInt(i128);
    // Each bound is only computed on the side where the subtraction itself
    // cannot overflow (`highest - a` for positive `a`, `lowest - a` for negative).
    const out_of_range = if (a > 0)
        b > highest - a
    else if (a < 0)
        b < lowest - a
    else
        false;
    const flag: c_int = if (out_of_range) 1 else 0;
    overflow.* = flag;
    return a +% b;
}
/// Sum-and-flag pair returned by `addoti4`.
const Res = struct {
    /// The 128-bit sum.
    sum: i128,
    /// Overflow flag: 1 if the addition overflowed, 0 otherwise.
    overflow: u8,
};
/// Adds two `i128` values with wraparound and returns the wrapped sum plus an
/// overflow flag (1 when signed overflow occurred, 0 otherwise) as a `Res`.
fn addoti4(a: i128, b: i128) Res {
    // Runtime safety checks are only enabled in test builds.
    @setRuntimeSafety(builtin.is_test);
    const sum: i128 = a +% b;
    // Hacker's Delight, section "Overflow Detection", subsection "Signed Add/Subtract":
    // signed addition overflows iff `a` and `b` have the same sign and the
    // sign of the wrapped sum differs from theirs.
    //   Slower test: ~(a ^ b) & (sum ^ a)
    //   Faster test: (sum ^ a) & (sum ^ b)
    // Either expression is negative (sign bit set) iff overflow occurred.
    const flag: u8 = if (((sum ^ a) & (sum ^ b)) < 0) 1 else 0;
    return .{ .sum = sum, .overflow = flag };
}
// pub fn main() !void {
// var x: i64 = 0;
// var y: i64 = 0;
// var ov: c_int = 0;
// var res: i64 = 0;
// var sum: i64 = 0;
// var sum2: i64 = 0;
// const stdout = std.io.getStdOut();
//
// stdout.writeAll("starting\n") catch unreachable;
// //while (x < 50_000_000) {
// while (x < 1_000_000_000) {
// //res = simple_addosi4(x, y, &ov);
// res = __addodi4(x, y, &ov);
// x += 1;
// y += 1;
// sum += res;
// if (sum > 1_000_000) {
// sum2 += 1;
// sum = 0;
// }
// //std.debug.assert(ov != 1);
// }
// if (ov == 1) stdout.writeAll("error: overflow happened\n") catch unreachable;
// //std.debug.print("sum2: {d}\n", .{sum2});
// if (sum2 > 0) stdout.writeAll("finished\n") catch unreachable;
// std.process.exit(0);
// }
/// Benchmark driver for the struct-returning 64-bit routine `__addodi5`.
///
/// Calls `__addodi5(x, y)` once per iteration while `x` and `y` count upwards,
/// folds each result into running counters, and prints status lines to stdout.
/// Any stdout write failure is returned to the caller instead of being
/// asserted impossible.
pub fn main() !void {
    const iterations: i64 = 1_000_000_000;
    const sum_reset_threshold: i64 = 1_000_000;

    const stdout = std.io.getStdOut();
    try stdout.writeAll("starting\n");

    var x: i64 = 0;
    var y: i64 = 0;
    var sum: i64 = 0;
    var sum2: i64 = 0;
    var any_overflow: u8 = 0;
    while (x < iterations) {
        const res = __addodi5(x, y);
        // Remember an overflow from any iteration, not only the final one.
        any_overflow |= res.overflow;
        x += 1;
        y += 1;
        // NOTE(review): the counters presumably exist so the results stay
        // observable and the calls are not optimized away; confirm.
        sum += res.result;
        if (sum > sum_reset_threshold) {
            sum2 += 1;
            sum = 0;
        }
    }
    if (any_overflow != 0) try stdout.writeAll("error: overflow happened\n");
    if (sum2 > 0) try stdout.writeAll("finished\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment