Skip to content

Instantly share code, notes, and snippets.

@vassvik
Last active September 8, 2018 13:26
Show Gist options
  • Save vassvik/ef3bbb13d89fe68c4b06f2ff94740102 to your computer and use it in GitHub Desktop.
Save vassvik/ef3bbb13d89fe68c4b06f2ff94740102 to your computer and use it in GitHub Desktop.
Benchmarking acos implementation for input values around 1.0, comparing with libm's acos.
import "core:fmt.odin";
import "core:math.odin";
import "shared:odin-glfw/glfw.odin";
foreign_system_library m "m";
foreign m {
acos :: proc(x: f64) -> f64 #link_name "acos" ---;
}
/*
The following procedures are based on the series expansion of `acos(x)` at x = 1
Only works well for input close to 1.0. The interval [0.99, 1.0] in particular works quite well.
*/
A :: math.SQRT_TWO;
B :: 1.0/(6.0*A);
C :: 3.0/(80.0*A);
D :: 5.0/(448.0*A);
E :: 35.0/(9216.0*A);
F :: 63.0/(45056.0*A);
G :: 231.0/(425984.0*A);
acos_near_one_1 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
return A*y1;
}
acos_near_one_3 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
y2 := y1*y1;
return y1*(A + B*y2);
}
acos_near_one_5 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
y2 := y1*y1;
return y1*(A + y2*(B + y2*C));
}
acos_near_one_7 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
y2 := y1*y1;
return y1*(A + y2*(B + y2*(C + y2*D)));
}
acos_near_one_9 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
y2 := y1*y1;
return y1*(A + y2*(B + y2*(C + y2*(D + y2*E))));
}
acos_near_one_11 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
y2 := y1*y1;
return y1*(A + y2*(B + y2*(C + y2*(D + y2*(E + y2*F)))));
}
acos_near_one_13 :: proc(x: f64) -> f64 #cc_c {
y1 := math.sqrt(1.0 - x);
y2 := y1*y1;
return y1*(A + y2*(B + y2*(C + y2*(D + y2*(E + y2*(F + y2*G))))));
}
main :: proc() {
glfw.Init();
// round up all the functions to be tested
functions := [...](proc(f64) -> f64 #cc_c) {
acos,
acos_near_one_1,
acos_near_one_3,
acos_near_one_5,
acos_near_one_7,
acos_near_one_9,
acos_near_one_11,
acos_near_one_13
};
desc := [...]string {
"libm's acos",
"series expansion, 1 term",
"series expansion, 2 terms",
"series expansion, 3 terms",
"series expansion, 4 terms",
"series expansion, 5 terms",
"series expansion, 6 terms",
"series expansion, 7 terms",
};
// Test values: acos(v) == c
vs := [...]f64 {
0.9,
0.99,
0.999,
0.9999,
1.0,
};
cs := [...]f64 {
0.4510268117962624325446446357943518262034225132842500, // 25.84 degrees
0.1415394733244272187457893569750270149939829305397531, // 8.11 degrees
0.0447250871687334312496962326715510699041805567621581, // 2.563 degrees
0.0141422534775128775962402258176561057244030662289954, // 0.8103 degrees
0.0000000000000000000000000000000000000000000000000000, // 0 degrees...
};
// wroom wroom
for j in 0..len(cs) {
v := vs[j];
c := cs[j];
fmt.printf("// acos(%.4f) == %f degrees\n", v, c*180.0/math.PI);
for func, k in functions {
t1 := glfw.GetTime();
i := 0;
for {
b := func(v);
i += 1;
if i > 1e8 do break; // 100 million times
}
t2 := glfw.GetTime();
fmt.printf("value (radians) = %.16f, error (radians) = %+.20f, time (milliseconds) = %f // %s\n", func(v), c - func(v), 1000*(t2-t1), desc[k]);
}
fmt.println();
}
}
// acos(0.9000) == 25.842 deg
value (rad) = 0.4510268117962624, error (rad) = +0.00000000000000016653, time (ms) = 1242.578 // libm's acos
value (rad) = 0.4472135954999579, error (rad) = +0.00381321629630460013, time (ms) = 236.264 // series expansion, 1 term
value (rad) = 0.4509403754624575, error (rad) = +0.00008643633380500670, time (ms) = 241.046 // series expansion, 2 terms
value (rad) = 0.4510242280116138, error (rad) = +0.00000258378464873532, time (ms) = 246.079 // series expansion, 3 terms
value (rad) = 0.4510267236231958, error (rad) = +0.00000008817306668130, time (ms) = 271.378 // series expansion, 4 terms
value (rad) = 0.4510268085433122, error (rad) = +0.00000000325295035353, time (ms) = 332.393 // series expansion, 5 terms
value (rad) = 0.4510268116699164, error (rad) = +0.00000000012634610025, time (ms) = 399.089 // series expansion, 6 terms
value (rad) = 0.4510268117911725, error (rad) = +0.00000000000508998399, time (ms) = 485.806 // series expansion, 7 terms
// acos(0.9900) == 8.110 deg
value (rad) = 0.1415394733244273, error (rad) = -0.00000000000000005551, time (ms) = 1787.917 // libm's acos
value (rad) = 0.1414213562373096, error (rad) = +0.00011811708711764735, time (ms) = 237.372 // series expansion, 1 term
value (rad) = 0.1415392073675073, error (rad) = +0.00000026595691990372, time (ms) = 238.003 // series expansion, 2 terms
value (rad) = 0.1415394725325503, error (rad) = +0.00000000079187695290, time (ms) = 246.510 // series expansion, 3 terms
value (rad) = 0.1415394733217319, error (rad) = +0.00000000000269528844, time (ms) = 271.047 // series expansion, 4 terms
value (rad) = 0.1415394733244174, error (rad) = +0.00000000000000988098, time (ms) = 332.767 // series expansion, 5 terms
value (rad) = 0.1415394733244273, error (rad) = -0.00000000000000002776, time (ms) = 398.757 // series expansion, 6 terms
value (rad) = 0.1415394733244273, error (rad) = -0.00000000000000005551, time (ms) = 482.430 // series expansion, 7 terms
// acos(0.9990) == 2.563 deg
value (rad) = 0.0447250871687335, error (rad) = -0.00000000000000002776, time (ms) = 11869.829 // libm's acos
value (rad) = 0.0447213595499958, error (rad) = +0.00000372761873761174, time (ms) = 236.650 // series expansion, 1 term
value (rad) = 0.0447250863299583, error (rad) = +0.00000000083877511187, time (ms) = 238.215 // series expansion, 2 terms
value (rad) = 0.0447250871684838, error (rad) = +0.00000000000024961977, time (ms) = 247.308 // series expansion, 3 terms
value (rad) = 0.0447250871687334, error (rad) = +0.00000000000000006245, time (ms) = 270.275 // series expansion, 4 terms
value (rad) = 0.0447250871687334, error (rad) = -0.00000000000000002082, time (ms) = 332.430 // series expansion, 5 terms
value (rad) = 0.0447250871687334, error (rad) = -0.00000000000000002082, time (ms) = 401.287 // series expansion, 6 terms
value (rad) = 0.0447250871687334, error (rad) = -0.00000000000000002082, time (ms) = 482.315 // series expansion, 7 terms
// acos(0.9999) == 0.810 deg
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 1788.514 // libm's acos
value (rad) = 0.0141421356237302, error (rad) = +0.00000011785378270512, time (ms) = 236.306 // series expansion, 1 term
value (rad) = 0.0141422534748604, error (rad) = +0.00000000000265250842, time (ms) = 237.829 // series expansion, 2 terms
value (rad) = 0.0141422534775120, error (rad) = +0.00000000000000085695, time (ms) = 246.659 // series expansion, 3 terms
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 272.118 // series expansion, 4 terms
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 332.560 // series expansion, 5 terms
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 403.273 // series expansion, 6 terms
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 488.513 // series expansion, 7 terms
// acos(1.0000) == 0.000 deg
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 668.809 // libm's acos
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 189.287 // series expansion, 1 term
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 189.331 // series expansion, 2 terms
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 188.990 // series expansion, 3 terms
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 225.059 // series expansion, 4 terms
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 279.655 // series expansion, 5 terms
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 338.520 // series expansion, 6 terms
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 412.603 // series expansion, 7 terms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment