Last active
September 8, 2018 13:26
-
-
Save vassvik/ef3bbb13d89fe68c4b06f2ff94740102 to your computer and use it in GitHub Desktop.
Benchmarking acos implementation for input values around 1.0, comparing with libm's acos.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import "core:fmt.odin"; | |
import "core:math.odin"; | |
import "shared:odin-glfw/glfw.odin"; | |
foreign_system_library m "m"; | |
foreign m { | |
acos :: proc(x: f64) -> f64 #link_name "acos" ---; | |
} | |
/* | |
The following procedures are based on the series expansion of `acos(x)` at x = 1 | |
Only works well for input close to 1.0. The interval [0.99, 1.0] in particular works quite well. | |
*/ | |
A :: math.SQRT_TWO; | |
B :: 1.0/(6.0*A); | |
C :: 3.0/(80.0*A); | |
D :: 5.0/(448.0*A); | |
E :: 35.0/(9216.0*A); | |
F :: 63.0/(45056.0*A); | |
G :: 231.0/(425984.0*A); | |
acos_near_one_1 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
return A*y1; | |
} | |
acos_near_one_3 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
y2 := y1*y1; | |
return y1*(A + B*y2); | |
} | |
acos_near_one_5 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
y2 := y1*y1; | |
return y1*(A + y2*(B + y2*C)); | |
} | |
acos_near_one_7 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
y2 := y1*y1; | |
return y1*(A + y2*(B + y2*(C + y2*D))); | |
} | |
acos_near_one_9 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
y2 := y1*y1; | |
return y1*(A + y2*(B + y2*(C + y2*(D + y2*E)))); | |
} | |
acos_near_one_11 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
y2 := y1*y1; | |
return y1*(A + y2*(B + y2*(C + y2*(D + y2*(E + y2*F))))); | |
} | |
acos_near_one_13 :: proc(x: f64) -> f64 #cc_c { | |
y1 := math.sqrt(1.0 - x); | |
y2 := y1*y1; | |
return y1*(A + y2*(B + y2*(C + y2*(D + y2*(E + y2*(F + y2*G)))))); | |
} | |
main :: proc() { | |
glfw.Init(); | |
// round up all the functions to be tested | |
functions := [...](proc(f64) -> f64 #cc_c) { | |
acos, | |
acos_near_one_1, | |
acos_near_one_3, | |
acos_near_one_5, | |
acos_near_one_7, | |
acos_near_one_9, | |
acos_near_one_11, | |
acos_near_one_13 | |
}; | |
desc := [...]string { | |
"libm's acos", | |
"series expansion, 1 term", | |
"series expansion, 2 terms", | |
"series expansion, 3 terms", | |
"series expansion, 4 terms", | |
"series expansion, 5 terms", | |
"series expansion, 6 terms", | |
"series expansion, 7 terms", | |
}; | |
// Test values: acos(v) == c | |
vs := [...]f64 { | |
0.9, | |
0.99, | |
0.999, | |
0.9999, | |
1.0, | |
}; | |
cs := [...]f64 { | |
0.4510268117962624325446446357943518262034225132842500, // 25.84 degrees | |
0.1415394733244272187457893569750270149939829305397531, // 8.11 degrees | |
0.0447250871687334312496962326715510699041805567621581, // 2.563 degrees | |
0.0141422534775128775962402258176561057244030662289954, // 0.8103 degrees | |
0.0000000000000000000000000000000000000000000000000000, // 0 degrees... | |
}; | |
// wroom wroom | |
for j in 0..len(cs) { | |
v := vs[j]; | |
c := cs[j]; | |
fmt.printf("// acos(%.4f) == %f degrees\n", v, c*180.0/math.PI); | |
for func, k in functions { | |
t1 := glfw.GetTime(); | |
i := 0; | |
for { | |
b := func(v); | |
i += 1; | |
if i > 1e8 do break; // 100 million times | |
} | |
t2 := glfw.GetTime(); | |
fmt.printf("value (radians) = %.16f, error (radians) = %+.20f, time (milliseconds) = %f // %s\n", func(v), c - func(v), 1000*(t2-t1), desc[k]); | |
} | |
fmt.println(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// acos(0.9000) == 25.842 deg | |
value (rad) = 0.4510268117962624, error (rad) = +0.00000000000000016653, time (ms) = 1242.578 // libm's acos | |
value (rad) = 0.4472135954999579, error (rad) = +0.00381321629630460013, time (ms) = 236.264 // series expansion, 1 term | |
value (rad) = 0.4509403754624575, error (rad) = +0.00008643633380500670, time (ms) = 241.046 // series expansion, 2 terms | |
value (rad) = 0.4510242280116138, error (rad) = +0.00000258378464873532, time (ms) = 246.079 // series expansion, 3 terms | |
value (rad) = 0.4510267236231958, error (rad) = +0.00000008817306668130, time (ms) = 271.378 // series expansion, 4 terms | |
value (rad) = 0.4510268085433122, error (rad) = +0.00000000325295035353, time (ms) = 332.393 // series expansion, 5 terms | |
value (rad) = 0.4510268116699164, error (rad) = +0.00000000012634610025, time (ms) = 399.089 // series expansion, 6 terms | |
value (rad) = 0.4510268117911725, error (rad) = +0.00000000000508998399, time (ms) = 485.806 // series expansion, 7 terms | |
// acos(0.9900) == 8.110 deg | |
value (rad) = 0.1415394733244273, error (rad) = -0.00000000000000005551, time (ms) = 1787.917 // libm's acos | |
value (rad) = 0.1414213562373096, error (rad) = +0.00011811708711764735, time (ms) = 237.372 // series expansion, 1 term | |
value (rad) = 0.1415392073675073, error (rad) = +0.00000026595691990372, time (ms) = 238.003 // series expansion, 2 terms | |
value (rad) = 0.1415394725325503, error (rad) = +0.00000000079187695290, time (ms) = 246.510 // series expansion, 3 terms | |
value (rad) = 0.1415394733217319, error (rad) = +0.00000000000269528844, time (ms) = 271.047 // series expansion, 4 terms | |
value (rad) = 0.1415394733244174, error (rad) = +0.00000000000000988098, time (ms) = 332.767 // series expansion, 5 terms | |
value (rad) = 0.1415394733244273, error (rad) = -0.00000000000000002776, time (ms) = 398.757 // series expansion, 6 terms | |
value (rad) = 0.1415394733244273, error (rad) = -0.00000000000000005551, time (ms) = 482.430 // series expansion, 7 terms | |
// acos(0.9990) == 2.563 deg | |
value (rad) = 0.0447250871687335, error (rad) = -0.00000000000000002776, time (ms) = 11869.829 // libm's acos | |
value (rad) = 0.0447213595499958, error (rad) = +0.00000372761873761174, time (ms) = 236.650 // series expansion, 1 term | |
value (rad) = 0.0447250863299583, error (rad) = +0.00000000083877511187, time (ms) = 238.215 // series expansion, 2 terms | |
value (rad) = 0.0447250871684838, error (rad) = +0.00000000000024961977, time (ms) = 247.308 // series expansion, 3 terms | |
value (rad) = 0.0447250871687334, error (rad) = +0.00000000000000006245, time (ms) = 270.275 // series expansion, 4 terms | |
value (rad) = 0.0447250871687334, error (rad) = -0.00000000000000002082, time (ms) = 332.430 // series expansion, 5 terms | |
value (rad) = 0.0447250871687334, error (rad) = -0.00000000000000002082, time (ms) = 401.287 // series expansion, 6 terms | |
value (rad) = 0.0447250871687334, error (rad) = -0.00000000000000002082, time (ms) = 482.315 // series expansion, 7 terms | |
// acos(0.9999) == 0.810 deg | |
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 1788.514 // libm's acos | |
value (rad) = 0.0141421356237302, error (rad) = +0.00000011785378270512, time (ms) = 236.306 // series expansion, 1 term | |
value (rad) = 0.0141422534748604, error (rad) = +0.00000000000265250842, time (ms) = 237.829 // series expansion, 2 terms | |
value (rad) = 0.0141422534775120, error (rad) = +0.00000000000000085695, time (ms) = 246.659 // series expansion, 3 terms | |
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 272.118 // series expansion, 4 terms | |
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 332.560 // series expansion, 5 terms | |
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 403.273 // series expansion, 6 terms | |
value (rad) = 0.0141422534775121, error (rad) = +0.00000000000000077889, time (ms) = 488.513 // series expansion, 7 terms | |
// acos(1.0000) == 0.000 deg | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 668.809 // libm's acos | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 189.287 // series expansion, 1 term | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 189.331 // series expansion, 2 terms | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 188.990 // series expansion, 3 terms | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 225.059 // series expansion, 4 terms | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 279.655 // series expansion, 5 terms | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 338.520 // series expansion, 6 terms | |
value (rad) = 0.0000000000000000, error (rad) = +0.00000000000000000000, time (ms) = 412.603 // series expansion, 7 terms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment