-
-
Save volkansalma/2972237 to your computer and use it in GitHub Desktop.
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
float atan2_approximation1(float y, float x); | |
float atan2_approximation2(float y, float x); | |
int main() | |
{ | |
float x = 1; | |
float y = 0; | |
for( y = 0; y < 2*M_PI; y+= 0.1 ) | |
{ | |
for(x = 0; x < 2*M_PI; x+= 0.1) | |
{ | |
printf("atan2 for %f,%f: %f \n", y, x, atan2(y, x)); | |
printf("approx1 for %f,%f: %f \n", y, x, atan2_approximation1(y, x)); | |
printf("approx2 for %f,%f: %f \n \n", y, x, atan2_approximation2(y, x)); | |
getch(); | |
} | |
} | |
return 0; | |
} | |
float atan2_approximation1(float y, float x) | |
{ | |
//http://pubs.opengroup.org/onlinepubs/009695399/functions/atan2.html | |
//Volkan SALMA | |
const float ONEQTR_PI = M_PI / 4.0; | |
const float THRQTR_PI = 3.0 * M_PI / 4.0; | |
float r, angle; | |
float abs_y = fabs(y) + 1e-10f; // kludge to prevent 0/0 condition | |
if ( x < 0.0f ) | |
{ | |
r = (x + abs_y) / (abs_y - x); | |
angle = THRQTR_PI; | |
} | |
else | |
{ | |
r = (x - abs_y) / (x + abs_y); | |
angle = ONEQTR_PI; | |
} | |
angle += (0.1963f * r * r - 0.9817f) * r; | |
if ( y < 0.0f ) | |
return( -angle ); // negate if in quad III or IV | |
else | |
return( angle ); | |
} | |
#define PI_FLOAT 3.14159265f | |
#define PIBY2_FLOAT 1.5707963f | |
// |error| < 0.005 | |
float atan2_approximation2( float y, float x ) | |
{ | |
if ( x == 0.0f ) | |
{ | |
if ( y > 0.0f ) return PIBY2_FLOAT; | |
if ( y == 0.0f ) return 0.0f; | |
return -PIBY2_FLOAT; | |
} | |
float atan; | |
float z = y/x; | |
if ( fabs( z ) < 1.0f ) | |
{ | |
atan = z/(1.0f + 0.28f*z*z); | |
if ( x < 0.0f ) | |
{ | |
if ( y < 0.0f ) return atan - PI_FLOAT; | |
return atan + PI_FLOAT; | |
} | |
} | |
else | |
{ | |
atan = PIBY2_FLOAT - z/(z*z + 0.28f); | |
if ( y < 0.0f ) return atan - PI_FLOAT; | |
} | |
return atan; | |
} |
In my (somewhat limited but concise) testing on both a very fast Gen 7 Xeon and an STM32F4 (w/FPU) ARM micro show atan2_approximation1
to be faster (and much more accurate) than the 2nd version.
On STM32F4 a1
is ~2.3 times faster than stdlib (with gcc-arm 4.7.4)., while a2
is ~2.1 times faster than stdlib.
On the Xeon under Windows with MSVC a1
shows maybe a very slight improvement in speed over std (within margin of error, really), while with MinGW-w64 on same system the std version is a whopping 12 times slower!
Thanks for this snip, very handy!
For anyone looking at this in the future, this function was first referenced on an Apple mailing list in 2005 that's not in their current archives (but it is on the wayback machine!). It was a derivative of a similar function form 1999 which can be found here. That function was in the public domain
if anyone is concerned about licensing.
if ( x == 0.0f ) // line 62
if ( y == 0.0f ) return 0.0f; // line 65
Do these lines ever get executed (in cases where 0.0f is not explicitly used as the argument, but rather is a result of some calculation that evaluates "sufficiently close" to 0.0f)?
approximation1 can be made branchless using std::copysign and std::fabs - which boil down to simple bitwise logic.
float atan2_approx(float y, float x) {
float abs_y = std::fabs(y) + 1e-10f; // kludge to prevent 0/0 condition
float r = (x - std::copysign(abs_y, x)) / (abs_y + std::fabs(x));
float angle = M_PI/2.f - std::copysign(M_PI/4.f, x);
angle += (0.1963f * r * r - 0.9817f) * r;
return std::copysign(angle, y);
}
I was pointing out the fact they used a compare operation to 0.0 float. That will be true very seldom (to put it mildly) and - according to my knowledge - shall be avoided. Let me think about your code snippet above (interesting kludge of 1e-10f ;)
the kludge line comes from approximation1, you were pointing out the comparisons in approximation2. Sorry, I didn't mean to answer your question, but wanted to comment on the original code because it inspired me to get to a branchless version.
The beauty of a branchless atan2 is that it can easily be performed using SIMD instructions.
very nice, indeed.
Can I use and publish this under Apache 2 license with proper attribution? It would be for graphhopper