/*
 * GitHub Gist by @WilliamBundy (WilliamBundy/91516738b79f825f0e0c36b889d6e519),
 * created May 20, 2018 05:31.
 */
/* vf128: shorthand for the SSE __m128 type (four packed 32-bit floats). */
typedef __m128 vf128;
/*
 * Packed single-precision arctangent of each lane of xx.
 *
 * The sign bit is stripped, the computation runs on |xx|, and the sign is
 * multiplied back in at the end. Range reduction per lane:
 *   |x| > 2.414213562373095  : offset WB_PI/2, argument -1/x
 *   |x| > 0.4142135623730950 : offset WB_PI/4, argument (x-1)/(x+1)
 *   otherwise                : offset 0,       argument x
 * (the thresholds are tan(3*pi/8) and tan(pi/8)). The reduced argument r is
 * then fed to the odd polynomial
 *   r + r*z*(((c3*z - c2)*z + c1)*z - c0),  z = r*r.
 *
 * Both reductions are evaluated for every lane and the applicable one is
 * chosen with compare masks, so the routine is branch-free.
 * NOTE(review): the coefficients look like the Cephes atanf ones - confirm.
 */
WBTM_API vf128 wb_atan_ps(vf128 xx)
{
    vf128 unit = ppf(1.0f);
    vf128 sign_bits = _mm_and_ps(xx, pfi(0x80000000));
    vf128 ax = _mm_and_ps(xx, pfi(~0x80000000));

    /* Per-lane masks for the two range-reduction thresholds. */
    vf128 is_large = _mm_cmpgt_ps(ax, ppf(2.414213562373095f));
    vf128 is_mid = _mm_cmpgt_ps(ax, ppf(0.4142135623730950f));

    /* Candidate reduced arguments, computed unconditionally for all lanes. */
    vf128 neg_recip = _mm_div_ps(ppf(-1.0f), ax);
    vf128 shifted = _mm_div_ps(_mm_sub_ps(ax, unit), _mm_add_ps(ax, unit));

    /* r = is_large ? -1/x : (is_mid ? (x-1)/(x+1) : x) */
    vf128 r_inner = _mm_or_ps(_mm_and_ps(shifted, is_mid),
                              _mm_andnot_ps(is_mid, ax));
    vf128 r = _mm_or_ps(_mm_and_ps(neg_recip, is_large),
                        _mm_andnot_ps(is_large, r_inner));

    /* offset = is_large ? WB_PI/2 : (is_mid ? WB_PI/4 : 0) */
    vf128 offset_inner = _mm_or_ps(_mm_and_ps(_mm_set1_ps(WB_PI/4), is_mid),
                                   _mm_andnot_ps(is_mid, ppf(0)));
    vf128 offset = _mm_or_ps(_mm_and_ps(_mm_set1_ps(WB_PI/2), is_large),
                             _mm_andnot_ps(is_large, offset_inner));

    /* Horner evaluation in z = r*r, then multiply by r and add r. */
    vf128 z = _mm_mul_ps(r, r);
    vf128 poly = _mm_mul_ps(ppf(8.05374449538e-2), z);
    poly = _mm_sub_ps(poly, ppf(1.38776856032E-1));
    poly = _mm_add_ps(_mm_mul_ps(poly, z), ppf(1.99777106478E-1));
    poly = _mm_sub_ps(_mm_mul_ps(poly, z), ppf(3.33329491539E-1));
    poly = _mm_mul_ps(_mm_mul_ps(poly, z), r);
    poly = _mm_add_ps(poly, r);

    vf128 magnitude = _mm_add_ps(offset, poly);

    /* OR-ing the saved sign bit into 1.0f yields +1 or -1 per lane. */
    return _mm_mul_ps(magnitude, _mm_or_ps(sign_bits, unit));
}
/*
 * Scalar arctangent: loads x into the lowest lane of a vector (the upper
 * lanes are zeroed by _mm_set_ss), runs wb_atan_ps on it, and returns the
 * lowest lane of the result.
 */
WBTM_API f32 wb_atanf(f32 x)
{
    return _mm_cvtss_f32(wb_atan_ps(_mm_set_ss(x)));
}
/*
 * Two-argument arctangent: the angle of the point (x, y), computed as
 * wb_atanf(y / x) plus a quadrant correction.
 *
 * Special cases:
 *   - either argument is NaN   -> NaN
 *   - x == 0: y < 0 -> -WB_PI/2,  y == 0 -> 0,  y > 0 -> WB_PI/2
 *   - y == 0: x < 0 -> WB_PI,     x > 0 -> 0
 *   - both arguments infinite  -> +-WB_PI/4 or +-3*WB_PI/4 by quadrant
 * Signed zeros are not distinguished (-0.0 is treated like +0.0).
 */
WBTM_API f32 wb_atan2f(f32 y, f32 x)
{
    /* NaN compares unequal to itself. Propagate it here, otherwise the axis
     * shortcuts below would turn a NaN argument into 0 or WB_PI/2. */
    if(x != x || y != y) {
        return x + y;
    }

    /* Points on the y axis (and the origin): y / x is not usable. */
    if(x == 0) {
        if(y < 0) {
            return -WB_PI / 2;
        }
        if(y == 0) {
            return 0;
        }
        return WB_PI / 2;
    }

    /* Points on the x axis. */
    if(y == 0) {
        if(x < 0) {
            return WB_PI;
        }
        return 0;
    }

    f32 ratio = y / x;
    if(ratio != ratio) {
        /* NaN inputs and 0/0 were handled above, so a NaN quotient can only
         * be inf/inf. The direction is a diagonal: use slope +1 or -1. */
        ratio = ((x < 0) == (y < 0)) ? 1.0f : -1.0f;
    }

    /* atan of the slope only covers the right half-plane; for x < 0 shift
     * the result by WB_PI toward the side y lies on. */
    f32 w = 0;
    if(x < 0) {
        if(y < 0) {
            w = -WB_PI;
        } else {
            w = WB_PI;
        }
    }
    return w + wb_atanf(ratio);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment