Created
October 10, 2022 22:34
-
-
Save Marc-B-Reynolds/42694d290786abb38d955d76d3478a4f to your computer and use it in GitHub Desktop.
hacky tanpi testbed based on sinpi/cospi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Public Domain under http://unlicense.org, see link for details. | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <math.h> | |
#include <string.h> | |
#include "xmmintrin.h" | |
#include "wmmintrin.h" | |
#include "x86intrin.h" | |
#define RANDOMIZE | |
// number of trials | |
#define TLEN UINT64_C(0x000000000fffffff) | |
// gen 'x' on +/-RANGE | |
#define RANGE 1.f | |
// use | |
#define SHORTER | |
// external code: xoroshiro128+ | |
inline uint32_t f32_to_bits(float x) | |
{ | |
uint32_t u; memcpy(&u,&x,4); return u; | |
} | |
inline float f32_from_bits(uint32_t x) | |
{ | |
float u; memcpy(&u,&x,4); return u; | |
} | |
inline uint64_t f64_to_bits(double x) | |
{ | |
uint64_t u; memcpy(&u,&x,8); return u; | |
} | |
inline double f64_from_bits(uint64_t x) | |
{ | |
double d; memcpy(&d,&x,8); return d; | |
} | |
// | |
inline uint64_t u64_abs(uint64_t x) | |
{ | |
return (int64_t)x >= 0 ? x : -x; | |
} | |
inline uint32_t u32_abs(uint32_t x) | |
{ | |
return (int32_t)x >= 0 ? x : -x; | |
} | |
// ulp distance provided a & b are finite | |
// and have the same sign | |
inline uint64_t f64_ulp_dist_ss(double a, double b) | |
{ | |
uint64_t ua = f64_to_bits(a); | |
uint64_t ub = f64_to_bits(b); | |
return u64_abs(ua-ub); | |
} | |
inline uint32_t f32_ulp_dist_ss(float a, float b) | |
{ | |
uint32_t ua = f32_to_bits(a); | |
uint32_t ub = f32_to_bits(b); | |
return u32_abs(ua-ub); | |
} | |
inline uint32_t f32_ulp_dist(float a, float b) | |
{ | |
uint32_t ua = f32_to_bits(a); | |
uint32_t ub = f32_to_bits(b); | |
uint32_t s = ub^ua; | |
if ((int32_t)s >= 0) | |
return u32_abs(ua-ub); | |
return ua+ub+0x80000000; | |
} | |
//******************* | |
uint64_t rng_state[2]; | |
inline uint64_t rotl(const uint64_t v, int i) | |
{ | |
return (v << i)|(v >> (64-i)); | |
} | |
inline uint64_t rng_u64(void) | |
{ | |
uint64_t s0 = rng_state[0]; | |
uint64_t s1 = rng_state[1]; | |
uint64_t r = s0 + s1; | |
s1 ^= s0; | |
rng_state[0] = rotl(s0,55) ^ s1 ^ (s1<<14); | |
rng_state[1] = rotl(s1,36); | |
return r; | |
} | |
// end: xoroshiro128+ | |
// unbiased uniform signed integer (in floating-point) [-2^p,2^p-1] | |
inline float rng_f32_si() { return (float) ((int64_t)rng_u64() >> (64-25)); } | |
inline double rng_f64_si() { return (double)((int64_t)rng_u64() >> (64-54)); } | |
// unbiased uniform floating-point values on [-1,1) | |
inline float rng_f32_s() { return rng_f32_si() * 0x1p-24f; } | |
inline double rng_f64_s() { return rng_f64_si() * 0x1p-53; } | |
// unbiased uniform floating-point values on (-1,1] | |
inline float rng_f32_sez() { return fmaf(rng_f32_si(), 0x1p-24f, 0x1p-24f); } | |
inline double rng_f64_sez() { return fma (rng_f64_si(), 0x1p-53, 0x1p-53 ); } | |
void reset_generators(uint64_t s0, uint64_t s1) | |
{ | |
rng_state[0] = s0; | |
rng_state[1] = s1; | |
rng_u64(); | |
} | |
typedef struct {float a,b; } f32_pair_t; | |
inline float f32_xor(float a, float b) { return f32_from_bits(f32_to_bits(a) ^ f32_to_bits(b)); } | |
inline float f32_mulsign(float v, uint32_t s) { return f32_from_bits(f32_to_bits(v)^s); } | |
#if defined(SHORTER) | |
// constants for sin(pi x) and cos(pi x) for x on [-1/4,1/4] | |
const float f32_sinpi_7_k[] = { 0x1.921fb6p1f, -0x1.4abbecp2f, 0x1.466b2p1f, -0x1.2f5992p-1f }; | |
const float f32_cospi_6_k[] = { 0x1.fffffep-1f, -0x1.3bd37ep2f, 0x1.03acccp2f, -0x1.4dfd3ap0f }; | |
void f32_sincospi(float* dst, float a) | |
{ | |
static const float* S = f32_sinpi_7_k; | |
static const float* C = f32_cospi_6_k; | |
float c,s,a2,a3,r; | |
uint32_t q,sx,sy; | |
r = rintf(a+a); | |
a = fmaf(r,-0.5f,a); | |
q = (uint32_t)r; | |
a2 = a*a; | |
sy = (q<<31); sx = (q>>1)<<31; sy ^= sx; q &= 1; | |
c = fmaf(C[3], a2, C[2]); s = fmaf(S[3], a2, S[2]); a3 = a2*a; | |
c = fmaf(c, a2, C[1]); s = fmaf(s, a2, S[1]); | |
c = fmaf(c, a2, C[0]); s = a3 * s; | |
c = f32_mulsign(c,sx); s = fmaf(a, S[0], s); | |
s = f32_mulsign(s,sy); | |
dst[q ] = c; | |
dst[q^1] = s; | |
} | |
#else | |
// constants for sin(pi x) and cos(pi x) for x on [-1/4,1/4] | |
const float f32_sinpi_7_k[] = { 0x1.921fb6p1f, -0x1.4abbecp2f, 0x1.466b2p1f, -0x1.2f5992p-1f }; | |
const float f32_cospi_8_k[] = {-0x1.3bd3ccp2f, 0x1.03c1b8p2f, -0x1.55b7cep0f, 0x1.d684aap-3f }; | |
// dst[0] = cos(pi a), dst[1] = sin(pi a) | |
void f32_sincospi(float* dst, float a) | |
{ | |
static const float* S = f32_sinpi_7_k; | |
static const float* C = f32_cospi_8_k; | |
float c,s,a2,a3,r; | |
uint32_t q,sx,sy; | |
// range reduce | |
r = rintf(a+a); | |
a = fmaf(r,-0.5f,a); | |
// setting up for reconstruct | |
q = (uint32_t)r; | |
sy = (q<<31); sx = (q>>1)<<31; sy ^= sx; q &= 1; | |
// compute the approximations | |
a2 = a*a; | |
c = fmaf(C[3], a2, C[2]); s = fmaf(S[3], a2, S[2]); a3 = a2*a; | |
c = fmaf(c, a2, C[1]); s = fmaf(s, a2, S[1]); | |
c = fmaf(c, a2, C[0]); s = a3 * s; | |
c = fmaf(c, a2, 1.f); s = fmaf(a, S[0], s); | |
c = f32_mulsign(c,sx); s = f32_mulsign(s,sy); | |
dst[q ] = c; | |
dst[q^1] = s; | |
} | |
#endif | |
#define HISTO_LEN 16 | |
#define LENGTHOF(X) (sizeof(X)/sizeof(X[0])) | |
void tanfoo() | |
{ | |
uint32_t histo[HISTO_LEN] = {0}; | |
for(uint64_t i=0; i<TLEN; i++) { | |
f32_pair_t p; | |
float x0 = RANGE*(rng_f32_s()); | |
float t0; | |
float t1; | |
// just cheating a bit and treating 'double' computation as | |
// correct. one gotch here is integer multiples of pi/2, | |
// the double version can't do that and the approximation | |
// well be correct. | |
double x1 = M_PI*x0; | |
t1 = (float)tan(x1); | |
f32_sincospi((float*)&p,x0); | |
t0 = p.b/p.a; | |
uint32_t diff = f32_ulp_dist(t0,t1); | |
uint32_t id = (diff < HISTO_LEN) ? diff : HISTO_LEN-1; | |
histo[id]++; | |
// dump first 'n' example of each ulp diff | |
if (histo[id] < 10) { | |
// double's can't represent multiples of (pi/2) so filter out when 'x0' is. | |
// (the issue is no tanpi function) | |
if ((id != (HISTO_LEN-1)) || (floorf(x0+x0) != ceilf(x0+x0))) | |
printf("%14f | % 15e % 15e | %15a %15a | %5u | %016lx%016lx\n", x0, t1,t0, t1,t0, diff,rng_state[0],rng_state[1]); | |
} | |
} | |
printf("\ntan {"); | |
for(uint32_t i=0; i<HISTO_LEN; i++) { | |
printf("%u,", histo[i]); | |
} | |
printf("}\n"); | |
} | |
int main(void) | |
{ | |
uint64_t s0; | |
uint64_t s1; | |
s0 = UINT64_C(0xcb9c59b3f9f87d4d); | |
s1 = UINT64_C(0x2545f4914f6cdd1d); | |
s0 ^= _rdtsc(); | |
float t = -0.5f; | |
printf("%f %f : %u\n", floorf(t+t),ceilf(t+t), floorf(t+t) != ceilf(t+t)); | |
printf("seed: %016lx:%016lx\n\n",s1,s0); | |
printf("%14s | %15s %15s | %15s %15s | %5s\n","x","ref","approx","ref","approx","diff"); | |
reset_generators(s0,s1); | |
tanfoo(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment