Skip to content

Instantly share code, notes, and snippets.

@Marc-B-Reynolds
Created October 10, 2022 22:34
Show Gist options
  • Save Marc-B-Reynolds/42694d290786abb38d955d76d3478a4f to your computer and use it in GitHub Desktop.
Save Marc-B-Reynolds/42694d290786abb38d955d76d3478a4f to your computer and use it in GitHub Desktop.
hacky tanpi testbed based on sinpi/cospi
// Public Domain under http://unlicense.org, see link for details.
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include "xmmintrin.h"
#include "wmmintrin.h"
#include "x86intrin.h"
#define RANDOMIZE
// number of trials
#define TLEN UINT64_C(0x000000000fffffff)
// gen 'x' on +/-RANGE
#define RANGE 1.f
// use
#define SHORTER
// external code: xoroshiro128+
inline uint32_t f32_to_bits(float x)
{
uint32_t u; memcpy(&u,&x,4); return u;
}
inline float f32_from_bits(uint32_t x)
{
float u; memcpy(&u,&x,4); return u;
}
inline uint64_t f64_to_bits(double x)
{
uint64_t u; memcpy(&u,&x,8); return u;
}
inline double f64_from_bits(uint64_t x)
{
double d; memcpy(&d,&x,8); return d;
}
//
inline uint64_t u64_abs(uint64_t x)
{
return (int64_t)x >= 0 ? x : -x;
}
inline uint32_t u32_abs(uint32_t x)
{
return (int32_t)x >= 0 ? x : -x;
}
// ulp distance provided a & b are finite
// and have the same sign
inline uint64_t f64_ulp_dist_ss(double a, double b)
{
uint64_t ua = f64_to_bits(a);
uint64_t ub = f64_to_bits(b);
return u64_abs(ua-ub);
}
inline uint32_t f32_ulp_dist_ss(float a, float b)
{
uint32_t ua = f32_to_bits(a);
uint32_t ub = f32_to_bits(b);
return u32_abs(ua-ub);
}
inline uint32_t f32_ulp_dist(float a, float b)
{
uint32_t ua = f32_to_bits(a);
uint32_t ub = f32_to_bits(b);
uint32_t s = ub^ua;
if ((int32_t)s >= 0)
return u32_abs(ua-ub);
return ua+ub+0x80000000;
}
//*******************
uint64_t rng_state[2];
inline uint64_t rotl(const uint64_t v, int i)
{
return (v << i)|(v >> (64-i));
}
inline uint64_t rng_u64(void)
{
uint64_t s0 = rng_state[0];
uint64_t s1 = rng_state[1];
uint64_t r = s0 + s1;
s1 ^= s0;
rng_state[0] = rotl(s0,55) ^ s1 ^ (s1<<14);
rng_state[1] = rotl(s1,36);
return r;
}
// end: xoroshiro128+
// unbiased uniform signed integer (in floating-point) [-2^p,2^p-1]
inline float rng_f32_si() { return (float) ((int64_t)rng_u64() >> (64-25)); }
inline double rng_f64_si() { return (double)((int64_t)rng_u64() >> (64-54)); }
// unbiased uniform floating-point values on [-1,1)
inline float rng_f32_s() { return rng_f32_si() * 0x1p-24f; }
inline double rng_f64_s() { return rng_f64_si() * 0x1p-53; }
// unbiased uniform floating-point values on (-1,1]
inline float rng_f32_sez() { return fmaf(rng_f32_si(), 0x1p-24f, 0x1p-24f); }
inline double rng_f64_sez() { return fma (rng_f64_si(), 0x1p-53, 0x1p-53 ); }
void reset_generators(uint64_t s0, uint64_t s1)
{
rng_state[0] = s0;
rng_state[1] = s1;
rng_u64();
}
typedef struct {float a,b; } f32_pair_t;
inline float f32_xor(float a, float b) { return f32_from_bits(f32_to_bits(a) ^ f32_to_bits(b)); }
inline float f32_mulsign(float v, uint32_t s) { return f32_from_bits(f32_to_bits(v)^s); }
#if defined(SHORTER)
// constants for sin(pi x) and cos(pi x) for x on [-1/4,1/4]
const float f32_sinpi_7_k[] = { 0x1.921fb6p1f, -0x1.4abbecp2f, 0x1.466b2p1f, -0x1.2f5992p-1f };
const float f32_cospi_6_k[] = { 0x1.fffffep-1f, -0x1.3bd37ep2f, 0x1.03acccp2f, -0x1.4dfd3ap0f };
void f32_sincospi(float* dst, float a)
{
static const float* S = f32_sinpi_7_k;
static const float* C = f32_cospi_6_k;
float c,s,a2,a3,r;
uint32_t q,sx,sy;
r = rintf(a+a);
a = fmaf(r,-0.5f,a);
q = (uint32_t)r;
a2 = a*a;
sy = (q<<31); sx = (q>>1)<<31; sy ^= sx; q &= 1;
c = fmaf(C[3], a2, C[2]); s = fmaf(S[3], a2, S[2]); a3 = a2*a;
c = fmaf(c, a2, C[1]); s = fmaf(s, a2, S[1]);
c = fmaf(c, a2, C[0]); s = a3 * s;
c = f32_mulsign(c,sx); s = fmaf(a, S[0], s);
s = f32_mulsign(s,sy);
dst[q ] = c;
dst[q^1] = s;
}
#else
// constants for sin(pi x) and cos(pi x) for x on [-1/4,1/4]
const float f32_sinpi_7_k[] = { 0x1.921fb6p1f, -0x1.4abbecp2f, 0x1.466b2p1f, -0x1.2f5992p-1f };
const float f32_cospi_8_k[] = {-0x1.3bd3ccp2f, 0x1.03c1b8p2f, -0x1.55b7cep0f, 0x1.d684aap-3f };
// dst[0] = cos(pi a), dst[1] = sin(pi a)
void f32_sincospi(float* dst, float a)
{
static const float* S = f32_sinpi_7_k;
static const float* C = f32_cospi_8_k;
float c,s,a2,a3,r;
uint32_t q,sx,sy;
// range reduce
r = rintf(a+a);
a = fmaf(r,-0.5f,a);
// setting up for reconstruct
q = (uint32_t)r;
sy = (q<<31); sx = (q>>1)<<31; sy ^= sx; q &= 1;
// compute the approximations
a2 = a*a;
c = fmaf(C[3], a2, C[2]); s = fmaf(S[3], a2, S[2]); a3 = a2*a;
c = fmaf(c, a2, C[1]); s = fmaf(s, a2, S[1]);
c = fmaf(c, a2, C[0]); s = a3 * s;
c = fmaf(c, a2, 1.f); s = fmaf(a, S[0], s);
c = f32_mulsign(c,sx); s = f32_mulsign(s,sy);
dst[q ] = c;
dst[q^1] = s;
}
#endif
#define HISTO_LEN 16
#define LENGTHOF(X) (sizeof(X)/sizeof(X[0]))
void tanfoo()
{
uint32_t histo[HISTO_LEN] = {0};
for(uint64_t i=0; i<TLEN; i++) {
f32_pair_t p;
float x0 = RANGE*(rng_f32_s());
float t0;
float t1;
// just cheating a bit and treating 'double' computation as
// correct. one gotch here is integer multiples of pi/2,
// the double version can't do that and the approximation
// well be correct.
double x1 = M_PI*x0;
t1 = (float)tan(x1);
f32_sincospi((float*)&p,x0);
t0 = p.b/p.a;
uint32_t diff = f32_ulp_dist(t0,t1);
uint32_t id = (diff < HISTO_LEN) ? diff : HISTO_LEN-1;
histo[id]++;
// dump first 'n' example of each ulp diff
if (histo[id] < 10) {
// double's can't represent multiples of (pi/2) so filter out when 'x0' is.
// (the issue is no tanpi function)
if ((id != (HISTO_LEN-1)) || (floorf(x0+x0) != ceilf(x0+x0)))
printf("%14f | % 15e % 15e | %15a %15a | %5u | %016lx%016lx\n", x0, t1,t0, t1,t0, diff,rng_state[0],rng_state[1]);
}
}
printf("\ntan {");
for(uint32_t i=0; i<HISTO_LEN; i++) {
printf("%u,", histo[i]);
}
printf("}\n");
}
int main(void)
{
uint64_t s0;
uint64_t s1;
s0 = UINT64_C(0xcb9c59b3f9f87d4d);
s1 = UINT64_C(0x2545f4914f6cdd1d);
s0 ^= _rdtsc();
float t = -0.5f;
printf("%f %f : %u\n", floorf(t+t),ceilf(t+t), floorf(t+t) != ceilf(t+t));
printf("seed: %016lx:%016lx\n\n",s1,s0);
printf("%14s | %15s %15s | %15s %15s | %5s\n","x","ref","approx","ref","approx","diff");
reset_generators(s0,s1);
tanfoo();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment