Skip to content

Instantly share code, notes, and snippets.

@fabriciofx
Forked from mrbid/sine_bench.c
Created September 13, 2021 21:55
Show Gist options
  • Save fabriciofx/141d2dc5d6366866d7d8868e5a98e740 to your computer and use it in GitHub Desktop.
Save fabriciofx/141d2dc5d6366866d7d8868e5a98e740 to your computer and use it in GitHub Desktop.
A benchmark of different trigonometric sine functions.
/*
James William Fletcher (james@voxdsp.com)
August 2021
Benchmarking sine wave functions.
references:
http://www.ee.ic.ac.uk/pcheung/teaching/ee3_Study_Project/Sinewave%20Generation(708).pdf
https://demonstrations.wolfram.com/SineWaveGenerationUsingAnUnstableIIRFilter/
https://en.wikipedia.org/wiki/Goertzel_algorithm
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <string.h>
#include <locale.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <fcntl.h>
#include <x86intrin.h>
#define interval 1000000
#define AVGITER 3000000
// adapted from ogre3d asm_math.h
// https://www.flipcode.com/archives/07-15-2002.shtml
// https://www.cs.cmu.edu/afs/andrew/scs/cs/oldfiles/15-494-sp09/dst/A/sw/ogre-1.6.4/OgreMain/include/asm_math.h
// https://gist.github.com/mrbid/9a050ee747a9188bc0aa849385bef865#file-rand_float_normal_bench-c-L63
float rand_float5(const __int64_t seed)
{
static const float FLOAT_MAX = 1/9223372036854775807.0f;
static __int64_t q = 8008135;
if(seed != 0)
q = seed;
__m64 mm0 = _mm_cvtsi64_m64(q);
__m64 mm1 = _m_pshufw(mm0, 0x1E);
mm0 = _mm_add_pi32(mm0, mm1);
q = _m_to_int64(mm0);
_m_empty();
return ( fabs(q+1e-7f) * FLOAT_MAX );
}
// source; James William Fletcher; this should be accurate to three decimal places
#define PIf 3.141592741f
#define x2PIf 6.283185482f // PI*2
#define rx2PIf 0.1591549367f // 1/(PI*2)
#define wlerp(a, b, i) ((b - a) * i + a)
float lerp_sin(const float theta)
{
// data for the wavetable
static int init = 0;
static float sine_wtable[256] = {0};
// called once on first execution
// generates the wave table
if(init == 0)
{
for(int i = 0; i < 256; i++)
sine_wtable[i] = sin(i * 0.02454369329f); // 0.02454369329f = x2PIf / 256.f;
init = 1;
}
// normalise theta to a 0-1 range
float norm = theta * rx2PIf;
// preserve the fractional part that will be lopped off by the wrapping
const float a = norm * 256.f; // scales normal range to wave table range
const float fract = a-floor(a);
// wrap it
const unsigned char i = (unsigned char)(a); // this cast does the wrapping for us, it's fast.
// check for end of array case for wrap-around lerp.
if(i >= 255)
return wlerp(sine_wtable[255], sine_wtable[0], fract);
// do regular table lerp
return wlerp(sine_wtable[i], sine_wtable[i+1], fract);
}
float aliased_sin(const float theta)
{
// data for the wavetable
static int init = 0;
static float sine_wtable[65536] = {0};
// called once on first execution
if(init == 0)
{
for(int i = 0; i < 65536; i++)
sine_wtable[i] = sin(i * 9.587380191e-05f); // 9.587380191e-05f = x2PIf / 65536.f;
init = 1;
}
// return result
const unsigned short i = (unsigned short)(10430.37793f * theta); // 10430.37793f = 65536.f / x2PIf
return sine_wtable[i];
}
// source; KVRAF; https://www.kvraudio.com/forum/viewtopic.php?t=321027
// https://www.kvraudio.com/forum/viewtopic.php?p=4555831&sid=b366604bf9b3cfd7892f40e7b3fee4e4#p4555831
// omega is passed as 2.0*PI*frequency/sampleRate
// Initialise as IIRSine(2, omega, 0); then call on tick as IIRSine(0, 0, 0);
float IIRSine(const int set, const float omega, const float phase)
{
static float m_a1, m_y1, m_y2;
if(set > 0)
{
m_a1 = omega;
if(set > 1)
{
m_a1 = 2.0 * cos(m_a1);
if(set > 2)
{
m_y1 = sin(phase - omega);
m_y2 = sin(phase - 2.0 * omega);
}
}
}
const float y3 = m_a1 * m_y1 - m_y2;
m_y2 = m_y1;
m_y1 = y3;
return y3;
}
// source; analog.com (SHARC)
// ORIGINAL CODE FROM SHARC
// short output;
// main(){
// int i; const short A=0x7e66; /* A=(1.975/2 * 32768) */
// short y[3]={0,0x1209,0}; /* (y0,y1,y2), y1=(0.1409*32768) */
// for (i=0; i<40; i++) { y[0] = (((A*y[1])>>15) + ((A*y[1])>>15)) – y[2]; y[2] = y[1]; /* y2 <–– y1 */ y[1] = y[0]; /* y1 <–– y0 */
// output = y[0];
// }}
float IIRSine2()
{
static float mv[3] = {0, 0.1409f, 0};
static const float A = 1.975f/2;
mv[0] = A * mv[1] - mv[2];
mv[2] = mv[1];
mv[1] = mv[0];
return mv[0];
}
// source; https://stackoverflow.com/a/62425331
float fast_sin1(float x)
{
x /= 2 * PIf;
x -= (int)x;
if(x <= 0.5)
{
float t = 2 * x * (2 * x - 1);
return (PIf * t) / ((PIf - 4) * t - 1);
}
else
{
float t = 2 * (1 - x) * (1 - 2 * x);
return -(PIf * t) / ((PIf - 4) * t - 1);
}
}
// source; https://www.gamedev.net/forums/topic/621589-extremely-fast-sin-approximation/
double fast_sin2(double x)
{
int k;
double y;
double z;
z = x;
z *= 0.3183098861837907;
z += 6755399441055744.0;
k = *((int *) &z);
z = k;
z *= 3.1415926535897932;
x -= z;
y = x;
y *= x;
z = 0.0073524681968701;
z *= y;
z -= 0.1652891139701474;
z *= y;
z += 0.9996919862959676;
x *= z;
k &= 1;
k += k;
z = k;
z *= x;
x -= z;
return x;
}
// source; https://www.musicdsp.org/en/latest/Other/115-sin-cos-tan-approximation.html
float fast_sin3(float fAngle)
{
float fASqr = fAngle*fAngle;
float fResult = 7.61e-03f;
fResult *= fASqr;
fResult -= 1.6605e-01f;
fResult *= fASqr;
fResult += 1.0f;
fResult *= fAngle;
return fResult;
}
float fast_sin4(float fAngle)
{
float fASqr = fAngle*fAngle;
float fResult = -2.39e-08f;
fResult *= fASqr;
fResult += 2.7526e-06f;
fResult *= fASqr;
fResult -= 1.98409e-04f;
fResult *= fASqr;
fResult += 8.3333315e-03f;
fResult *= fASqr;
fResult -= 1.666666664e-01f;
fResult *= fASqr;
fResult += 1.0f;
fResult *= fAngle;
return fResult;
}
// source; https://stackoverflow.com/a/52693283
float fast_sin5(float x)
{
return (16 * x * (PIf - x)) / (5 * PIf * PIf - (4 * x * (PIf - x)));
//return ( 4 * x * (180 - x)) / (40500 - x * (180 - x));
}
// source; I don't know, I found these some ten years ago and failed to accredit the original authors.
float fast_sin6(float x)
{
return 1.273239544f * x + -0.636619772f * x * abs(x);
}
float fast_sin7(float x)
{
float res = 0, pow = x, fact = 1;
for(int i = 0; i < 5; ++i)
{
res += pow / fact;
pow *= x * x;
fact *= (2*(i+1))*(2*(i+1)+1);
}
return res;
}
// main
uint64_t microtime()
{
struct timeval tv;
struct timezone tz;
memset(&tz, 0, sizeof(struct timezone));
gettimeofday(&tv, &tz);
return 1000000 * tv.tv_sec + tv.tv_usec;
}
int main()
{
srand(time(0));
rand_float5(time(0));
// the big accuracy bench
const float test_max_increment = x2PIf/2;
const float test_max_max = x2PIf*2; // ssh, our secret.
for(float test_max = test_max_increment; test_max <= test_max_max; test_max += test_max_increment)
{
printf("\n-------------------------------------------------\nTesting accuracy of functions from 0 to %.1f ...\n-------------------------------------------------\n\n", test_max);
// test the lerp_sin() accuracy
printf("Testing lerp_sin() accuracy...\n");
float delta = 0.f;
float increment = 0.1f;
float samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(lerp_sin(f) - sin(f));
// printf("%.1f: %.7f - %.7f\n", f, lerp_sin(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
//exit(0);
// test the aliased_sin() accuracy
printf("\nTesting aliased_sin() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(aliased_sin(f) - sin(f));
// printf("%.7f - %.7f\n", aliased_sin(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin1() accuracy
printf("\nTesting fast_sin1() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(fast_sin1(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin1(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin2() accuracy
printf("\nTesting fast_sin2() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(fast_sin2(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin2(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin3() accuracy
printf("\nTesting fast_sin3() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(fast_sin3(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin3(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin4() accuracy
printf("\nTesting fast_sin4() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(fast_sin4(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin4(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin5() accuracy
printf("\nTesting fast_sin5() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(fast_sin5(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin5(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin6() accuracy
printf("\nTesting fast_sin6() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0.f; d < 6; d++)
{
for(float f = 0; f < test_max; f += increment)
{
delta += fabs(fast_sin6(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin6(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
// test the fast_sin7() accuracy
printf("\nTesting fast_sin7() accuracy...\n");
delta = 0.f;
increment = 0.1f;
samples = 0.f;
for(int d = 0; d < 6; d++)
{
for(float f = 0.f; f < test_max; f += increment)
{
delta += fabs(fast_sin7(f) - sin(f));
// printf("%.7f - %.7f\n", fast_sin7(f), sin(f));
// usleep(333333);
samples += 1.f;
}
printf("Average %f deviance over %.0f samples to %i decimal places (%.6f).\n", delta / samples, samples, d+1, increment);
increment /= 10.f;
}
}
//
printf("\n-------------------------------------------------\nTesting speed of functions using random inputs.\n-------------------------------------------------\n\n");
// prep benchmarks
setlocale(LC_NUMERIC, "");
float ret = 0;
unsigned long e = 0, ui = 0;
uint64_t st = 0, et = 0, avg = 0;
//////// __rdtsc()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
avg += __rdtsc()-st;
}
printf("__rdtsc() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += __rdtsc();
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// sin()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += sin(st);
avg += __rdtsc()-st;
}
printf("sin() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += sin(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// IIRSine()
IIRSine(2, 0, 0);
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += IIRSine(1, st, 0);
avg += __rdtsc()-st;
}
printf("IIRSine() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += IIRSine(1, 0, 0);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// IIRSine2()
IIRSine2();
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += IIRSine2();
avg += __rdtsc()-st;
}
printf("IIRSine2() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += IIRSine2();
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// lerp_sin()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += lerp_sin(rand_float5(0)*x2PIf);
avg += __rdtsc()-st;
}
printf("lerp_sin() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += lerp_sin(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// aliased_sin()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += aliased_sin(rand_float5(0)*x2PIf);
avg += __rdtsc()-st;
}
printf("aliased_sin() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += aliased_sin(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin1()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin1(st);
avg += __rdtsc()-st;
}
printf("fast_sin1() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin1(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin2()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin2(st);
avg += __rdtsc()-st;
}
printf("fast_sin2() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin2(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin3()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin3(st);
avg += __rdtsc()-st;
}
printf("fast_sin3() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin3(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin4()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin4(st);
avg += __rdtsc()-st;
}
printf("fast_sin4() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin4(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin5()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin5(st);
avg += __rdtsc()-st;
}
printf("fast_sin5() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin5(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin6()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin6(st);
avg += __rdtsc()-st;
}
printf("fast_sin6() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin6(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
//////// fast_sin7()
avg = 0;
for(int i = 0; i < AVGITER; i++)
{
st = __rdtsc();
ret += fast_sin7(st);
avg += __rdtsc()-st;
}
printf("fast_sin7() Cycles: %'lu\n", avg / AVGITER);
e = 0;
st = microtime();
while(microtime() - st <= interval)
{
ret += fast_sin7(rand_float5(0)*x2PIf);
e++;
}
ui = interval / 1000000;
printf("Executions in %'lu seconds: %'lu\n", ui, e);
printf("Executions per millisecond: %'lu\n", e/(1000*ui));
printf("Executions per microsecond: %'lu\n", e/(1000000*ui));
printf("~%'.8f executions every nanosecond\n\n", (float)e/(1000000000*ui));
////////
// done
printf("%c\n", (char)ret); // forces the compiler to not disreguard the functions we are testing
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment