Skip to content

Instantly share code, notes, and snippets.

@Sam-Belliveau
Last active February 1, 2024 15:49
Show Gist options
  • Save Sam-Belliveau/9c2e5a7584ec9900831877b3155f6f16 to your computer and use it in GitHub Desktop.
Save Sam-Belliveau/9c2e5a7584ec9900831877b3155f6f16 to your computer and use it in GitHub Desktop.
Fast sin & cos implementation. 2x std::sin on M2, No Subroutines, No Loops, No Branches, No Divides, High Accuracy.
// Copyright (c) 2024 Sam Belliveau. All rights reserved.
//
// This work is licensed under the terms of the MIT license.
// For a copy, see <https://opensource.org/licenses/MIT>.
/**
* Below is a test run on my M2 Max MacBook Pro. The results are not
* guaranteed to be the same on other systems, but should be similar.
*
* This code is intended to be run on embedded devices where the
* performance gains are likely to be larger due to the lack of
* branches, divisions, or memory accesses.
*
* $ g++ -O2 test.cpp -o ./test
* $ ./test
* std::sin<double>(): ~3.44ns
* __builtin_sin(): ~3.41ns
* fast_sin(): ~1.79ns
* faster_sin(): ~1.64ns
*
* std::sin<float>(): ~3.41ns
* __builtin_sinf(): ~3.42ns
* fast_sinf(): ~1.62ns
* faster_sinf(): ~1.34ns
*
* std::cos<double>(): ~3.45ns
* __builtin_cos(): ~3.41ns
* fast_cos(): ~1.93ns
* faster_cos(): ~1.72ns
*
* std::cos<float>(): ~3.48ns
* __builtin_cosf(): ~3.44ns
* fast_cosf(): ~1.81ns
* faster_cosf(): ~1.45ns
*/
/***** Function Declarations *****/
// NOTE(review): the error bounds quoted below are the author's stated figures.
// The range reduction multiplies the input by a rounded 1/(2*pi), so accuracy
// presumably degrades for very large |x| — confirm before relying on the
// bounds over the whole real line.

// fast & simple approximation for sin (double and float variants)
// ∀ (x ∈ (-∞, ∞)) → (|fast_sin(x) - sin(x)| < 0.000051)
// 8 Multiplies, 5 Additions, 3 Absolute Values, 1 Floor.
double fast_sin(double x);
float fast_sinf(float x);
// faster & simple approximation for sin (double and float variants)
// ∀ (x ∈ (-∞, ∞)) → (|faster_sin(x) - sin(x)| < 0.0025)
// 6 Multiplies, 4 Additions, 2 Absolute Values, 1 Floor.
double faster_sin(double x);
float faster_sinf(float x);
// fast & simple approximation for cos (double and float variants)
// ∀ (x ∈ (-∞, ∞)) → (|fast_cos(x) - cos(x)| < 0.000051)
// 8 Multiplies, 6 Additions, 3 Absolute Values, 1 Floor.
double fast_cos(double x);
float fast_cosf(float x);
// faster & simple approximation for cos (double and float variants)
// ∀ (x ∈ (-∞, ∞)) → (|faster_cos(x) - cos(x)| < 0.0025)
// 6 Multiplies, 5 Additions, 2 Absolute Values, 1 Floor.
double faster_cos(double x);
float faster_cosf(float x);
/***** Helper Functions *****/
// All helpers are file-local and force-inlined via the GNU-style
// __attribute__((always_inline)) syntax, so the public functions contain no
// calls to these helpers.

// Range reduction: scale radians to turns, subtract a whole number of turns
// with floor(), and scale back, leaving an angle in roughly [-pi, pi).
// The cos variants also add a quarter turn so the same sine approximation
// can be reused for cosine.
static inline __attribute__((always_inline)) double _normalize_sin(double x);
static inline __attribute__((always_inline)) float _normalize_sinf(float x);
static inline __attribute__((always_inline)) double _normalize_cos(double x);
static inline __attribute__((always_inline)) float _normalize_cosf(float x);
// Sine approximation applied to an already reduced angle: three successive
// x += k * x * |x| correction steps.
static inline __attribute__((always_inline)) double _fast_unnormed(double x);
static inline __attribute__((always_inline)) float _fast_unnormedf(float x);
// Cheaper sine approximation on a reduced angle: two correction steps.
static inline __attribute__((always_inline)) double _faster_unnormed(double x);
static inline __attribute__((always_inline)) float _faster_unnormedf(float x);
/***** Trig Function Implementations *****/
// Approximate sin(x) for x in radians (double precision, more accurate variant):
// reduce the angle first, then run the three-step approximation on it.
double fast_sin(double x)
{
    const double reduced = _normalize_sin(x);
    return _fast_unnormed(reduced);
}
// Approximate sin(x) for x in radians (single precision, more accurate variant):
// reduce the angle first, then run the three-step approximation on it.
float fast_sinf(float x)
{
    const float reduced = _normalize_sinf(x);
    return _fast_unnormedf(reduced);
}
// Approximate sin(x) for x in radians (double precision, cheaper variant):
// reduce the angle first, then run the two-step approximation on it.
double faster_sin(double x)
{
    const double reduced = _normalize_sin(x);
    return _faster_unnormed(reduced);
}
// Approximate sin(x) for x in radians (single precision, cheaper variant):
// reduce the angle first, then run the two-step approximation on it.
float faster_sinf(float x)
{
    const float reduced = _normalize_sinf(x);
    return _faster_unnormedf(reduced);
}
// Approximate cos(x) for x in radians (double precision, more accurate variant):
// the cosine range reduction shifts the angle by a quarter turn, after which
// the three-step sine approximation is applied.
double fast_cos(double x)
{
    const double reduced = _normalize_cos(x);
    return _fast_unnormed(reduced);
}
// Approximate cos(x) for x in radians (single precision, more accurate variant):
// the cosine range reduction shifts the angle by a quarter turn, after which
// the three-step sine approximation is applied.
float fast_cosf(float x)
{
    const float reduced = _normalize_cosf(x);
    return _fast_unnormedf(reduced);
}
// Approximate cos(x) for x in radians (double precision, cheaper variant):
// the cosine range reduction shifts the angle by a quarter turn, after which
// the two-step sine approximation is applied.
double faster_cos(double x)
{
    const double reduced = _normalize_cos(x);
    return _faster_unnormed(reduced);
}
// Approximate cos(x) for x in radians (single precision, cheaper variant):
// the cosine range reduction shifts the angle by a quarter turn, after which
// the two-step sine approximation is applied.
float faster_cosf(float x)
{
    const float reduced = _normalize_cosf(x);
    return _faster_unnormedf(reduced);
}
/***** Helper Function Implementations *****/
// Range-reduce an angle in radians to the equivalent angle in roughly [-pi, pi).
//
// The angle is scaled to turns, the nearest whole number of turns is removed
// (floor(t + 0.5) rounds to nearest without a branch), and the remainder is
// scaled back to radians.
//
// The scale factors are written with more digits than a double can hold so
// that the compiler picks the nearest representable value. Any error in
// 1/(2*pi) is multiplied by |x|, so a truncated constant shows up as a phase
// error that grows with the magnitude of the input.
double _normalize_sin(double x)
{
    const double inv_two_pi = 0.15915494309189533577; // 1 / (2*pi)
    const double two_pi = 6.28318530717958647693;     // 2*pi

    x *= inv_two_pi;               // radians -> turns
    x -= __builtin_floor(x + 0.5); // keep the fractional turn in [-0.5, 0.5)
    x *= two_pi;                   // turns -> radians
    return x;
}
// Single-precision range reduction: maps an angle in radians onto the
// equivalent angle in roughly [-pi, pi) by removing the nearest whole number
// of turns.
float _normalize_sinf(float x)
{
    // radians -> turns (constant is approximately 1 / (2*pi))
    const float turns = x * +0.1591549430919f;
    // floor(t + 0.5) is the nearest whole turn; the remainder lies in [-0.5, 0.5)
    const float wrapped = turns - __builtin_floorf(turns + 0.5f);
    // turns -> radians (constant is approximately 2*pi)
    const float radians = wrapped * +6.2831853071796f;
    return radians;
}
// Range-reduce an angle in radians for cosine evaluation.
//
// The angle is scaled to turns, shifted forward by a quarter turn and wrapped
// into [-0.5, 0.5) turns, then scaled back to radians. Feeding the result to a
// sine approximation therefore yields the cosine of the original angle.
//
// The scale factors are written with more digits than a double can hold so
// that the compiler picks the nearest representable value. Any error in
// 1/(2*pi) is multiplied by |x|, so a truncated constant shows up as a phase
// error that grows with the magnitude of the input.
double _normalize_cos(double x)
{
    const double inv_two_pi = 0.15915494309189533577; // 1 / (2*pi)
    const double two_pi = 6.28318530717958647693;     // 2*pi

    x *= inv_two_pi;                       // radians -> turns
    x -= __builtin_floor(x + 0.75) - 0.25; // add 1/4 turn and wrap into [-0.5, 0.5)
    x *= two_pi;                           // turns -> radians
    return x;
}
// Single-precision range reduction for cosine: shifts the angle forward by a
// quarter turn and wraps it into roughly [-pi, pi), so that a sine
// approximation of the result gives the cosine of the input.
float _normalize_cosf(float x)
{
    // radians -> turns (constant is approximately 1 / (2*pi))
    const float turns = x * +0.1591549430919f;
    // whole turns to remove, minus the quarter-turn phase shift
    const float offset = __builtin_floorf(turns + 0.75f) - 0.25f;
    const float wrapped = turns - offset;
    // turns -> radians (constant is approximately 2*pi)
    const float radians = wrapped * +6.2831853071796f;
    return radians;
}
// Three-step sine approximation for an angle that the callers in this file
// have already range-reduced.
//
// Every step has the form y = y + k * y * |y|, which keeps the function odd
// without any branching. The first coefficient is approximately -1/pi, which
// turns the line y = x into a parabola-like arch that is zero at 0 and +/-pi;
// the remaining two steps reshape that arch towards a sine wave.
double _fast_unnormed(double x)
{
    double y = x;
    y = y + -0.3183098861838 * y * __builtin_fabs(y);
    y = y + +0.2500000000000 * y * __builtin_fabs(y);
    y = y + +0.0684571845286 * y * __builtin_fabs(y);
    return y;
}
// Single-precision three-step sine approximation for an angle that the callers
// in this file have already range-reduced.
//
// Every step has the form y = y + k * y * |y|, which keeps the function odd
// without any branching. The first coefficient is approximately -1/pi; the
// remaining two steps reshape the resulting arch towards a sine wave.
float _fast_unnormedf(float x)
{
    float y = x;
    y = y + -0.3183098861838f * y * __builtin_fabsf(y);
    y = y + +0.2500000000000f * y * __builtin_fabsf(y);
    y = y + +0.0684571845286f * y * __builtin_fabsf(y);
    return y;
}
// Two-step sine approximation for an angle that the callers in this file have
// already range-reduced; one step cheaper than _fast_unnormed.
//
// Both steps have the form y = y + k * y * |y|, which keeps the function odd
// without any branching. The first coefficient is approximately -1/pi; the
// second step reshapes the resulting arch towards a sine wave.
double _faster_unnormed(double x)
{
    double y = x;
    y = y + -0.3183098861838 * y * __builtin_fabs(y);
    y = y + +0.3451140202480 * y * __builtin_fabs(y);
    return y;
}
// Single-precision two-step sine approximation for an angle that the callers
// in this file have already range-reduced; one step cheaper than
// _fast_unnormedf.
//
// Both steps have the form y = y + k * y * |y|, which keeps the function odd
// without any branching. The first coefficient is approximately -1/pi; the
// second step reshapes the resulting arch towards a sine wave.
float _faster_unnormedf(float x)
{
    float y = x;
    y = y + -0.3183098861838f * y * __builtin_fabsf(y);
    y = y + +0.3451140202480f * y * __builtin_fabsf(y);
    return y;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment