Last active
February 1, 2024 15:49
-
-
Save Sam-Belliveau/9c2e5a7584ec9900831877b3155f6f16 to your computer and use it in GitHub Desktop.
Fast sin & cos implementation. 2x std::sin on M2, No Subroutines, No Loops, No Branches, No Divides, High Accuracy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2024 Sam Belliveau. All rights reserved.
//
// This work is licensed under the terms of the MIT license.
// For a copy, see <https://opensource.org/licenses/MIT>.
/**
 * Below is a test run on my M2 Max MacBook Pro. The results are not
 * guaranteed to be the same on other systems, but should be similar.
 *
 * This code is intended to be run on embedded devices, where the
 * performance gains are likely to be larger due to the lack of
 * branches, divisions, or memory accesses.
 *
 * $ g++ -O2 test.cpp -o ./test
 * $ ./test
 * std::sin<double>(): ~3.44ns
 * __builtin_sin(): ~3.41ns
 * fast_sin(): ~1.79ns
 * faster_sin(): ~1.64ns
 *
 * std::sin<float>(): ~3.41ns
 * __builtin_sinf(): ~3.42ns
 * fast_sinf(): ~1.62ns
 * faster_sinf(): ~1.34ns
 *
 * std::cos<double>(): ~3.45ns
 * __builtin_cos(): ~3.41ns
 * fast_cos(): ~1.93ns
 * faster_cos(): ~1.72ns
 *
 * std::cos<float>(): ~3.48ns
 * __builtin_cosf(): ~3.44ns
 * fast_cosf(): ~1.81ns
 * faster_cosf(): ~1.45ns
 */
/***** Function Declarations *****/
// fast_sin / fast_sinf: fast & simple approximation for sin (x in radians).
// Author's stated bound: |fast_sin(x) - sin(x)| < 0.000051.
// The argument is range-reduced in the same floating-point type, so the
// achievable accuracy degrades as |x| grows (most noticeably for float).
// 8 Multiplies, 5 Additions/Subtractions, 3 Absolute Values, 1 Floor.
double fast_sin(double x);
float fast_sinf(float x);

// faster_sin / faster_sinf: cheaper, less accurate approximation for sin.
// Author's stated bound: |faster_sin(x) - sin(x)| < 0.0025.
// 6 Multiplies, 4 Additions/Subtractions, 2 Absolute Values, 1 Floor.
double faster_sin(double x);
float faster_sinf(float x);

// fast_cos / fast_cosf: fast & simple approximation for cos (x in radians).
// Author's stated bound: |fast_cos(x) - cos(x)| < 0.000051.
// 8 Multiplies, 6 Additions/Subtractions, 3 Absolute Values, 1 Floor.
double fast_cos(double x);
float fast_cosf(float x);

// faster_cos / faster_cosf: cheaper, less accurate approximation for cos.
// Author's stated bound: |faster_cos(x) - cos(x)| < 0.0025.
// 6 Multiplies, 5 Additions/Subtractions, 2 Absolute Values, 1 Floor.
double faster_cos(double x);
float faster_cosf(float x);
/***** Helper Functions *****/
// Forward declarations of the internal helpers. They are file-local (static)
// and force-inlined so the public entry points compile without subroutine calls.

// Range reduction: map an angle in radians onto one period around zero.
// The *_cos variants additionally shift by a quarter turn so that the
// sine-shaped evaluators below produce cosine.
static inline __attribute__((always_inline)) double _normalize_sin(double x);
static inline __attribute__((always_inline)) float _normalize_sinf(float x);
static inline __attribute__((always_inline)) double _normalize_cos(double x);
static inline __attribute__((always_inline)) float _normalize_cosf(float x);

// Sine evaluators for an already range-reduced angle.
// _fast_* applies three correction steps, _faster_* applies two.
static inline __attribute__((always_inline)) double _fast_unnormed(double x);
static inline __attribute__((always_inline)) float _fast_unnormedf(float x);
static inline __attribute__((always_inline)) double _faster_unnormed(double x);
static inline __attribute__((always_inline)) float _faster_unnormedf(float x);
/***** Trig Function Implementations *****/
double fast_sin(double x) { return _fast_unnormed(_normalize_sin(x)); } | |
float fast_sinf(float x) { return _fast_unnormedf(_normalize_sinf(x)); } | |
double faster_sin(double x) { return _faster_unnormed(_normalize_sin(x)); } | |
float faster_sinf(float x) { return _faster_unnormedf(_normalize_sinf(x)); } | |
double fast_cos(double x) { return _fast_unnormed(_normalize_cos(x)); } | |
float fast_cosf(float x) { return _fast_unnormedf(_normalize_cosf(x)); } | |
double faster_cos(double x) { return _faster_unnormed(_normalize_cos(x)); } | |
float faster_cosf(float x) { return _faster_unnormedf(_normalize_cosf(x)); } | |
/***** Helper Function Implementations *****/
// Range-reduces an angle in radians to the equivalent angle within one period
// centred on zero (approximately [-pi, pi)).
//
// The angle is converted to turns, the nearest whole number of turns is
// removed via floor(t + 0.5), and the remainder is converted back to radians.
//
// The conversion constants are spelled to full double precision. Truncating
// 1/(2*pi) to 13 digits introduces a phase error proportional to |x|
// (about 3e-5 rad at x = 1e9), which is avoidable at no runtime cost.
// Accuracy still degrades for very large |x| because x * (1/(2*pi)) is
// rounded to double precision before the whole turns are removed.
double _normalize_sin(double x)
{
    const double kInvTwoPi = 0.15915494309189533577; // 1 / (2 * pi)
    const double kTwoPi = 6.28318530717958647693;    // 2 * pi

    double turns = x * kInvTwoPi;           // radians -> turns
    turns -= __builtin_floor(turns + 0.5);  // drop whole turns: [-0.5, 0.5)
    return turns * kTwoPi;                  // turns -> radians
}
// Single-precision range reduction: maps an angle in radians to the
// equivalent angle within one period centred on zero (approximately
// [-pi, pi)).
//
// The angle is converted to turns, the nearest whole number of turns is
// removed via floor(t + 0.5f), and the remainder is converted back to radians.
// All arithmetic is done in float, so the reduced angle loses accuracy as
// |x| grows (the fractional part of the turn count runs out of mantissa bits).
float _normalize_sinf(float x)
{
    const float kInvTwoPi = +0.1591549430919f; // 1 / (2 * pi)
    const float kTwoPi = +6.2831853071796f;    // 2 * pi

    float turns = x * kInvTwoPi;              // radians -> turns
    turns -= __builtin_floorf(turns + 0.5f);  // drop whole turns: [-0.5, 0.5)
    return turns * kTwoPi;                    // turns -> radians
}
// Range reduction for cosine: returns (x + pi/2) wrapped into one period
// centred on zero (approximately [-pi, pi)), so that feeding the result to a
// sine evaluator yields cos(x).
//
// Working in turns t = x / (2*pi), the quarter-turn shift and the wrap are
// combined as t - (floor(t + 0.75) - 0.25) == (t + 0.25) - floor((t + 0.25) + 0.5).
//
// The conversion constants are spelled to full double precision. Truncating
// 1/(2*pi) to 13 digits introduces a phase error proportional to |x|
// (about 3e-5 rad at x = 1e9), which is avoidable at no runtime cost.
// Accuracy still degrades for very large |x| because x * (1/(2*pi)) is
// rounded to double precision before the whole turns are removed.
double _normalize_cos(double x)
{
    const double kInvTwoPi = 0.15915494309189533577; // 1 / (2 * pi)
    const double kTwoPi = 6.28318530717958647693;    // 2 * pi

    double turns = x * kInvTwoPi;                   // radians -> turns
    turns -= __builtin_floor(turns + 0.75) - 0.25;  // shift by 1/4 turn and wrap
    return turns * kTwoPi;                          // turns -> radians
}
// Single-precision range reduction for cosine: returns (x + pi/2) wrapped
// into one period centred on zero (approximately [-pi, pi)), so that feeding
// the result to a sine evaluator yields cos(x).
//
// Working in turns t = x / (2*pi), the quarter-turn shift and the wrap are
// combined as t - (floor(t + 0.75) - 0.25).
// All arithmetic is done in float, so the reduced angle loses accuracy as
// |x| grows.
float _normalize_cosf(float x)
{
    const float kInvTwoPi = +0.1591549430919f; // 1 / (2 * pi)
    const float kTwoPi = +6.2831853071796f;    // 2 * pi

    float turns = x * kInvTwoPi;                      // radians -> turns
    turns -= __builtin_floorf(turns + 0.75f) - 0.25f; // shift by 1/4 turn and wrap
    return turns * kTwoPi;                            // turns -> radians
}
// Evaluates the higher-accuracy sine approximation for an angle that has
// already been range-reduced (expected within about [-pi, pi]).
//
// Every step has the odd form v += k * v * |v|, so the result is an odd
// function of x and needs no sign branch:
//   1. k ~ -1/pi turns x into x * (1 - |x| / pi), a parabola-shaped arch with
//      zeros at 0 and +/-pi and peaks of +/-pi/4 at +/-pi/2.
//   2. k = 0.25 and
//   3. k = 0.0684571845286 successively reshape and rescale that arch towards
//      sin(x); the peak ends up at about 1.00005.
// The author's stated overall error bound for this variant is 0.000051.
double _fast_unnormed(double x)
{
    const double kNegInvPi = -0.3183098861838; // ~ -1 / pi
    const double kRefineA = +0.2500000000000;
    const double kRefineB = +0.0684571845286;

    x += kNegInvPi * x * __builtin_fabs(x);
    x += kRefineA * x * __builtin_fabs(x);
    x += kRefineB * x * __builtin_fabs(x);
    return x;
}
// Single-precision version of the higher-accuracy sine approximation for an
// angle that has already been range-reduced (expected within about [-pi, pi]).
//
// Every step has the odd form v += k * v * |v|, so the result is an odd
// function of x and needs no sign branch:
//   1. k ~ -1/pi turns x into x * (1 - |x| / pi), a parabola-shaped arch with
//      zeros at 0 and +/-pi and peaks of +/-pi/4 at +/-pi/2.
//   2. k = 0.25 and
//   3. k = 0.0684571845286 successively reshape and rescale that arch towards
//      sin(x).
// The author's stated overall error bound for this variant is 0.000051.
float _fast_unnormedf(float x)
{
    const float kNegInvPi = -0.3183098861838f; // ~ -1 / pi
    const float kRefineA = +0.2500000000000f;
    const float kRefineB = +0.0684571845286f;

    x += kNegInvPi * x * __builtin_fabsf(x);
    x += kRefineA * x * __builtin_fabsf(x);
    x += kRefineB * x * __builtin_fabsf(x);
    return x;
}
// Evaluates the cheaper, lower-accuracy sine approximation for an angle that
// has already been range-reduced (expected within about [-pi, pi]).
//
// Both steps have the odd form v += k * v * |v|, so the result is an odd
// function of x and needs no sign branch:
//   1. k ~ -1/pi turns x into x * (1 - |x| / pi), a parabola-shaped arch with
//      zeros at 0 and +/-pi and peaks of +/-pi/4 at +/-pi/2.
//   2. k = 0.3451140202480 applies a single correction that rescales the arch
//      towards sin(x); the peak ends up at about 0.9983.
// The author's stated overall error bound for this variant is 0.0025.
double _faster_unnormed(double x)
{
    const double kNegInvPi = -0.3183098861838; // ~ -1 / pi
    const double kRefine = +0.3451140202480;

    x += kNegInvPi * x * __builtin_fabs(x);
    x += kRefine * x * __builtin_fabs(x);
    return x;
}
// Single-precision version of the cheaper, lower-accuracy sine approximation
// for an angle that has already been range-reduced (expected within about
// [-pi, pi]).
//
// Both steps have the odd form v += k * v * |v|, so the result is an odd
// function of x and needs no sign branch:
//   1. k ~ -1/pi turns x into x * (1 - |x| / pi), a parabola-shaped arch with
//      zeros at 0 and +/-pi and peaks of +/-pi/4 at +/-pi/2.
//   2. k = 0.3451140202480 applies a single correction that rescales the arch
//      towards sin(x).
// The author's stated overall error bound for this variant is 0.0025.
float _faster_unnormedf(float x)
{
    const float kNegInvPi = -0.3183098861838f; // ~ -1 / pi
    const float kRefine = +0.3451140202480f;

    x += kNegInvPi * x * __builtin_fabsf(x);
    x += kRefine * x * __builtin_fabsf(x);
    return x;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment