Last active
May 29, 2024 00:42
-
-
Save williamyang98/7aca0ca0f1978c7374a66002892e0d8a to your computer and use it in GitHub Desktop.
Calculate chebyshev polynomial that will approximate f(x) = sin(kx). where k = π/root. Uses gradient descent.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compiling with clang or gcc | |
// clang main.cpp -o main -march=native -O3 -ffast-math -Wextra -Werror -Wunused-parameter -lm | |
// Use "-march=native -ffast-math -O3" for faster gradient descent | |
// Use "-lm" if linking to glibc math libraries on Linux | |
// Remove "-W*" warning flags if you don't need checks | |
#define _USE_MATH_DEFINES | |
#include <stdio.h> | |
#include <cmath> | |
#include <vector> | |
#define USE_SIGNAL_HANDLER 1 | |
#if USE_SIGNAL_HANDLER | |
static bool is_running = true; | |
#if _WIN32 | |
#define WIN32_LEAN_AND_MEAN | |
#define NOMINMAX | |
#include <windows.h> | |
BOOL WINAPI sighandler(DWORD signum) { | |
if (signum == CTRL_C_EVENT) { | |
fprintf(stderr, "Signal caught, exiting!\n"); | |
is_running = false; | |
return TRUE; | |
} | |
return FALSE; | |
} | |
#else | |
#include <errno.h> | |
#include <signal.h> | |
static void sighandler(int signum) { | |
fprintf(stderr, "Signal caught, exiting! (%d)\n", signum); | |
is_running = false; | |
} | |
#endif | |
#endif | |
// Configuration | |
using grad_t = double; | |
using coefficient_t = double; | |
constexpr size_t PRINT_ITER = 10'000'000; | |
constexpr size_t TOTAL_ITER_ERROR_PLATEAU_THRESHOLD = 10'000; | |
// Learning rate update rule when minimum error reaches a plateau | |
// - If we update than rescale the learning rate | |
// - Otherwise we exit early | |
constexpr bool IS_UPDATE_LEARNING_RATE = true; | |
constexpr grad_t LEARNING_RATE_RESCALER = 1.1; | |
// We are solving for a[n] in g(x) = sum a[n]*x^(2n) | |
// Where f(x) = h(x)*g(x), where: | |
// - f(x) = target function | |
// - h(x) = modifier function to reduce complexity of function g(x) needs to solve | |
// - g(x) = polynomial that can be calculated using Horner's method | |
// https://en.wikipedia.org/wiki/Horner%27s_method | |
// Gradient descent hyperparameters | |
// - We are approximating f(x) in the range: x = [X_SAMPLE_START, X_SAMPLE_END] with TOTAL_SAMPLES | |
// - g(x) = sum a[n]*x^(2n) for n = [0, TOTAL_COEFFICIENTS-1] | |
// - learning rate: INITIAL_LEARNING_RATE | |
// - Decrease learning rate if unstable | |
// - rescaling gradients for de/da[n] by: x^(n*GRAD_RESCALE_MAGNITUDE + GRAD_RESCALE_BIAS) | |
// - NOTE: This drastically improves training speed, you should tweak it | |
// - Decrease GRAD_RESCALE_MAGNITUDE if training is unstable | |
// - NOTE: You should tweak this value before tweaking learning rate | |
// Sample functions to approximate | |
#if 1 | |
// y = sin(2*pi*x) | |
template <typename T> | |
inline static T get_y_target(T x) { | |
return std::sin(T(M_PI)*2.0*x); | |
} | |
template <typename T> | |
inline static T get_h(T x) { | |
// constraint so that roots match sin(2*pi*x) | |
constexpr T B0 = T(0.5*0.5); | |
const T z = x*x; | |
return x*(z-B0); | |
} | |
constexpr size_t TOTAL_SAMPLES = 256; | |
constexpr grad_t X_SAMPLE_START = grad_t(0); | |
constexpr grad_t X_SAMPLE_END = grad_t(0.5); | |
constexpr size_t TOTAL_COEFFICIENTS = 6; | |
constexpr grad_t INITIAL_LEARNING_RATE = grad_t(1e0); | |
constexpr grad_t GRAD_RESCALE_MAGNITUDE = 4; | |
constexpr grad_t GRAD_RESCALE_BIAS = 1; | |
#elif 0 | |
// y = cos(2*pi*x) | |
template <typename T> | |
inline static T get_y_target(T x) { | |
return std::cos(T(M_PI)*2.0*x); | |
} | |
template <typename T> | |
inline static T get_h(T x) { | |
// constraint so that roots match cos(2*pi*x) | |
constexpr T B0 = T(0.25*0.25); | |
constexpr T B1 = T(0.75*0.75); | |
const T z = x*x; | |
return (z-B0)*(z-B1); | |
} | |
constexpr size_t TOTAL_SAMPLES = 256; | |
constexpr grad_t X_SAMPLE_START = grad_t(0); | |
constexpr grad_t X_SAMPLE_END = grad_t(0.5 + 1e-2); | |
constexpr size_t TOTAL_COEFFICIENTS = 6; | |
constexpr grad_t INITIAL_LEARNING_RATE = grad_t(1e0); | |
constexpr grad_t GRAD_RESCALE_MAGNITUDE = 4; | |
constexpr grad_t GRAD_RESCALE_BIAS = 1; | |
#else | |
// y = x^2 * exp(pi*x^2) * sin(2pix) | |
template <typename T> | |
inline static T get_y_target(T x) { | |
const T z = x*x; | |
return z*std::exp(T(M_PI)*z)*std::sin(2*T(M_PI)*x); | |
} | |
template <typename T> | |
inline static T get_h(T x) { | |
constexpr T B0 = T(0.5*0.5); | |
const T z = x*x; | |
return z*x*(z-B0); | |
} | |
constexpr size_t TOTAL_SAMPLES = 256; | |
constexpr grad_t X_SAMPLE_START = grad_t(0); | |
constexpr grad_t X_SAMPLE_END = grad_t(0.5 + 1e-2); | |
constexpr size_t TOTAL_COEFFICIENTS = 8; | |
constexpr grad_t INITIAL_LEARNING_RATE = grad_t(1e0); | |
constexpr grad_t GRAD_RESCALE_MAGNITUDE = 3.6; // needed to be decreased for stable gradient descent | |
constexpr grad_t GRAD_RESCALE_BIAS = 1; | |
#endif | |
inline static coefficient_t get_horner_polynomial(coefficient_t x, coefficient_t* A) { | |
// g(x) = sum a[n]*x^(2n) | |
// Use Horner's method to calculate this efficiently | |
// https://en.wikipedia.org/wiki/Horner%27s_method | |
const coefficient_t z = x*x; | |
coefficient_t g = A[TOTAL_COEFFICIENTS-1]; | |
for (size_t i = 1; i < TOTAL_COEFFICIENTS; i++) { | |
size_t j = TOTAL_COEFFICIENTS-i-1; | |
g = g*z + A[j]; | |
} | |
return g; | |
} | |
int main(int /*argc*/, char** /*argv*/) { | |
#if USE_SIGNAL_HANDLER | |
#if _WIN32 | |
SetConsoleCtrlHandler(sighandler, TRUE); | |
#else | |
struct sigaction sigact; | |
sigact.sa_handler = sighandler; | |
sigemptyset(&sigact.sa_mask); | |
sigact.sa_flags = 0; | |
sigaction(SIGINT, &sigact, nullptr); | |
sigaction(SIGTERM, &sigact, nullptr); | |
sigaction(SIGQUIT, &sigact, nullptr); | |
sigaction(SIGPIPE, &sigact, nullptr); | |
#endif | |
#endif | |
coefficient_t poly[TOTAL_COEFFICIENTS] = {coefficient_t(0)}; | |
static_assert(X_SAMPLE_END > X_SAMPLE_START, "Range of evaluation must be from x_min to x_max"); | |
constexpr grad_t EVAL_STEP = (X_SAMPLE_END-X_SAMPLE_START)/grad_t(TOTAL_SAMPLES); | |
auto X_in = std::vector<grad_t>(TOTAL_SAMPLES); | |
auto Y_expected = std::vector<grad_t>(TOTAL_SAMPLES); | |
auto H_in = std::vector<grad_t>(TOTAL_SAMPLES); | |
for (size_t i = 0; i < TOTAL_SAMPLES; i++) { | |
const grad_t x = X_SAMPLE_START + grad_t(i)*EVAL_STEP; | |
const grad_t y = get_y_target(x); | |
const grad_t h = get_h(x); | |
X_in[i] = x; | |
Y_expected[i] = y; | |
H_in[i] = h; | |
} | |
// normalised gradients for each term | |
// de/da[n] = 2*(f-y)*h*x^2n | |
// = 2*e(x)*h*x^2n | |
// Assume e(x) converges to 0, then e_max = e^2n | |
// de/da[n] = 2*h*x^4n | |
// In reality the x^4n factor is approximated as x^(an+b) | |
// where a,b are hyperparameters | |
grad_t gradient_scale[TOTAL_COEFFICIENTS] = {0.0}; | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
grad_t scale = 0.0; | |
for (size_t j = 0; j < TOTAL_SAMPLES; j++) { | |
const grad_t x = grad_t(X_in[j]); | |
const grad_t h = grad_t(H_in[j]); | |
const grad_t n = grad_t(i)*GRAD_RESCALE_MAGNITUDE + GRAD_RESCALE_BIAS; | |
const grad_t x_n = std::pow(x, n); | |
scale += (h*x_n); | |
} | |
scale = std::abs(scale); | |
scale /= grad_t(TOTAL_SAMPLES); | |
gradient_scale[i] = grad_t(1) / scale; | |
} | |
printf("grad_rescale = ["); | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
printf("%.2e,", gradient_scale[i]); | |
} | |
printf("]\n"); | |
printf("is_update_learning_rate: %u\n", IS_UPDATE_LEARNING_RATE); | |
constexpr grad_t NORM_SAMPLES = 1.0f / grad_t(TOTAL_SAMPLES); | |
grad_t learning_rate = INITIAL_LEARNING_RATE; | |
coefficient_t best_poly[TOTAL_COEFFICIENTS] = {coefficient_t(0)}; | |
grad_t error_minimum = grad_t(1e6); | |
size_t error_minimum_iter = 0; | |
size_t total_iter_error_plateau = 0; | |
for (size_t iter = 0; ; iter++) { | |
grad_t total_error = grad_t(0); | |
grad_t gradients[TOTAL_COEFFICIENTS] = {grad_t(0)}; | |
for (size_t i = 0; i < TOTAL_SAMPLES; i++) { | |
const coefficient_t x = X_in[i]; | |
const coefficient_t y_given = coefficient_t(H_in[i])*get_horner_polynomial(coefficient_t(x), poly); | |
const grad_t y_expected = Y_expected[i]; | |
const grad_t error = grad_t(y_given) - y_expected; | |
// gradient descent | |
// g(x) = sum a[n]*x^(2n) | |
// f(x) = g(x)*h(x) | |
// e(x) = [f(x) - y]^2 | |
// de/df = 2*(f-y) | |
// df/dg = h | |
// de/dg = de/df * df/dg, chain rule | |
// de/dg = 2*(f-y)*h | |
// de/da[n] = de/dg * dg/da[n] | |
// dg/da[n] = x^(2n) | |
// de/da[n] = 2*(f-y)*h*x^2n | |
const grad_t z = x*x; | |
const grad_t h = H_in[i]; | |
grad_t gradient = error*h*grad_t(2); | |
gradients[0] += gradient; | |
for (size_t j = 1; j < TOTAL_COEFFICIENTS; j++) { | |
gradient *= z; | |
gradients[j] += gradient; | |
} | |
total_error += std::abs(error); | |
} | |
// normalised error and gradient | |
total_error *= NORM_SAMPLES; | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
const grad_t scale = NORM_SAMPLES * gradient_scale[i]; | |
gradients[i] *= scale; | |
} | |
// save lowest error polynomial | |
if (total_error < error_minimum && !std::isnan(total_error)) { | |
error_minimum = total_error; | |
error_minimum_iter = iter; | |
total_iter_error_plateau = 0; | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
best_poly[i] = poly[i]; | |
} | |
} else { | |
total_iter_error_plateau++; | |
} | |
if (iter % PRINT_ITER == 0) { | |
printf("[%zu] error=%.2e, grad=[", iter, total_error); | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
printf("%.2e,", gradients[i]); | |
} | |
printf("]\n"); | |
} | |
// apply gradients | |
bool has_nan = false; | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
const grad_t delta = gradients[i] * learning_rate; | |
const grad_t coeff = grad_t(poly[i]) - delta; | |
poly[i] = coefficient_t(coeff); | |
if (std::abs(coeff) > 1e5f) { | |
printf("[%zu] [error] Detected exploding coefficient exiting\n", iter); | |
has_nan = true; | |
break; | |
} | |
if (std::isnan(coeff) || std::isinf(coeff)) { | |
printf("[%zu] [error] Detected NAN in coefficient exiting\n", iter); | |
has_nan = true; | |
break; | |
} | |
} | |
if (has_nan) { | |
printf("[%zu] [error] Early exit due to bad values, printing them now\n", iter); | |
printf(" learning_rate: %.2e\n", learning_rate); | |
printf(" grad: ["); | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
printf("%.2e,", gradients[i]); | |
} | |
printf("]\n"); | |
printf(" poly: ["); | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
printf("%.2e,", poly[i]); | |
} | |
printf("]\n"); | |
break; | |
} | |
if (total_iter_error_plateau >= TOTAL_ITER_ERROR_PLATEAU_THRESHOLD) { | |
if (IS_UPDATE_LEARNING_RATE) { | |
total_iter_error_plateau = 0; | |
learning_rate *= LEARNING_RATE_RESCALER; | |
} else { | |
printf("[%zu] [info] Exiting since error has plateaued after %zu iters\n", | |
iter, TOTAL_ITER_ERROR_PLATEAU_THRESHOLD); | |
break; | |
} | |
} | |
if (!is_running) break; | |
} | |
printf("\n[BEST RESULTS]\n"); | |
printf("step: %zu\n", error_minimum_iter); | |
printf("mean_absolute_error: %.2e\n", error_minimum); | |
printf("poly[%zu] = {", TOTAL_COEFFICIENTS); | |
for (size_t i = 0; i < TOTAL_COEFFICIENTS; i++) { | |
constexpr size_t total_dp = (sizeof(coefficient_t) == 4) ? 10 : 16; | |
printf("%.*f", int(total_dp), best_poly[i]); | |
if (i != TOTAL_COEFFICIENTS-1) { | |
printf(","); | |
} | |
} | |
printf("}\n"); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Definition of Chebyshev polynomial | |
Tn(cos(x)) = cos(nx) | |
T0(cos(x)) = cos(0x) = 1 | |
T1(cos(x)) = cos(x) | |
T2(cos(x)) = cos(2x) = 2cos(x)^2 - 1 | |
Let X = cos(x) | |
- T0(X) = 1 | |
- T1(X) = X | |
- T2(X) = 2X^2 - 1 | |
// Deriving trigonometric identities | |
e^j(a+b) = e^ja * e^jb | |
cis(a+b) = cis(a) * cis(b) | |
cos(a+b) = cos(a)cos(b) - sin(a)sin(b) | |
sin(a+b) = cos(a)sin(b) + sin(a)cos(b) | |
e^j(a-b) = e^ja * e^j(-b) | |
cis(a-b) = cis(a)*cis(-b) | |
cos(a-b) = cos(a)cos(b) + sin(a)sin(b) | |
sin(a-b) = -cos(a)sin(b) + sin(a)cos(b) | |
cos(a+b) + cos(a-b) = 2cos(a)cos(b) | |
cos(a)cos(b) = 1/2 * [ cos(a+b) + cos(a-b) ] | |
cos((n+1)x) = cos(x)cos(nx) - sin(x)sin(nx) | |
// Derive recursive definition for Chebyshev polynomials using trigonometric identities | |
T_(n+1)(cos(x)) = cos((n+1)x) = cos(x)cos(nx) - sin(x)sin(nx) | |
T_(n+1)(X) = X*Tn(X) - sin(x)sin(nx) | |
T_(n-1)(cos(x)) = cos(-x)cos(nx) - sin(-x)sin(nx) | |
T_(n-1)(X) = X*T_n(X) + sin(x)sin(nx) | |
T_(n+1)(X) + T_(n-1)(X) = 2*X*T_n(X) | |
T_(n+1)(X) = 2*X*T_n(X) - T_(n-1)(X) | |
T3(X) = 2X*T2(X) - T1(X) | |
T3(X) = 2X*(2X^2-1) - X | |
T3(X) = 4X^3 - 3X | |
// Derivation of closed form solutions to cosine integrals | |
cos(nx)*cos(mx) | |
= 1/2 * [ cos((n+m)*x) + cos((n-m)*x) ] | |
- Need to choose a monotonic slice since we are going to use substitution X = cos(x) | |
- For this to work with an integral the mapping between X <-> cos(x) must be reversible | |
int_0^pi cos(kx) dx = 0 | Integrals of harmonics are 0 | |
I(x) = int_0^pi cos(nx)cos(mx) dx | |
= int_0^pi 1/2 * [ cos((n+m)*x) + cos((n-m)*x) ] dx | |
when n != m, I(x) = int_0^pi 1/2 * [ cos(k1*x) + cos(k2*x) ] dx = 0 | |
when n == m, I(x) = int_0^pi 1/2 * [ cos(k1*x) + cos(0) ] dx = int_0^pi 1/2 dx = pi/2 | |
when n == m == 0, I(x) = int_0^pi 1/2 * [cos(0) + cos(0) ] dx = int_0^pi dx = pi | |
// Expressing cosine integrals in terms of Chebyshev polynomials | |
Letting X = cos(x) | |
dX = -sin(x) dx | X^2 = cos^2(x) = 1-sin^2(x) | sin(x) = sqrt(1-X^2) | |
dx = -1/sqrt(1-X^2) dX | |
I(x) = int_1^-1 -Tn(X)*Tm(X)/sqrt(1-X^2) dX | |
I(x) = int_-1^1 Tn(X)*Tm(X)/sqrt(1-X^2) dX | |
when n != m, I(x) = 0 | |
when n == m, I(x) = pi/2 | |
when n == m == 0, I(x) = pi | |
// Using closed form integral solutions to represent arbitrary functions as sum of Chebyshev polynomials | |
- Approximate f(x) as a sum of Chebyshev polynomials | |
f(x) = sum_n An*Tn(x) | |
- Expanding the following integral with f(x) shows we can derive An for Tn(x) | |
I(x) = int_-1^1 f(x)*Tm(x)/sqrt(1-x^2) dx | |
I(x) = sum_n An int_-1^1 Tm(x)Tn(x)/sqrt(1-x^2) dx | substitute f(x) = sum_n An*Tn(x) | Notice the nested sum | |
I(x) = sum_n An In(x) | In(x) = int_-1^1 Tm(x)Tn(x)/sqrt(1-x^2) dx | |
- Substitute our closed form solutions to integrals involving Chebyshev polynomials | |
when m != n, In(x) = 0 | This property is called orthoganality | |
when m == n, In(x) = pi/2 | |
when m == n == 0, In(x) = pi | |
- We can now solve for An for each unique value of n | |
- Get rid of integrals with Tm(x)Tn(x) where m != n, since this will evaluate to 0 | |
I(x) = An In(x) | In(x) = int_-1^1 Tn(x)Tn(x)/sqrt(1-x^2) dx | |
- We have In(x) solved and An is what we want | |
- Solve for I(x) by using initial definition | |
I(x) = int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx where m == n | Initial definition | |
I(x) = An*In(x) = int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx | |
An = 1/In(x) * int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx | |
where In(x) = pi/2 for n != 0 | |
In(x) = pi for n == 0 | |
// How do we calculate I(x) = int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx ? | |
- Tm(x) is a Chebyshev polynomial. I.e. Tn(x) = sum_i ai*x^i | |
- Solve the integral as a sum of small integrals | |
- I(x) = sum_j int_aj^bj fj(x)*Tn(x)/sqrt(1-x^2) dx | |
- |bj-aj| == very small so we can use approximations for fj(x) | |
- Use a good approximation for fj(x) in the range x in [aj,bj] | |
- Zero order hold: fj(x) = [f(aj)+f(bj)]/2 | |
- Linear fit: fj(x) = m0 + m1*x where fj(aj)=f(aj) and fj(bj)=f(bj) | |
- Quadratic fit: fj(x) = m0 + m1*x + m2*x^2 | |
- Notice all of these approximations are polynomials | |
- If we approximate fj(x) between [aj,bj] as a polynomial, and Tn(x) is a Chebyshev polynomial | |
- Then Gj(x) = fj(x)*Tn(x) will also be a polynomial | |
- Gj(x) = sum_i ai*x^i | |
I(x) = int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx | |
= sum_j int_aj^bj fj(x)*Tn(x)/sqrt(1-x^2) dx | x = [aj,bj] is our region where the polynomial fit fj(x) is made | |
= sum_j int_aj^bj Gj(x)/sqrt(1-x^2) dx | |
= sum_j int_aj^bj sum_i ai*x^i/sqrt(1-x^2) dx | |
= sum_j sum_i ai * int_a^b x^i/sqrt(1-x^2) dx | ai is our polynomial coefficient | |
// Deriving recursive closed form solution to integral of Jn(x) = x^n/sqrt(1-x^2) | |
Jn(x) = int x^n / sqrt(1-x^2) dx | |
= int x^(n-1) * x / sqrt(1-x^2) dx | |
- Using integral by parts: int u dv = uv - int v du | |
- u = x^(n-1) | du = (n-1)*x^(n-2) | |
- dv = x / sqrt(1-x^2) | v = -sqrt(1-x^2) | |
Jn(x) = - x^(n-1) * sqrt(1-x^2) + int sqrt(1-x^2) * (n-1) * x^(n-2) dx | |
= -x^(n-1) * sqrt(1-x^2) + (n-1) * int x^(n-2) * sqrt(1-x^2) dx | |
= -x^(n-1) * sqrt(1-x^2) + (n-1) * int x^(n-2) * (1-x^2) / sqrt(1-x^2) dx | |
= -x^(n-1) * sqrt(1-x^2) - (n-1) * int x^n / sqrt(1-x^2) dx + (n-1) * int x^(n-2) / sqrt(1-x^2) dx | |
= -x^(n-1) * sqrt(1-x^2) - (n-1) In(x) + (n-1) J_(n-2)(x) | |
Jn(x) = 1/n * [ -x^(n-1) * sqrt(1-x^2) + (n-1) * J_(n-2)(x) ] | |
- Get initial solutions for recursive solution | |
J0(x) = int 1 / sqrt(1-x^2) dx = arcsin(x) | |
J1(x) = int x / sqrt(1-x^2) dx = -sqrt(1-x^2) | |
// Putting all together | |
An = 1/In(x) * int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx | |
An = 1/In(x) * I(x) | |
where In(x) = pi/2 for n != 0 | |
In(x) = pi for n == 0 | |
I(x) = int_-1^1 f(x)*Tn(x)/sqrt(1-x^2) dx | |
= sum_j sum_i ai * int_a^b x^i/sqrt(1-x^2) dx | |
= sum_j sum_i ai * Ji(x) | |
- j = index of x = [aj,bj] where fj(x) approximates f(x) | |
- i = power of polynomial coefficient in fj(x) = sum_i ai*x^i |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Gradient descent or direct computation
Using gradient descent solver
x = x - round(x)
to keepf(x) = sin(2πx)
within valid approximation range._mm_round_ps
and_mm256_round_ps
are x86 simd instructions so we can vectorise input clamping.coefficient_t
tofloat
in solver and use the initialdouble
solution as our initialpoly[] = {}
.grad_t
asdouble
.float
coefficients.Calculated polynomials for
f(x) = sin(2πx)
and its mean absolute errorUsing
f64
for coefficients and gradient with1024
samples.7.07e-10
{-25.1327411142213464,64.8358266034100694,-67.0768851968012996,38.4999814590310407,-14.0736995967404166,3.2086237284325541}
6.01e-8
{-25.1327327703024501,64.8348800694709126,-67.0481124846729131,38.1547044862267200,-12.3089616248433558}
Using
f32
for coefficients with initial estimate fromf64
solution.2.47e-8
{-25.1327419281,64.8358230591,-67.0768585205,38.5037078857,-14.1102266312,3.2983388901}
6.32e-8
{-25.1327323914,64.8348770142,-67.0481109619,38.1546096802,-12.3083391190}
Polynomials from original article
poly[6] = {-25.1327419281,64.8358230591,-67.0766296387,38.495880127,-14.0496635437,3.16160202026}
@2.98e-8