Created
September 19, 2020 22:39
-
-
Save franzbischoff/136dee5f58e528b6a128665ac025d888 to your computer and use it in GitHub Desktop.
FFT thread-safe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
R : A Computer Language for Statistical Data Analysis | |
Copyright (C) 1998--2019 The R Core Team | |
Copyright (C) 1995, 1996, 1997 Robert Gentleman and Ross Ihaka | |
This program is free software; you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation; either version 2 of the License, or | |
(at your option) any later version. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
You should have received a copy of the GNU General Public License | |
along with this program; if not, a copy is available at | |
https://www.R-project.org/Licenses/ | |
*/ | |
/* | |
Simplified code of stats::fft() from R, for using directly in | |
Rcpp, C, C++ code. Also working on RcppParallel, at least today | |
2020-09-19. Mostly complying with C++ Standard Library (STD). | |
Still taking more time on benchmarks than stats::fft, but is faster | |
then importing the function from R to use in C++. | |
This code is intended to work without any dependency on R libraries. | |
Just use the fft.cpp and fft.h files. | |
Francisco 'franz' Bischoff | |
*/ | |
#include <iostream> | |
#include "fft.h" | |
namespace FFT | |
{ | |
/* Fast Fourier Transform | |
These routines are based on code by Richard Singleton in the | |
book "Programs for Digital Signal Processing" put out by IEEE. | |
I have translated them to C and moved the memory allocation | |
so that it takes place under the control of the algorithm | |
which calls these; for R, see ./fourier.c | |
~~~~~~~~~~~ | |
void fft_factor(int n, int *maxf, int *maxp) | |
This factorizes the series length and computes the values of | |
maxf and maxp which determine the amount of scratch storage | |
required by the algorithm. | |
If maxf is zero on return, an error occured during factorization. | |
The nature of the error can be determined from the value of maxp. | |
If maxp is zero, an invalid (zero) parameter was passed and | |
if maxp is one, the internal nfac array was too small. This can only | |
happen for series lengths which exceed 12,754,584. | |
PROBLEM (see fftmx below): nfac[] is overwritten by fftmx() in fft_work() | |
------- Consequence: fft_factor() must be called way too often, | |
at least from do_mvfft() [ ../main/fourier.c ] | |
The following arrays need to be allocated following the call to | |
fft_factor and preceding the call to fft_work. | |
work double[4*maxf] | |
iwork int[maxp] | |
int fft_work(double *a, double *b, int nseg, int n, int nspn, | |
int isn, double *work, int *iwork) | |
The routine returns 1 if the transform was completed successfully and | |
0 if invalid values of the parameters were supplied. | |
Ross Ihaka | |
University of Auckland | |
February 1997 | |
========================================================================== | |
Header from the original Singleton algorithm: | |
-------------------------------------------------------------- | |
subroutine: fft | |
multivariate complex fourier transform, computed in place | |
using mixed-radix fast fourier transform algorithm. | |
-------------------------------------------------------------- | |
arrays a and b originally hold the real and imaginary | |
components of the data, and return the real and | |
imaginary components of the resulting fourier coefficients. | |
multivariate data is indexed according to the fortran | |
array element successor function, without limit | |
on the number of implied multiple subscripts. | |
the subroutine is called once for each variate. | |
the calls for a multivariate transform may be in any order. | |
n is the dimension of the current variable. | |
nspn is the spacing of consecutive data values | |
while indexing the current variable. | |
nseg nseg*n*nspn is the total number of complex data values. | |
isn the sign of isn determines the sign of the complex | |
exponential, and the magnitude of isn is normally one. | |
the magnitude of isn determines the indexing increment for a&b. | |
if fft is called twice, with opposite signs on isn, an | |
identity transformation is done...calls can be in either order. | |
the results are scaled by 1/n when the sign of isn is positive. | |
a tri-variate transform with a(n1,n2,n3), b(n1,n2,n3) | |
is computed by | |
call fft(a,b,n2*n3,n1, 1, -1) | |
call fft(a,b,n3 ,n2,n1, -1) | |
call fft(a,b,1, n3,n1*n2,-1) | |
a single-variate transform of n complex data values is computed by | |
call fft(a,b,1,n,1,-1) | |
the data may alternatively be stored in a single complex | |
array a, then the magnitude of isn changed to two to | |
give the correct indexing increment and a(2) used to | |
pass the initial address for the sequence of imaginary | |
values, e.g. | |
call fft(a,a(2),nseg,n,nspn,-2) | |
nfac[15] (array) is working storage for factoring n. the smallest | |
number exceeding the 15 locations provided is 12,754,584. | |
Update in R 3.1.0: nfac[20], increased array size. It is now possible to | |
factor any positive int n, up to 2^31 - 1. | |
*/ | |
fftw::fftw() {} | |
// Destructor | |
fftw::~fftw() | |
{ | |
if (work != nullptr) | |
std::free(work); | |
if (iwork != nullptr) | |
std::free(iwork); | |
if (cplx != nullptr) | |
std::free(cplx); | |
} | |
std::vector<std::complex<double>> fftw::fft(std::vector<double> z, bool inverse) | |
{ | |
int n = z.size(); | |
std::vector<std::complex<double>> res(n); | |
for (int i = 0; i < n; i++) | |
{ | |
res[i] = std::complex<double>(z[i], 0.0); | |
} | |
std::vector<std::complex<double>> final = fft(res, inverse); | |
return final; | |
} | |
/* Fourier Transform for Univariate Spatial */ | |
std::vector<std::complex<double>> fftw::fft(std::vector<std::complex<double>> z, bool inverse) | |
{ | |
int i, inv, maxf, maxp, n; | |
double f; | |
size_t smaxf; | |
size_t maxsize = ((size_t)-1) / 4; | |
n = z.size(); | |
if (inverse) | |
{ | |
f = (double) n; | |
inv = +2; | |
} | |
else | |
{ | |
f = (double) 1.0; | |
inv = -2; | |
} | |
std::vector<std::complex<double>> res(n); | |
if (n > 1) | |
{ | |
fft_factor(n, &maxf, &maxp); | |
if (maxf == 0) | |
{ | |
std::cout << "fft factorization error" << std::endl; | |
} | |
smaxf = maxf; | |
if (smaxf > maxsize) | |
{ | |
std::cout << "fft too large" << std::endl; | |
} | |
work = (double *)std::calloc(4 * smaxf, sizeof(double)); | |
iwork = (int *)std::calloc(maxp, sizeof(int)); | |
cplx = (complex_t *)std::calloc(n, sizeof(complex_t)); | |
for (i = 0; i < n; i++) | |
{ | |
cplx[i].r = z[i].real(); | |
cplx[i].i = z[i].imag(); | |
} | |
fft_work(&cplx[0].r, &cplx[0].i, 1, n, 1, inv, work, iwork); | |
for (i = 0; i < n; i++) | |
{ | |
res[i] = std::complex<double>(cplx[i].r, cplx[i].i) / f; | |
} | |
// for(i = 0; i < 20; i++) | |
// nfac[i] = -1; | |
// | |
// old_n = m_fac = kt = maxf = maxp = -1; | |
if (work != nullptr) { | |
std::free(work); | |
work = nullptr; | |
} | |
if (iwork != nullptr) { | |
std::free(iwork); | |
iwork = nullptr; | |
} | |
if (cplx != nullptr) { | |
std::free(cplx); | |
cplx = nullptr; | |
} | |
} | |
// std::cout << "cplx 2" << std::endl; | |
return res; | |
} | |
/* At the end of factorization, | |
nfac[] contains the factors, | |
m_fac contains the number of factors and | |
kt contains the number of square factors */ | |
/* non-API, but used by package RandomFields */ | |
void fftw::fft_factor(int n, int *pmaxf, int *pmaxp) | |
{ | |
/* fft_factor - factorization check and determination of memory | |
requirements for the fft. | |
On return, *pmaxf will give the maximum factor size | |
and *pmaxp will give the amount of integer scratch storage required. | |
If *pmaxf == 0, there was an error, the error type is indicated by *pmaxp: | |
If *pmaxp == 0 There was an illegal zero parameter among nseg, n, and nspn. | |
If *pmaxp == 1 There were more than 20 factors to ntot. */ | |
int j, jj, k, sqrtk, kchanged; | |
/* check series length */ | |
if (n <= 0) | |
{ | |
old_n = 0; | |
*pmaxf = 0; | |
*pmaxp = 0; | |
return; | |
} | |
else | |
{ | |
old_n = n; | |
} | |
/* determine the factors of n */ | |
m_fac = 0; | |
k = n; /* k := remaining unfactored factor of n */ | |
if (k == 1) | |
{ | |
return; | |
} | |
/* extract square factors first ------------------ */ | |
/* extract 4^2 = 16 separately | |
==> at most one remaining factor 2^2 = 4, done below */ | |
while (k % 16 == 0) | |
{ | |
nfac[m_fac++] = 4; | |
k /= 16; | |
} | |
/* extract 3^2, 5^2, ... */ | |
kchanged = 0; | |
sqrtk = (int)sqrt(k); | |
for (j = 3; j <= sqrtk; j += 2) | |
{ | |
jj = j * j; | |
while (k % jj == 0) | |
{ | |
nfac[m_fac++] = j; | |
k /= jj; | |
kchanged = 1; | |
} | |
if (kchanged) | |
{ | |
kchanged = 0; | |
sqrtk = (int)sqrt(k); | |
} | |
} | |
if (k <= 4) | |
{ | |
kt = m_fac; | |
nfac[m_fac] = k; | |
if (k != 1) | |
{ | |
m_fac++; | |
} | |
} | |
else | |
{ | |
if (k % 4 == 0) | |
{ | |
nfac[m_fac++] = 2; | |
k /= 4; | |
} | |
/* all square factors out now, but k >= 5 still */ | |
kt = m_fac; | |
maxp = MAX(kt + kt + 2, k - 1); | |
j = 2; | |
do | |
{ | |
if (k % j == 0) | |
{ | |
nfac[m_fac++] = j; | |
k /= j; | |
} | |
if (j > INT_MAX - 2) | |
{ | |
break; | |
} | |
j = ((j + 1) / 2) * 2 + 1; | |
} while (j <= k); | |
} | |
if (m_fac <= kt + 1) | |
{ | |
maxp = m_fac + kt + 1; | |
} | |
if (m_fac + kt > 20) | |
{ /* error - too many factors */ | |
old_n = 0; | |
*pmaxf = 0; | |
*pmaxp = 0; | |
return; | |
} | |
else | |
{ | |
if (kt != 0) | |
{ | |
j = kt; | |
while (j != 0) | |
{ | |
nfac[m_fac++] = nfac[--j]; | |
} | |
} | |
maxf = nfac[m_fac - kt - 1]; | |
/* The last squared factor is not necessarily the largest PR#1429 */ | |
if (kt > 0) | |
{ | |
maxf = MAX(nfac[kt - 1], maxf); | |
} | |
if (kt > 1) | |
{ | |
maxf = MAX(nfac[kt - 2], maxf); | |
} | |
if (kt > 2) | |
{ | |
maxf = MAX(nfac[kt - 3], maxf); | |
} | |
} | |
*pmaxf = maxf; | |
*pmaxp = maxp; | |
} | |
int fftw::fft_work(double *a, double *b, int nseg, int n, int nspn, int isn, double *work, int *iwork) | |
{ | |
/* check that factorization was successful */ | |
if (old_n == 0) | |
{ | |
return 0; | |
} | |
/* check that the parameters match those of the factorization call */ | |
if (n != old_n || nseg <= 0 || nspn <= 0 || isn == 0) | |
{ | |
return 0; | |
} | |
/* perform the transform */ | |
size_t mf = maxf; | |
int nspan = n * nspn, ntot = nspan * nseg; | |
fftmx(a, b, ntot, n, nspan, isn, m_fac, kt, | |
work, work + mf, work + 2 * mf, work + 3 * mf, | |
iwork, nfac); | |
return 1; | |
} | |
void fftw::fftmx(double *a, double *b, int ntot, int n, int nspan, int isn, int m, int kt, double *at, double *ck, double *bt, double *sk, int *np, int *nfac) | |
{ | |
/* called from fft_work() */ | |
/* Design BUG: One purpose of fft_factor() would be to compute | |
---------- nfac[] once and for all; and fft_work() [i.e. fftmx ] | |
could reuse the factorization. | |
However: nfac[] is `destroyed' currently in the code below | |
*/ | |
double aa, aj, ajm, ajp, ak, akm, akp; | |
double bb, bj, bjm, bjp, bk, bkm, bkp; | |
double c1, c2 = 0, c3 = 0, c72, cd; | |
double dr, rad; | |
double s1, s120, s2 = 0, s3 = 0, s72, sd; | |
int i, inc, j, jc, jf, jj; | |
int k, k1, k2, k3 = 0, k4, kk, klim, ks, kspan, kspnn; | |
int lim, maxf, mm, nn, nt; | |
// converted from Fortran, so 1-based indexing | |
a--; | |
b--; | |
at--; | |
ck--; | |
bt--; | |
sk--; | |
np--; | |
// nfac has had indexing converted to avoid compiler warnings | |
inc = abs(isn); | |
nt = inc * ntot; | |
ks = inc * nspan; | |
rad = M_PI_4; /* = pi/4 =^= 45 degrees */ | |
s72 = rad / 0.625; /* 72 = 45 / .625 degrees */ | |
c72 = cos(s72); | |
s72 = sin(s72); | |
s120 = 0.5 * M_SQRT_3; /* sin(120) = sqrt(3)/2 */ | |
if (isn <= 0) | |
{ | |
s72 = -s72; | |
s120 = -s120; | |
rad = -rad; | |
} | |
else | |
{ | |
#ifdef SCALING | |
/* scale by 1/n for isn > 0 */ | |
ak = 1.0 / n; | |
for (j = 1; j <= nt; j += inc) | |
{ | |
a[j] *= ak; | |
b[j] *= ak; | |
} | |
#endif | |
} | |
kspan = ks; | |
nn = nt - inc; | |
jc = ks / n; | |
/* sin, cos values are re-initialized each lim steps */ | |
lim = 32; | |
klim = lim * jc; | |
i = 0; | |
jf = 0; | |
maxf = nfac[m - kt - 1]; | |
if (kt > 0) | |
{ | |
maxf = MAX(nfac[kt - 1], maxf); | |
} | |
/* compute fourier transform */ | |
L_start: | |
dr = (8.0 * jc) / kspan; | |
cd = sin(0.5 * dr * rad); | |
cd = 2.0 * cd * cd; | |
sd = sin(dr * rad); | |
kk = 1; | |
i++; | |
if (nfac[i - 1] != 2) | |
{ | |
goto L110; | |
} | |
/* transform for factor of 2 (including rotation factor) */ | |
kspan /= 2; | |
k1 = kspan + 2; | |
do | |
{ | |
do | |
{ | |
k2 = kk + kspan; | |
ak = a[k2]; | |
bk = b[k2]; | |
a[k2] = a[kk] - ak; | |
b[k2] = b[kk] - bk; | |
a[kk] += ak; | |
b[kk] += bk; | |
kk = k2 + kspan; | |
} while (kk <= nn); | |
kk -= nn; | |
} while (kk <= jc); | |
if (kk > kspan) | |
{ | |
goto L_fin; | |
} | |
L60: | |
c1 = 1.0 - cd; | |
s1 = sd; | |
mm = MIN(k1 / 2, klim); | |
goto L80; | |
L70: | |
ak = c1 - (cd * c1 + sd * s1); | |
s1 = (sd * c1 - cd * s1) + s1; | |
/* the following three statements compensate for truncation error. */ | |
/* if rounded arithmetic is used (nowadays always ?!), substitute c1=ak */ | |
#ifdef TRUNCATED_ARITHMETIC | |
c1 = 0.5 / (ak * ak + s1 * s1) + 0.5; | |
s1 = c1 * s1; | |
c1 = c1 * ak; | |
#else | |
c1 = ak; | |
#endif | |
L80: | |
do | |
{ | |
k2 = kk + kspan; | |
ak = a[kk] - a[k2]; | |
bk = b[kk] - b[k2]; | |
a[kk] += a[k2]; | |
b[kk] += b[k2]; | |
a[k2] = c1 * ak - s1 * bk; | |
b[k2] = s1 * ak + c1 * bk; | |
kk = k2 + kspan; | |
} while (kk < nt); | |
k2 = kk - nt; | |
c1 = -c1; | |
kk = k1 - k2; | |
if (kk > k2) | |
{ | |
goto L80; | |
} | |
kk += jc; | |
if (kk <= mm) | |
{ | |
goto L70; | |
} | |
if (kk >= k2) | |
{ | |
k1 = k1 + inc + inc; | |
kk = (k1 - kspan) / 2 + jc; | |
if (kk <= jc + jc) | |
{ | |
goto L60; | |
} | |
goto L_start; | |
} | |
s1 = ((kk - 1) / jc) * dr * rad; | |
c1 = cos(s1); | |
s1 = sin(s1); | |
mm = MIN(k1 / 2, mm + klim); | |
goto L80; | |
/* transform for factor of 3 (optional code) */ | |
L100: | |
k1 = kk + kspan; | |
k2 = k1 + kspan; | |
ak = a[kk]; | |
bk = b[kk]; | |
aj = a[k1] + a[k2]; | |
bj = b[k1] + b[k2]; | |
a[kk] = ak + aj; | |
b[kk] = bk + bj; | |
ak = -0.5 * aj + ak; | |
bk = -0.5 * bj + bk; | |
aj = (a[k1] - a[k2]) * s120; | |
bj = (b[k1] - b[k2]) * s120; | |
a[k1] = ak - bj; | |
b[k1] = bk + aj; | |
a[k2] = ak + bj; | |
b[k2] = bk - aj; | |
kk = k2 + kspan; | |
if (kk < nn) | |
{ | |
goto L100; | |
} | |
kk = kk - nn; | |
if (kk <= kspan) | |
{ | |
goto L100; | |
} | |
goto L290; | |
/* transform for factor of 4 */ | |
L110: | |
if (nfac[i - 1] != 4) | |
{ | |
goto L_f_odd; | |
} | |
kspnn = kspan; | |
kspan /= 4; | |
L120: | |
c1 = 1.0; | |
s1 = 0; | |
mm = MIN(kspan, klim); | |
goto L150; | |
L130: | |
c2 = c1 - (cd * c1 + sd * s1); | |
s1 = (sd * c1 - cd * s1) + s1; | |
/* the following three statements compensate for truncation error. */ | |
/* if rounded arithmetic is used (nowadays always ?!), substitute c1=c2 */ | |
#ifdef TRUNCATED_ARITHMETIC | |
c1 = 0.5 / (c2 * c2 + s1 * s1) + 0.5; | |
s1 = c1 * s1; | |
c1 = c1 * c2; | |
#else | |
c1 = c2; | |
#endif | |
L140: | |
c2 = c1 * c1 - s1 * s1; | |
s2 = c1 * s1 * 2.0; | |
c3 = c2 * c1 - s2 * s1; | |
s3 = c2 * s1 + s2 * c1; | |
L150: | |
k1 = kk + kspan; | |
k2 = k1 + kspan; | |
k3 = k2 + kspan; | |
akp = a[kk] + a[k2]; | |
akm = a[kk] - a[k2]; | |
ajp = a[k1] + a[k3]; | |
ajm = a[k1] - a[k3]; | |
a[kk] = akp + ajp; | |
ajp = akp - ajp; | |
bkp = b[kk] + b[k2]; | |
bkm = b[kk] - b[k2]; | |
bjp = b[k1] + b[k3]; | |
bjm = b[k1] - b[k3]; | |
b[kk] = bkp + bjp; | |
bjp = bkp - bjp; | |
if (isn < 0) | |
{ | |
goto L180; | |
} | |
akp = akm - bjm; | |
akm = akm + bjm; | |
bkp = bkm + ajm; | |
bkm = bkm - ajm; | |
if (s1 == 0.0) | |
{ | |
goto L190; | |
} | |
L160: | |
a[k1] = akp * c1 - bkp * s1; | |
b[k1] = akp * s1 + bkp * c1; | |
a[k2] = ajp * c2 - bjp * s2; | |
b[k2] = ajp * s2 + bjp * c2; | |
a[k3] = akm * c3 - bkm * s3; | |
b[k3] = akm * s3 + bkm * c3; | |
kk = k3 + kspan; | |
if (kk <= nt) | |
{ | |
goto L150; | |
} | |
L170: | |
kk = kk - nt + jc; | |
if (kk <= mm) | |
{ | |
goto L130; | |
} | |
if (kk < kspan) | |
{ | |
goto L200; | |
} | |
kk = kk - kspan + inc; | |
if (kk <= jc) | |
{ | |
goto L120; | |
} | |
if (kspan == jc) | |
{ | |
goto L_fin; | |
} | |
goto L_start; | |
L180: | |
akp = akm + bjm; | |
akm = akm - bjm; | |
bkp = bkm - ajm; | |
bkm = bkm + ajm; | |
if (s1 != 0.0) | |
{ | |
goto L160; | |
} | |
L190: | |
a[k1] = akp; | |
b[k1] = bkp; | |
a[k2] = ajp; | |
b[k2] = bjp; | |
a[k3] = akm; | |
b[k3] = bkm; | |
kk = k3 + kspan; | |
if (kk <= nt) | |
{ | |
goto L150; | |
} | |
goto L170; | |
L200: | |
s1 = ((kk - 1) / jc) * dr * rad; | |
c1 = cos(s1); | |
s1 = sin(s1); | |
mm = MIN(kspan, mm + klim); | |
goto L140; | |
/* transform for factor of 5 (optional code) */ | |
L_f5: | |
c2 = c72 * c72 - s72 * s72; | |
s2 = 2.0 * c72 * s72; | |
L220: | |
k1 = kk + kspan; | |
k2 = k1 + kspan; | |
k3 = k2 + kspan; | |
k4 = k3 + kspan; | |
akp = a[k1] + a[k4]; | |
akm = a[k1] - a[k4]; | |
bkp = b[k1] + b[k4]; | |
bkm = b[k1] - b[k4]; | |
ajp = a[k2] + a[k3]; | |
ajm = a[k2] - a[k3]; | |
bjp = b[k2] + b[k3]; | |
bjm = b[k2] - b[k3]; | |
aa = a[kk]; | |
bb = b[kk]; | |
a[kk] = aa + akp + ajp; | |
b[kk] = bb + bkp + bjp; | |
ak = akp * c72 + ajp * c2 + aa; | |
bk = bkp * c72 + bjp * c2 + bb; | |
aj = akm * s72 + ajm * s2; | |
bj = bkm * s72 + bjm * s2; | |
a[k1] = ak - bj; | |
a[k4] = ak + bj; | |
b[k1] = bk + aj; | |
b[k4] = bk - aj; | |
ak = akp * c2 + ajp * c72 + aa; | |
bk = bkp * c2 + bjp * c72 + bb; | |
aj = akm * s2 - ajm * s72; | |
bj = bkm * s2 - bjm * s72; | |
a[k2] = ak - bj; | |
a[k3] = ak + bj; | |
b[k2] = bk + aj; | |
b[k3] = bk - aj; | |
kk = k4 + kspan; | |
if (kk < nn) | |
{ | |
goto L220; | |
} | |
kk = kk - nn; | |
if (kk <= kspan) | |
{ | |
goto L220; | |
} | |
goto L290; | |
/* transform for odd factors */ | |
L_f_odd: | |
k = nfac[i - 1]; | |
kspnn = kspan; | |
kspan /= k; | |
if (k == 3) | |
{ | |
goto L100; | |
} | |
if (k == 5) | |
{ | |
goto L_f5; | |
} | |
if (k == jf) | |
{ | |
goto L250; | |
} | |
jf = k; | |
s1 = rad / (k / 8.0); | |
c1 = cos(s1); | |
s1 = sin(s1); | |
ck[jf] = 1.0; | |
sk[jf] = 0.0; | |
for (j = 1; j < k; j++) | |
{ /* k is changing as well */ | |
ck[j] = ck[k] * c1 + sk[k] * s1; | |
sk[j] = ck[k] * s1 - sk[k] * c1; | |
k--; | |
ck[k] = ck[j]; | |
sk[k] = -sk[j]; | |
} | |
L250: | |
k1 = kk; | |
k2 = kk + kspnn; | |
aa = a[kk]; | |
bb = b[kk]; | |
ak = aa; | |
bk = bb; | |
j = 1; | |
k1 = k1 + kspan; | |
L260: | |
k2 = k2 - kspan; | |
j++; | |
at[j] = a[k1] + a[k2]; | |
ak = at[j] + ak; | |
bt[j] = b[k1] + b[k2]; | |
bk = bt[j] + bk; | |
j++; | |
at[j] = a[k1] - a[k2]; | |
bt[j] = b[k1] - b[k2]; | |
k1 = k1 + kspan; | |
if (k1 < k2) | |
{ | |
goto L260; | |
} | |
a[kk] = ak; | |
b[kk] = bk; | |
k1 = kk; | |
k2 = kk + kspnn; | |
j = 1; | |
L270: | |
k1 += kspan; | |
k2 -= kspan; | |
jj = j; | |
ak = aa; | |
bk = bb; | |
aj = 0.0; | |
bj = 0.0; | |
k = 1; | |
for (k = 2; k < jf; k++) | |
{ | |
ak += at[k] * ck[jj]; | |
bk += bt[k] * ck[jj]; | |
k++; | |
aj += at[k] * sk[jj]; | |
bj += bt[k] * sk[jj]; | |
jj += j; | |
if (jj > jf) | |
{ | |
jj -= jf; | |
} | |
} | |
k = jf - j; | |
a[k1] = ak - bj; | |
b[k1] = bk + aj; | |
a[k2] = ak + bj; | |
b[k2] = bk - aj; | |
j++; | |
if (j < k) | |
{ | |
goto L270; | |
} | |
kk = kk + kspnn; | |
if (kk <= nn) | |
{ | |
goto L250; | |
} | |
kk = kk - nn; | |
if (kk <= kspan) | |
{ | |
goto L250; | |
} | |
/* multiply by rotation factor (except for factors of 2 and 4) */ | |
L290: | |
if (i == m) | |
{ | |
goto L_fin; | |
} | |
kk = jc + 1; | |
L300: | |
c2 = 1.0 - cd; | |
s1 = sd; | |
mm = MIN(kspan, klim); | |
do | |
{ /* L320: */ | |
c1 = c2; | |
s2 = s1; | |
kk += kspan; | |
do | |
{ /* L330: */ | |
do | |
{ | |
ak = a[kk]; | |
a[kk] = c2 * ak - s2 * b[kk]; | |
b[kk] = s2 * ak + c2 * b[kk]; | |
kk += kspnn; | |
} while (kk <= nt); | |
ak = s1 * s2; | |
s2 = s1 * c2 + c1 * s2; | |
c2 = c1 * c2 - ak; | |
kk += -nt + kspan; | |
} while (kk <= kspnn); | |
kk += -kspnn + jc; | |
if (kk <= mm) | |
{ /* L310: */ | |
c2 = c1 - (cd * c1 + sd * s1); | |
s1 = s1 + (sd * c1 - cd * s1); | |
/* the following three statements compensate for truncation error.*/ | |
/* if rounded arithmetic is used (nowadays always ?!), they may be deleted. */ | |
#ifdef TRUNCATED_ARITHMETIC | |
c1 = 0.5 / (c2 * c2 + s1 * s1) + 0.5; | |
s1 = c1 * s1; | |
c2 = c1 * c2; | |
#endif | |
continue /* goto L320*/; | |
} | |
if (kk >= kspan) | |
{ | |
kk = kk - kspan + jc + inc; | |
if (kk <= jc + jc) | |
{ | |
goto L300; | |
} | |
goto L_start; | |
} | |
s1 = ((kk - 1) / jc) * dr * rad; | |
c2 = cos(s1); | |
s1 = sin(s1); | |
mm = MIN(kspan, mm + klim); | |
} while (1); | |
/*------------------------------------------------------------*/ | |
/* permute the results to normal order---done in two stages */ | |
/* permutation for square factors of n */ | |
L_fin: | |
np[1] = ks; | |
if (kt == 0) | |
{ | |
goto L440; | |
} | |
k = kt + kt + 1; | |
if (m < k) | |
{ | |
k--; | |
} | |
np[k + 1] = jc; | |
for (j = 1; j < k; j++, k--) | |
{ | |
np[j + 1] = np[j] / nfac[j - 1]; | |
np[k] = np[k + 1] * nfac[j - 1]; | |
} | |
k3 = np[k + 1]; | |
kspan = np[2]; | |
kk = jc + 1; | |
k2 = kspan + 1; | |
j = 1; | |
if (n == ntot) | |
{ | |
/* permutation for single-variate transform (optional code) */ | |
L370: | |
do | |
{ | |
ak = a[kk]; | |
a[kk] = a[k2]; | |
a[k2] = ak; | |
bk = b[kk]; | |
b[kk] = b[k2]; | |
b[k2] = bk; | |
kk += inc; | |
k2 += kspan; | |
} while (k2 < ks); | |
L380: | |
do | |
{ | |
k2 -= np[j]; | |
j++; | |
k2 += np[j + 1]; | |
} while (k2 > np[j]); | |
j = 1; | |
do | |
{ | |
if (kk < k2) | |
{ | |
goto L370; | |
} | |
kk += inc; | |
k2 += kspan; | |
} while (k2 < ks); | |
if (kk < ks) | |
{ | |
goto L380; | |
} | |
jc = k3; | |
} | |
else | |
{ | |
/* permutation for multivariate transform */ | |
L400: | |
k = kk + jc; | |
do | |
{ | |
ak = a[kk]; | |
a[kk] = a[k2]; | |
a[k2] = ak; | |
bk = b[kk]; | |
b[kk] = b[k2]; | |
b[k2] = bk; | |
kk += inc; | |
k2 += inc; | |
} while (kk < k); | |
kk += ks - jc; | |
k2 += ks - jc; | |
if (kk < nt) | |
{ | |
goto L400; | |
} | |
k2 += -nt + kspan; | |
kk += -nt + jc; | |
if (k2 < ks) | |
{ | |
goto L400; | |
} | |
do | |
{ | |
do | |
{ | |
k2 -= np[j]; | |
j++; | |
k2 += np[j + 1]; | |
} while (k2 > np[j]); | |
j = 1; | |
do | |
{ | |
if (kk < k2) | |
{ | |
goto L400; | |
} | |
kk += jc; | |
k2 += kspan; | |
} while (k2 < ks); | |
} while (kk < ks); | |
jc = k3; | |
} | |
L440: | |
if (2 * kt + 1 >= m) | |
{ | |
return; | |
} | |
kspnn = np[kt + 1]; | |
/* permutation for square-free factors of n */ | |
/* Here, nfac[] is overwritten... -- now CUMULATIVE ("cumprod") factors */ | |
nn = m - kt; | |
nfac[nn] = 1; | |
for (j = nn; j > kt; j--) | |
{ | |
nfac[j - 1] *= nfac[j]; | |
} | |
kt++; | |
nn = nfac[kt - 1] - 1; | |
jj = 0; | |
j = 0; | |
goto L480; | |
L460: | |
jj -= k2; | |
k2 = kk; | |
k++; | |
kk = nfac[k - 1]; | |
L470: | |
jj += kk; | |
if (jj >= k2) | |
{ | |
goto L460; | |
} | |
np[j] = jj; | |
L480: | |
k2 = nfac[kt - 1]; | |
k = kt + 1; | |
kk = nfac[k - 1]; | |
j++; | |
if (j <= nn) | |
{ | |
goto L470; | |
} | |
/* determine the permutation cycles of length greater than 1 */ | |
j = 0; | |
goto L500; | |
do | |
{ | |
do | |
{ | |
k = kk; | |
kk = np[k]; | |
np[k] = -kk; | |
} while (kk != j); | |
k3 = kk; | |
L500: | |
do | |
{ | |
j++; | |
kk = np[j]; | |
} while (kk < 0); | |
} while (kk != j); | |
np[j] = -j; | |
if (j != nn) | |
{ | |
goto L500; | |
} | |
maxf *= inc; | |
goto L570; | |
/* reorder a and b, following the permutation cycles */ | |
L_ord: | |
do | |
{ | |
j--; | |
} while (np[j] < 0); | |
jj = jc; | |
L520: | |
kspan = MIN(jj, maxf); | |
jj -= kspan; | |
k = np[j]; | |
kk = jc * k + i + jj; | |
for (k1 = kk + kspan, k2 = 1; k1 != kk; | |
k1 -= inc, k2++) | |
{ | |
at[k2] = a[k1]; | |
bt[k2] = b[k1]; | |
} | |
do | |
{ | |
k1 = kk + kspan; | |
k2 = k1 - jc * (k + np[k]); | |
k = -np[k]; | |
do | |
{ | |
a[k1] = a[k2]; | |
b[k1] = b[k2]; | |
k1 -= inc; | |
k2 -= inc; | |
} while (k1 != kk); | |
kk = k2; | |
} while (k != j); | |
for (k1 = kk + kspan, k2 = 1; k1 > kk; | |
k1 -= inc, k2++) | |
{ | |
a[k1] = at[k2]; | |
b[k1] = bt[k2]; | |
} | |
if (jj != 0) | |
{ | |
goto L520; | |
} | |
if (j != 1) | |
{ | |
goto L_ord; | |
} | |
L570: | |
j = k3 + 1; | |
nt = nt - kspnn; | |
i = nt - inc + 1; | |
if (nt >= 0) | |
{ | |
goto L_ord; | |
} | |
} /* fftmx */ | |
} // namespace FFT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __FFT__ | |
#define __FFT__ | |
#include <vector> | |
#include <complex> | |
namespace FFT | |
{ | |
#ifndef MIN | |
#define MIN(y, x) ((x) < (y) && (x) == (x) ? (x) : (y)) | |
#endif | |
#ifndef MAX | |
#define MAX(y, x) ((x) > (y) && (x) == (x) ? (x) : (y)) | |
#endif | |
#ifndef M_SQRT_3 | |
#define M_SQRT_3 1.732050807568877293527446341506 /* sqrt(3) */ | |
#endif | |
#ifndef M_PI_4 | |
#define M_PI_4 0.785398163397448309615660845820 /* pi/4 */ | |
#endif | |
typedef struct complex | |
{ | |
double r; | |
double i; | |
} complex_t; | |
class fftw | |
{ | |
public: | |
// Constructor | |
fftw(); | |
// Destructor | |
~fftw(); | |
std::vector<std::complex<double>> fft(std::vector<double> z, bool inverse); | |
std::vector<std::complex<double>> fft(std::vector<std::complex<double>> z, bool inverse); | |
private: | |
int old_n = 0; | |
int nfac[20]; | |
int m_fac; | |
int kt; | |
int maxf; | |
int maxp; | |
double *work = nullptr; | |
int *iwork = nullptr; | |
complex_t *cplx = nullptr; | |
/* non-API, but used by package RandomFields */ | |
void fft_factor(int n, int *pmaxf, int *pmaxp); | |
int fft_work(double *a, double *b, int nseg, int n, int nspn, int isn, | |
double *work, int *iwork); | |
void fftmx(double *a, double *b, int ntot, int n, int nspan, int isn, int m, int kt, | |
double *at, double *ck, double *bt, double *sk, int *np, int *nfac); | |
}; | |
} // namespace FFT | |
#endif // __FFT__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment