Skip to content

Instantly share code, notes, and snippets.

@robodhruv
Created November 7, 2017 18:18
Show Gist options
  • Save robodhruv/97387ced89b6f86205e3dd348aa5d406 to your computer and use it in GitHub Desktop.
Save robodhruv/97387ced89b6f86205e3dd348aa5d406 to your computer and use it in GitHub Desktop.
# include <cstdlib>
# include <iostream>
# include <iomanip>
# include <cmath>
# include <ctime>
# include <omp.h>
using namespace std;
int main ( void );
void ccopy ( int n, double x[], double y[] );
void cfft2 ( int n, double x[], double y[], double w[], double sgn );
void cffti ( int n, double w[] );
double ggl ( double *ds );
void step ( int n, int mj, double a[], double b[], double c[], double d[],
double w[], double sgn );
void timestamp ( );
int main ( void ) {
double error, flops, fnml, mflops, sgn, wtime, z0, z1, *w, *x, *y, *z;
int first, i, icase, it, ln2, n;
int ln2_max = 25;
int nits = 10000;
static double seed;
timestamp ( );
cout << "\n";
cout << "FFT_OPENMP\n";
cout << "\n";
cout << " Demonstrate an implementation of the Fast Fourier Transform\n";
cout << " of a complex data vector, using OpenMP for parallel execution.\n";
cout << "\n";
cout << " Number of processors available = " << omp_get_num_procs ( ) << "\n";
cout << " Number of threads = " << omp_get_max_threads ( ) << "\n";
cout << "\n";
cout << " Accuracy check:\n";
cout << "\n";
cout << " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n";
cout << "\n";
cout << " N NITS Error Time Time/Call MFLOPS\n";
cout << "\n";
seed = 331.0;
n = 1;
for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
{
n = 2 * n;
w = new double[ n];
x = new double[2*n];
y = new double[2*n];
z = new double[2*n];
first = 1;
for ( icase = 0; icase < 2; icase++ )
{
if ( first )
{
for ( i = 0; i < 2 * n; i = i + 2 )
{
z0 = ggl ( &seed );
z1 = ggl ( &seed );
x[i] = z0;
z[i] = z0;
x[i+1] = z1;
z[i+1] = z1;
}
}
else
{
# pragma omp parallel shared ( n, x, z ) private ( i, z0, z1 )
# pragma omp for nowait
for ( i = 0; i < 2 * n; i = i + 2 )
{
z0 = 0.0;
z1 = 0.0;
x[i] = z0;
z[i] = z0;
x[i+1] = z1;
z[i+1] = z1;
}
}
cffti ( n, w );
if ( first )
{
sgn = + 1.0;
cfft2 ( n, x, y, w, sgn );
sgn = - 1.0;
cfft2 ( n, y, x, w, sgn );
fnm1 = 1.0 / ( double ) n;
error = 0.0;
for ( i = 0; i < 2 * n; i = i + 2 )
{
error = error
+ pow ( z[i] - fnm1 * x[i], 2 )
+ pow ( z[i+1] - fnm1 * x[i+1], 2 );
}
error = sqrt ( fnm1 * error );
cout << " " << setw(12) << n
<< " " << setw(8) << nits
<< " " << setw(12) << error;
first = 0;
}
else
{
wtime = omp_get_wtime ( );
for ( it = 0; it < nits; it++ )
{
sgn = + 1.0;
cfft2 ( n, x, y, w, sgn );
sgn = - 1.0;
cfft2 ( n, y, x, w, sgn );
}
wtime = omp_get_wtime ( ) - wtime;
flops = ( double ) 2 * ( double ) nits
* ( ( double ) 5 * ( double ) n * ( double ) ln2 );
mflops = flops / 1.0E+06 / wtime;
cout << " " << setw(12) << ctime
<< " " << setw(12) << wtime / ( double ) ( 2 * nits )
<< " " << setw(12) << mflops << "\n";
}
}
if ( ( ln2 % 4 ) == 0 )
{
nits = nits / 10;
}
if ( nits < 1 )
{
nits = 1;
}
delete [] w;
delete [] x;
delete [] y;
delete [] z;
}
cout << "\n";
cout << "FFT_OPENMP:\n";
cout << " Normal end of execution.\n";
cout << "\n";
timestamp ( );
return 0;
}
void ccopy ( int n, double x[], double y[] ) {
int i;
for ( i = 0; i < n; i++ )
{
y[i*2+0] = x[i*2+0];
y[i*2+1] = x[i*2+1];
}
return;
}
void cfft2 ( int n, double x[], double y[], double w[], double sgn ) {
int j;
int m;
int mj;
int tgle;
m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
mj = 1;
tgle = 1;
step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
if ( n == 2 )
{
return;
}
for ( j = 0; j < m - 2; j++ )
{
mj = mj * 2;
if ( tgle )
{
step ( n, mj, &y[0*2+0], &y[(n/2)*2+0], &x[0*2+0], &x[mj*2+0], w, sgn );
tgle = 0;
}
else
{
step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
tgle = 1;
}
}
if ( tgle )
{
ccopy ( n, y, x );
}
mj = n / 2;
step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
return;
}
void cffti ( int n, double w[] ) {
double arg;
double aw;
int i;
int n2;
const double pi = 3.141592653589793;
n2 = n / 2;
aw = 2.0 * pi / ( ( double ) n );
# pragma omp parallel shared ( aw, n, w ) private ( arg, i )
# pragma omp for nowait
for ( i = 0; i < n2; i++ )
{
arg = aw * ( ( double ) i );
w[i*2+0] = cos ( arg );
w[i*2+1] = sin ( arg );
}
return;
}
double ggl ( double *seed ) {
double d2 = 0.2147483647e10;
double t;
double value;
t = ( double ) *seed;
t = fmod ( 16807.0 * t, d2 );
*seed = ( double ) t;
value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
return value;
}
void step ( int n, int mj, double a[], double b[], double c[],
double d[], double w[], double sgn ) {
double ambr;
double ambu;
int j;
int ja;
int jb;
int jc;
int jd;
int jw;
int k;
int lj;
int mj2;
double wjw[2];
mj2 = 2 * mj;
lj = n / mj2;
# pragma omp parallel \
shared ( a, b, c, d, lj, mj, mj2, sgn, w ) private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
# pragma omp for nowait
for ( j = 0; j < lj; j++ )
{
jw = j * mj;
ja = jw;
jb = ja;
jc = j * mj2;
jd = jc;
wjw[0] = w[jw*2+0];
wjw[1] = w[jw*2+1];
if ( sgn < 0.0 )
{
wjw[1] = - wjw[1];
}
for ( k = 0; k < mj; k++ )
{
c[(jc+k)*2+0] = a[(ja+k)*2+0] + b[(jb+k)*2+0];
c[(jc+k)*2+1] = a[(ja+k)*2+1] + b[(jb+k)*2+1];
ambr = a[(ja+k)*2+0] - b[(jb+k)*2+0];
ambu = a[(ja+k)*2+1] - b[(jb+k)*2+1];
d[(jd+k)*2+0] = wjw[0] * ambr - wjw[1] * ambu;
d[(jd+k)*2+1] = wjw[1] * ambr + wjw[0] * ambu;
}
}
return;
}
void timestamp () {
# define TIME_SIZE 40
static char time_buffer[TIME_SIZE];
const struct tm *tm;
size_t len;
time_t now;
now = time ( NULL );
tm = localtime ( &now );
len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
cout << time_buffer << "\n";
return;
# undef TIME_SIZE
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment