Created
May 9, 2012 20:33
-
-
Save kspaff/2648600 to your computer and use it in GitHub Desktop.
3D FFT in Aspen
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // 3D_FFT.aspen | |
| // 3D FFT using Pencil/Slab Decomposition | |
| model fft3d { | |
| // Aspen application model of a 3D FFT over a cubic n x n x n volume. | |
| // Declares the model parameters, the data set, three kernels | |
| // (transpose, localFFT, exchange) and two control flows (slab, main). | |
| // Reminder: When combined with a machine model, Aspen will override these parameters | |
| param P = 64 // --> number of processors | |
| param Z = 24 * mega // --> last-level cache ("LL $$") capacity; units presumably bytes -- TODO confirm | |
| param L = 128 // --> Cache line size (not referenced by any expression in this model) | |
| // Dimensions of cubic 3D Volume | |
| param n = 8192 | |
| param wordSize = 16 // double complex (size of one element; presumably bytes -- TODO confirm) | |
| param a = 6.3 // constant for cache miss calculation, see TR 5.1.2 | |
| // Each processor's share of the volume: total volume size divided by P | |
| param dataPerProc = (n^3 * wordSize) / P | |
| // The full volume: n^3 elements of wordSize each | |
| data fftVolume [n^3 * wordSize] | |
| // Transpose step: each processor's share is loaded from and stored back to fftVolume | |
| kernel transpose { | |
| exposes parallelism [P] | |
| requires loads [dataPerProc] from fftVolume | |
| requires stores [dataPerProc] to fftVolume | |
| } | |
| // Local 1D FFT using theoretical bound on FLOPs | |
| kernel localFFT { | |
| exposes parallelism [n^2] | |
| requires flops [5 * n * log2(n)] as dp, complex, simd | |
| // Based on analysis of FFT mem pattern, see TR | |
| // The max(1, log(n)/log(Z)) factor keeps the multiplier from dropping below 1 | |
| requires loads [a * n * max(1, log(n)/log(Z)) * wordSize] from fftVolume | |
| } | |
| // All-to-all message exchange; the message size is the same expression as dataPerProc | |
| kernel exchange { | |
| exposes parallelism [P] | |
| requires messages [(n^3 * wordSize) / P] as allToAll | |
| } | |
| // Slab variant: the X and Y passes run back to back, with a single exchange before the Z pass | |
| control slab { | |
| localFFT -> transpose // in X | |
| localFFT -> transpose // in Y | |
| exchange | |
| localFFT -> transpose // in Z | |
| } | |
| // Main control flow: an exchange follows both the X pass and the Y pass (two exchanges in total) | |
| // NOTE(review): presumably this is the pencil-decomposition variant -- confirm | |
| control main { | |
| localFFT -> transpose // in X | |
| exchange | |
| localFFT -> transpose // in Y | |
| exchange | |
| localFFT -> transpose // in Z | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment