Created
April 25, 2023 14:48
-
-
Save al42and/c5b1cf3afe261585102971579c851e42 to your computer and use it in GitHub Desktop.
VkFFT example failing with current "develop" branch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <string>
#include <vector>

#include "vkFFT.h"
// Host-side test samples: 500 floats, written 12 per line (the last line is shorter).
// main() uploads the first sizeInReals values to the device as the real-space
// input grid and, after the forward and inverse transforms, compares the device
// result against these same values.
// The 12-per-line layout coincides with realGridSizePadded[ZZ] == 12; main()
// only compares the first realGridSize[ZZ] == 10 values of each such row.
const float inputdata[500] = { | |
-3.5, 6.3, 1.2, 0.3, 1.1, -5.7, 5.8, -1.9, -6.3, -1.4, 7.4, 2.4, | |
-9.9, -7.2, 5.4, 6.1, -1.9, -7.6, 1.4, -3.5, 0.7, 5.6, -4.2, -1.1, | |
-4.4, -6.3, -7.2, 4.6, -3.0, -0.9, 7.2, 2.5, -3.6, 6.1, -3.2, -2.1, | |
6.5, -0.4, -9.0, 2.3, 8.4, 4.0, -5.2, -9.0, 4.7, -3.7, -2.0, -9.5, | |
-3.9, -3.6, 7.1, 0.8, -0.6, 5.2, -9.3, -4.5, 5.9, 2.2, -5.8, 5.0, | |
1.2, -0.1, 2.2, 0.2, -7.7, 1.9, -8.4, 4.4, 2.3, -2.9, 6.7, 2.7, | |
5.8, -3.6, 8.9, 8.9, 4.3, 9.1, 9.3, -8.7, 4.1, 9.6, -6.2, 6.6, | |
-9.3, 8.2, 4.5, 6.2, 9.4, -8.0, -6.8, -3.3, 7.2, 1.7, 0.6, -4.9, | |
9.8, 1.3, 3.2, -0.2, 9.9, 4.4, -9.9, -7.2, 4.4, 4.7, 7.2, -0.3, | |
0.3, -2.1, 8.4, -2.1, -6.1, 4.1, -5.9, -2.2, -3.8, 5.2, -8.2, -7.8, | |
-8.8, 6.7, -9.5, -4.2, 0.8, 8.3, 5.2, -9.0, 8.7, 9.8, -9.9, -7.8, | |
-8.3, 9.0, -2.8, -9.2, -9.6, 8.4, 2.5, 6.0, -0.4, 1.3, -0.5, 9.1, | |
-9.5, -0.8, 1.9, -6.2, 4.3, -3.8, 8.6, -1.9, -2.1, -0.4, -7.1, -3.7, | |
9.1, -6.4, -0.6, 2.5, 8.0, -5.2, -9.8, -4.3, 4.5, 1.7, 9.3, 9.2, | |
1.0, 5.3, -4.5, 6.4, -6.6, 3.1, -6.8, 2.1, 2.0, 7.3, 8.6, 5.0, | |
5.2, 0.4, -7.1, 4.5, -9.2, -9.1, 0.2, -6.3, -1.1, -9.6, 7.4, -3.7, | |
-5.5, 2.6, -3.5, -0.7, 9.0, 9.8, -8.0, 3.6, 3.0, -2.2, -2.8, 0.8, | |
9.0, 2.8, 7.7, -0.7, -5.0, -1.8, -2.3, -0.4, -6.2, -9.1, -9.2, 0.5, | |
5.7, -3.9, 2.1, 0.6, 0.4, 9.1, 7.4, 7.1, -2.5, 7.3, 7.8, -4.3, | |
6.3, -0.8, -3.8, -1.5, 6.6, 2.3, 3.9, -4.6, 5.8, -7.4, 5.9, 2.8, | |
4.7, 3.9, -5.4, 9.1, -1.6, -1.9, -4.2, -2.6, 0.6, -5.1, 1.8, 5.2, | |
4.0, -6.2, 6.5, -9.1, 0.5, 2.1, 7.1, -8.6, 7.6, -9.7, -4.6, -5.7, | |
6.1, -1.8, -7.3, 9.4, 8.0, -2.6, -1.8, 5.7, 9.3, -7.9, 7.4, 6.3, | |
2.0, 9.6, -4.5, -6.2, 6.1, 2.3, 0.8, 5.9, -2.8, -3.5, -1.5, 6.0, | |
-4.9, 3.5, 7.7, -4.2, -9.7, 2.4, 8.1, 5.9, 3.4, -7.5, 7.5, 2.6, | |
4.7, 2.7, 2.2, 2.6, 6.2, 7.5, 0.2, -6.4, -2.8, -0.5, -0.3, 0.4, | |
1.2, 3.5, -4.0, -0.5, 9.3, -7.2, 8.5, -5.5, -1.7, -5.3, 0.3, 3.9, | |
-3.6, -3.6, 4.7, -8.1, 1.4, 4.0, 1.3, -4.3, -8.8, -7.3, 6.3, -7.5, | |
-9.0, 9.1, 4.5, -1.9, 1.9, 9.9, -1.7, -9.1, -5.1, 8.5, -9.3, 2.1, | |
-5.8, -3.6, -0.8, -0.9, -3.3, -2.7, 7.0, -7.2, -5.0, 7.4, -1.4, 0.0, | |
-4.5, -9.7, 0.7, -1.0, -9.1, -5.3, 4.3, 3.4, -6.6, 9.8, -1.1, 8.9, | |
5.0, 2.9, 0.2, -2.9, 0.8, 6.7, -0.6, 0.6, 4.1, 5.3, -1.7, -0.3, | |
4.2, 3.7, -8.3, 4.0, 1.3, 6.3, 0.2, 1.3, -1.1, -3.5, 2.8, -7.7, | |
6.2, -4.9, -9.9, 9.6, 3.0, -9.2, -8.0, -3.9, 7.9, -6.1, 6.0, 5.9, | |
9.6, 1.2, 6.2, 3.6, 2.1, 5.8, 9.2, -8.8, 8.8, -3.3, -9.2, 4.6, | |
1.8, 4.6, 2.9, -2.7, 4.2, 7.3, -0.4, 7.7, -7.0, 2.1, 0.3, 3.7, | |
3.3, -8.6, 9.8, 3.6, 3.1, 6.5, -2.4, 7.8, 7.5, 8.4, -2.8, -6.3, | |
-5.1, -2.7, 9.3, -0.8, -9.2, 7.9, 8.9, 3.4, 0.1, -5.3, -6.8, 4.9, | |
4.3, -0.7, -2.2, -3.2, -7.5, -2.3, 0.0, 8.1, -9.2, -2.3, -5.7, 2.1, | |
2.6, 2.0, 0.3, -8.0, -2.0, -7.9, 6.6, 8.4, 4.0, -6.2, -6.9, -7.2, | |
7.7, -5.0, 5.3, 1.9, -5.3, -7.5, 8.8, 8.3, 9.0, 8.1, 3.2, 1.2, | |
-5.4, -0.2, 2.1, -5.2, 9.5, 5.9, 5.6, -7.8, | |
}; | |
// Checks the status returned by a VkFFT call.
// On VKFFT_SUCCESS this does nothing. On any other status it writes
// "<msg>: <VkFFT error string>" to stderr and throws std::exception.
void handleFftError(VkFFTResult result, const std::string &msg) {
    if (result != VKFFT_SUCCESS) {
        std::cerr << msg << ": " << getVkFFTErrorString(result) << std::endl;
        throw std::exception{};
    }
}
void handleHipError(hipError_t error) { | |
if (error == hipSuccess) { | |
return; | |
} | |
std::cerr << hipGetErrorName(error) << std::endl; | |
throw std::exception{}; | |
} | |
// Number of grid dimensions and the index of each axis within an ivec.
#define DIM 3
#define XX 0
#define YY 1
#define ZZ 2

// Fixed-length integer triple, indexed with XX / YY / ZZ.
using ivec = int[DIM];

// Logical extent of the real-space grid.
const ivec realGridSize = {5, 5, 10};

// Real grid with the ZZ axis widened to 2 * (nz / 2 + 1) floats per row,
// i.e. the same storage as one row of the complex grid below.
const ivec realGridSizePadded = {realGridSize[XX], realGridSize[YY],
                                 2 * (realGridSize[ZZ] / 2 + 1)};

// Complex grid: nz / 2 + 1 complex values along ZZ.
const ivec complexGridSizePadded = {realGridSize[XX], realGridSize[YY],
                                    realGridSize[ZZ] / 2 + 1};

// Total number of complex elements, and the same storage counted in floats.
const int size = complexGridSizePadded[XX] * complexGridSizePadded[YY] *
                 complexGridSizePadded[ZZ];
const int sizeInReals = 2 * size;

// Not referenced by the code visible in this file.
constexpr bool sc_performOutOfPlaceFFT = false;
int main() { | |
hipDevice_t queue_device_ = 0; | |
hipSetDevice(queue_device_); | |
hipStream_t stream; | |
hipStreamCreate(&stream); | |
// Setup VkFFT plan | |
VkFFTConfiguration configuration_ = {}; | |
VkFFTApplication application_ = {}; | |
VkFFTLaunchParams launchParams = {}; | |
uint64_t bufferSize_; | |
uint64_t inputBufferSize_; | |
configuration_.FFTdim = 3; | |
configuration_.size[0] = realGridSize[ZZ]; | |
configuration_.size[1] = realGridSize[YY]; | |
configuration_.size[2] = realGridSize[XX]; | |
configuration_.performR2C = 1; | |
configuration_.device = &queue_device_; | |
configuration_.num_streams = 1; | |
bufferSize_ = sizeInReals * sizeof(float); | |
configuration_.bufferSize = &bufferSize_; | |
configuration_.aimThreads = 64; // Tuned for AMD GCN architecture | |
configuration_.bufferStride[0] = complexGridSizePadded[ZZ]; | |
configuration_.bufferStride[1] = complexGridSizePadded[ZZ] * complexGridSizePadded[YY]; | |
configuration_.bufferStride[2] = complexGridSizePadded[ZZ] * | |
complexGridSizePadded[YY] * | |
complexGridSizePadded[XX]; | |
configuration_.isInputFormatted = 1; | |
configuration_.inverseReturnToInputBuffer = 1; | |
inputBufferSize_ = realGridSizePadded[XX] * realGridSizePadded[YY] * | |
realGridSizePadded[ZZ] * sizeof(float); | |
configuration_.inputBufferSize = &inputBufferSize_; | |
configuration_.inputBufferStride[0] = realGridSizePadded[ZZ]; | |
configuration_.inputBufferStride[1] = | |
realGridSizePadded[ZZ] * realGridSizePadded[YY]; | |
configuration_.inputBufferStride[2] = | |
realGridSizePadded[ZZ] * realGridSizePadded[YY] * realGridSizePadded[XX]; | |
VkFFTResult result = initializeVkFFT(&application_, configuration_); | |
handleFftError(result, "Initializing VkFFT"); | |
// Allocate and copy data | |
float *realGrid; | |
hipMalloc(&realGrid, sizeInReals * sizeof(float)); | |
float *complexGrid; | |
hipMalloc(&complexGrid, sizeInReals * sizeof(float)); | |
hipError_t res = hipMemcpy(realGrid, inputdata, sizeInReals * sizeof(float), | |
hipMemcpyHostToDevice); | |
handleHipError(res); | |
application_.configuration.stream = &stream; | |
launchParams.inputBuffer = reinterpret_cast<void **>(&realGrid); | |
launchParams.buffer = reinterpret_cast<void **>(&complexGrid); | |
// Run FFT | |
result = VkFFTAppend(&application_, -1, &launchParams); | |
handleFftError(result, "VkFFT: Real to complex"); | |
res = hipStreamSynchronize(stream); | |
handleHipError(res); | |
result = VkFFTAppend(&application_, 1, &launchParams); | |
handleFftError(result, "VkFFT: Complex to real"); | |
res = hipStreamSynchronize(stream); | |
handleHipError(res); | |
// Check the results | |
std::vector<float> rtResult(sizeInReals); | |
res = hipMemcpy(rtResult.data(), realGrid, sizeInReals * sizeof(float), | |
hipMemcpyDeviceToHost); | |
const float normalizationConstant = | |
1.0 / (realGridSize[XX] * realGridSize[YY] * realGridSize[ZZ]); | |
for (int i = 0; i < realGridSize[XX] * realGridSize[YY]; i++) { | |
const float *expectedArr = inputdata + i * realGridSizePadded[ZZ]; | |
const float *actualArr = rtResult.data() + i * realGridSizePadded[ZZ]; | |
for (size_t j = 0; j < realGridSize[ZZ]; j++) { | |
float diff = expectedArr[j] - actualArr[j] * normalizationConstant; | |
if (std::fabs(diff) > 1e-5) { | |
std::cerr << "Fail at index {" << i << ", " << j << "}: got " | |
<< actualArr[j]*normalizationConstant << ", expected " << expectedArr[j] << std::endl; | |
} | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment