Skip to content

Instantly share code, notes, and snippets.

@glandium

glandium/command Secret

Created November 1, 2022 06:30
Show Gist options
  • Save glandium/273f01e7c825750854e68d9bf6049d4f to your computer and use it in GitHub Desktop.
Save glandium/273f01e7c825750854e68d9bf6049d4f to your computer and use it in GitHub Desktop.
clang -std=gnu99 --target=arm-linux-androideabi -o fft.o -c -march=armv7-a -mthumb -Oz fft.i
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 388 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "/builds/worker/checkouts/gecko/config/gcc_hidden.h" 1
#pragma GCC visibility push(hidden)
# 2 "<built-in>" 2
# 1 "/builds/worker/workspace/obj-build/mozilla-config.h" 1
# 10 "/builds/worker/workspace/obj-build/mozilla-config.h"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-id-macro"
# 215 "/builds/worker/workspace/obj-build/mozilla-config.h"
#pragma clang diagnostic pop
# 3 "<built-in>" 2
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" 2
# 12 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c"
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 1
# 15 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h"
# 1 "/builds/worker/checkouts/gecko/media/libaom/config/linux/arm/config/aom_config.h" 1
# 16 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 2
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 1
# 15 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h"
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 1
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 3
#pragma GCC visibility push(default)
# 1 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 1 3
# 35 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 3
typedef int ptrdiff_t;
# 46 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 3
typedef unsigned int size_t;
# 74 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 3
typedef unsigned int wchar_t;
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 2 3
#pragma GCC visibility pop
# 16 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 2
# 52 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h"
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 1
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 3
#pragma GCC visibility push(default)
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 1 3
# 32 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 3
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/wchar_limits.h" 1 3
# 36 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/wchar_limits.h" 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3
#pragma GCC visibility push(default)
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 1 3
# 356 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 3
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/versioning.h" 1 3
# 357 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 2 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/android/api-level.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/android/api-level.h" 3
#pragma GCC visibility push(default)
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 1 3
# 39 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3
#pragma GCC visibility push(default)
#pragma GCC visibility pop
# 40 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 2 3
# 150 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 3
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/get_device_api_level_inlines.h" 1 3
# 33 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/get_device_api_level_inlines.h" 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3
#pragma GCC visibility push(default)
#pragma GCC visibility pop
# 34 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/get_device_api_level_inlines.h" 2 3
int __system_property_get(const char* __name, char* __value);
int atoi(const char* __s) __attribute__((__pure__));
/*
 * Looks up the "ro.build.version.sdk" system property and returns it as an
 * integer.
 *
 * Returns -1 when __system_property_get() reports a result below 1, or when
 * the text converts (via atoi) to a value that is not strictly positive.
 */
static __inline int android_get_device_api_level() {
  /* Zero-filled so the buffer is a valid C string for atoi(). */
  char sdk_text[92] = { 0 };
  int level = -1;

  if (__system_property_get("ro.build.version.sdk", sdk_text) >= 1) {
    const int parsed = atoi(sdk_text);
    if (parsed > 0) {
      level = parsed;
    }
  }
  return level;
}
# 151 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 2 3
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/android/api-level.h" 2 3
#pragma GCC visibility pop
# 358 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 2 3
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/ndk-version.h" 1 3
# 360 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 2 3
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 2 3
#pragma GCC visibility pop
# 37 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/wchar_limits.h" 2 3
# 33 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 2 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 3
#pragma GCC visibility push(default)
# 1 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 1 3
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 2 3
#pragma GCC visibility pop
# 34 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 2 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3
#pragma GCC visibility push(default)
#pragma GCC visibility pop
# 35 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 2 3
typedef signed char __int8_t;
typedef unsigned char __uint8_t;
typedef short __int16_t;
typedef unsigned short __uint16_t;
typedef int __int32_t;
typedef unsigned int __uint32_t;
typedef long long __int64_t;
typedef unsigned long long __uint64_t;
typedef int __intptr_t;
typedef unsigned int __uintptr_t;
typedef __int8_t int8_t;
typedef __uint8_t uint8_t;
typedef __int16_t int16_t;
typedef __uint16_t uint16_t;
typedef __int32_t int32_t;
typedef __uint32_t uint32_t;
typedef __int64_t int64_t;
typedef __uint64_t uint64_t;
typedef __intptr_t intptr_t;
typedef __uintptr_t uintptr_t;
typedef int8_t int_least8_t;
typedef uint8_t uint_least8_t;
typedef int16_t int_least16_t;
typedef uint16_t uint_least16_t;
typedef int32_t int_least32_t;
typedef uint32_t uint_least32_t;
typedef int64_t int_least64_t;
typedef uint64_t uint_least64_t;
typedef int8_t int_fast8_t;
typedef uint8_t uint_fast8_t;
typedef int64_t int_fast64_t;
typedef uint64_t uint_fast64_t;
typedef int32_t int_fast16_t;
typedef uint32_t uint_fast16_t;
typedef int32_t int_fast32_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uintmax_t;
typedef int64_t intmax_t;
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 2 3
#pragma GCC visibility pop
# 53 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 2
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/inttypes.h" 1
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/inttypes.h" 3
#pragma GCC visibility push(default)
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 1 3
# 22 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 3
#pragma GCC visibility push(default)
#pragma GCC visibility pop
# 23 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 2 3
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3
#pragma GCC visibility push(default)
#pragma GCC visibility pop
# 24 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 2 3
# 252 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 3
/*
 * Aggregate of two intmax_t members, `quot` and `rem`.
 * NOTE(review): in standard C this is the result type of imaxdiv(); no
 * function taking or returning it is visible in this part of the file.
 */
typedef struct {
intmax_t quot; /* presumably the quotient -- confirm against <inttypes.h> */
intmax_t rem; /* presumably the remainder -- confirm against <inttypes.h> */
} imaxdiv_t;
# 264 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 3
intmax_t strtoimax(const char* __s, char** __end_ptr, int __base);
uintmax_t strtoumax(const char* __s, char** __end_ptr, int __base);
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/inttypes.h" 2 3
#pragma GCC visibility pop
# 61 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 2
# 82 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h"
size_t aom_uleb_size_in_bytes(uint64_t value);
int aom_uleb_decode(const uint8_t *buffer, size_t available, uint64_t *value,
size_t *length);
int aom_uleb_encode(uint64_t value, size_t available, uint8_t *coded_value,
size_t *coded_size);
int aom_uleb_encode_fixed_size(uint64_t value, size_t available,
size_t pad_to_size, uint8_t *coded_value,
size_t *coded_size);
# 18 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 2
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_ports/mem.h" 1
# 19 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 2
# 52 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h"
typedef uint8_t qm_val_t;
typedef int64_t tran_high_t;
typedef int32_t tran_low_t;
/* Saturates `val` to the range [0, 255] and returns it as a uint8_t. */
static inline uint8_t clip_pixel(int val) {
  if (val > 255) {
    return 255;
  }
  if (val < 0) {
    return 0;
  }
  return (uint8_t)val;
}
/*
 * Limits `value` to [low, high]. The lower bound is tested first, so when
 * value < low the result is `low` even if low > high.
 */
static inline int clamp(int value, int low, int high) {
  if (value < low) {
    return low;
  }
  if (value > high) {
    return high;
  }
  return value;
}
/*
 * 64-bit counterpart of an integer clamp: returns `low` when value < low,
 * otherwise `high` when value > high, otherwise `value` unchanged.
 */
static inline int64_t clamp64(int64_t value, int64_t low, int64_t high) {
  int64_t result = value;
  if (value < low) {
    result = low;
  } else if (value > high) {
    result = high;
  }
  return result;
}
/*
 * Limits a double to [low, high]. Both comparisons are false for a NaN
 * `value`, so a NaN input is returned unchanged.
 */
static inline double fclamp(double value, double low, double high) {
  if (value < low) {
    return low;
  }
  if (value > high) {
    return high;
  }
  return value;
}
/*
 * Clamps `val` to [0, max] where max depends on `bd`:
 *   bd == 10 -> 1023, bd == 12 -> 4095, any other value (including 8) -> 255.
 * The clamped value is returned as a uint16_t.
 */
static inline uint16_t clip_pixel_highbd(int val, int bd) {
  int upper = 255; /* used for bd == 8 and for every unrecognised depth */

  if (bd == 10) {
    upper = 1023;
  } else if (bd == 12) {
    upper = 4095;
  }
  return (uint16_t)clamp(val, 0, upper);
}
/*
 * Returns 0 for a negative `value`, otherwise `value` itself as unsigned.
 *
 * Written as a plain comparison rather than a sign-mask built with
 * `value >> (bits - 1)`: right-shifting a negative signed integer is
 * implementation-defined in C, and the mask width also assumed 8-bit bytes.
 * The comparison gives the same results without relying on either.
 */
static inline unsigned int negative_to_zero(int value) {
  return (value < 0) ? 0u : (unsigned int)value;
}
# 13 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" 2
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h" 1
# 45 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h"
typedef void (*aom_fft_1d_func_t)(const float *input, float *output,
int stride);
void aom_fft1d_4_float(const float *input, float *output, int stride);
void aom_fft1d_8_float(const float *input, float *output, int stride);
void aom_fft1d_16_float(const float *input, float *output, int stride);
void aom_fft1d_32_float(const float *input, float *output, int stride);
typedef void (*aom_fft_transpose_func_t)(const float *input, float *output,
int n);
# 73 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h"
typedef void (*aom_fft_unpack_func_t)(const float *input, float *output, int n);
# 90 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h"
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
aom_fft_unpack_func_t unpack, int vec_size);
# 106 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h"
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
aom_fft_1d_func_t ifft_multi,
aom_fft_transpose_func_t transpose, int vec_size);
# 14 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" 2
/*
 * Writes the transpose of the n-by-n row-major matrix A into B, i.e.
 * B[r * n + c] = A[c * n + r] for all 0 <= r, c < n.
 * Nothing is written when n <= 0.
 */
static inline void simple_transpose(const float *A, float *B, int n) {
  for (int dst_row = 0; dst_row < n; ++dst_row) {
    float *out_row = B + dst_row * n;
    for (int k = 0; k < n; ++k) {
      /* Row `dst_row` of B is column `dst_row` of A. */
      out_row[k] = A[k * n + dst_row];
    }
  }
}
# 33 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c"
/*
 * Expands the n-by-n array `col_fft` into pairs of floats in `output`.
 *
 * For every (row, col) with 0 <= row, col <= n/2 the pair at
 * output[2 * (row * n + col)] and the following element is written. Rows
 * with 0 < row and row + n/2 < n additionally write the pair for row
 * (n - row) at the same column. Only indices below 2 * n * n are written;
 * pairs outside these rows/columns are left untouched.
 *
 * NOTE(review): the pairs look like (real, imaginary) parts of a 2D spectrum
 * assembled from a packed real-FFT layout -- confirm against fft_common.h.
 */
static inline void unpack_2d_output(const float *col_fft, float *output,
                                    int n) {
  const int half = n / 2;

  for (int row = 0; row <= half; ++row) {
    const int row_hi = row + half;
    /* True when `row_hi` addresses a distinct row strictly inside the array. */
    const int row_has_pair = (row_hi > half) && (row_hi < n);

    for (int col = 0; col <= half; ++col) {
      const int col_hi = col + half;
      const int col_has_pair = (col_hi > half) && (col_hi < n);

      const int base_idx = row * n + col;
      const int right_idx = row * n + col_hi;
      const int below_idx = row_hi * n + col;
      const int diag_idx = row_hi * n + col_hi;

      float *cell = output + 2 * base_idx;
      cell[0] = col_fft[base_idx] -
                ((col_has_pair && row_has_pair) ? col_fft[diag_idx] : 0);
      cell[1] = (row_has_pair ? col_fft[below_idx] : 0) +
                (col_has_pair ? col_fft[right_idx] : 0);

      if (row_has_pair) {
        /* Mirror row: same terms, with the row_hi contributions negated. */
        float *mirror = output + 2 * ((n - row) * n + col);
        mirror[0] =
            col_fft[base_idx] + (col_has_pair ? col_fft[diag_idx] : 0);
        mirror[1] =
            -col_fft[below_idx] + (col_has_pair ? col_fft[right_idx] : 0);
      }
    }
  }
}
/*
 * Generic driver for a 2D transform built from a 1D transform.
 *
 * Steps, in order:
 *   1. tform() is called for offsets 0, vec_size, 2*vec_size, ... below n,
 *      reading from `input` and writing to `output`, with n passed as the
 *      stride argument.
 *   2. transpose(output -> temp).
 *   3. The same tform() sweep, reading `temp` and writing `output`.
 *   4. transpose(output -> temp).
 *   5. unpack(temp -> output).
 *
 * `vec_size` must be positive: it is the loop increment, so a non-positive
 * value never advances the sweep towards n.
 */
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
                    aom_fft_unpack_func_t unpack, int vec_size) {
  int offset;

  /* First sweep: straight from the caller's input. */
  offset = 0;
  while (offset < n) {
    tform(&input[offset], &output[offset], n);
    offset += vec_size;
  }
  transpose(output, temp, n);

  /* Second sweep: over the transposed intermediate. */
  offset = 0;
  while (offset < n) {
    tform(&temp[offset], &output[offset], n);
    offset += vec_size;
  }
  transpose(output, temp, n);

  unpack(temp, output, n);
}
/* Stores `input` into the float that `output` points to. */
static inline void store_float(float *output, float input) {
  output[0] = input;
}
/* Returns the single-precision sum a + b. */
static inline float add_float(float a, float b) {
  const float sum = a + b;
  return sum;
}
/* Returns the single-precision difference a - b. */
static inline float sub_float(float a, float b) {
  const float difference = a - b;
  return difference;
}
/* Returns the single-precision product a * b. */
static inline float mul_float(float a, float b) {
  const float product = a * b;
  return product;
}
/*
 * 2-point transform on strided data.
 *
 * Reads input[0] and input[stride]; writes their sum to output[0] and
 * (first - second) to output[stride]. Both inputs are loaded before either
 * store is performed.
 */
void aom_fft1d_2_float(const float *input, float *output, int stride) {
  const float first = input[0];
  const float second = input[stride];

  store_float(&output[0], first + second);
  store_float(&output[stride], first - second);
}
/*
 * 4-point transform on strided data.
 *
 * Reads input[k * stride] for k = 0..3 and writes output[k * stride]:
 *   k = 0: x0 + x2 + (x1 + x3)   (sum of all inputs)
 *   k = 1: x0 - x2
 *   k = 2: x0 + x2 - (x1 + x3)
 *   k = 3: 0 - (x1 - x3)
 * All four inputs are loaded before the first store.
 */
void aom_fft1d_4_float(const float *input, float *output, int stride) {
  const float zero = 0.0f;

  const float x0 = input[0 * stride];
  const float x1 = input[1 * stride];
  const float x2 = input[2 * stride];
  const float x3 = input[3 * stride];

  const float even_sum = add_float(x0, x2);
  const float even_diff = sub_float(x0, x2);
  const float odd_sum = add_float(x1, x3);
  const float odd_diff = sub_float(x1, x3);

  store_float(&output[0 * stride], add_float(even_sum, odd_sum));
  store_float(&output[1 * stride], even_diff);
  store_float(&output[2 * stride], sub_float(even_sum, odd_sum));
  store_float(&output[3 * stride], sub_float(zero, odd_diff));
}
/*
 * 8-point transform on strided data.
 *
 * Reads input[k * stride] for k = 0..7 and writes output[k * stride].
 * output[0] is the sum of all eight inputs and output[4] is the sum of the
 * even-indexed inputs minus the sum of the odd-indexed ones. All inputs are
 * loaded before the first store.
 *
 * The constant 0.707107 matches cos(pi/4) to the printed precision.
 */
void aom_fft1d_8_float(const float *input, float *output, int stride) {
  const float zero = 0.0f;
  const float diag = 0.707107f;

  const float x0 = input[0 * stride];
  const float x1 = input[1 * stride];
  const float x2 = input[2 * stride];
  const float x3 = input[3 * stride];
  const float x4 = input[4 * stride];
  const float x5 = input[5 * stride];
  const float x6 = input[6 * stride];
  const float x7 = input[7 * stride];

  /* Even-indexed inputs. */
  const float s04 = add_float(x0, x4);
  const float d04 = sub_float(x0, x4);
  const float s26 = add_float(x2, x6);
  const float d26 = sub_float(x2, x6);
  const float even_sum = add_float(s04, s26);
  const float even_diff = sub_float(s04, s26);

  /* Odd-indexed inputs. */
  const float s15 = add_float(x1, x5);
  const float d15 = sub_float(x1, x5);
  const float s37 = add_float(x3, x7);
  const float d37 = sub_float(x3, x7);
  const float odd_sum = add_float(s15, s37);
  const float odd_diff = sub_float(s15, s37);

  store_float(&output[0 * stride], add_float(even_sum, odd_sum));
  store_float(&output[1 * stride],
              add_float(d04, mul_float(diag, sub_float(d15, d37))));
  store_float(&output[2 * stride], even_diff);
  store_float(&output[3 * stride],
              sub_float(d04, mul_float(diag, sub_float(d15, d37))));
  store_float(&output[4 * stride], sub_float(even_sum, odd_sum));
  store_float(&output[5 * stride],
              sub_float(sub_float(zero, d26),
                        mul_float(diag, add_float(d37, d15))));
  store_float(&output[6 * stride], sub_float(zero, odd_diff));
  store_float(&output[7 * stride],
              sub_float(d26, mul_float(diag, add_float(d37, d15))));
}
/*
 * 16-point transform on strided data, written as one straight-line sequence
 * of add/sub/mul helper calls.
 *
 * Reads input[k * stride] for k = 0..15 (into i0..i15) and writes
 * output[k * stride] for k = 0..15. All sixteen inputs are loaded before the
 * first store_float() call.
 *
 * output[0] is the sum of all sixteen inputs; output[8] is the sum of the
 * even-indexed inputs minus the sum of the odd-indexed ones.
 *
 * Constants: kWeight2 (0.707107), kWeight3 (0.92388) and kWeight4 (0.382683)
 * match cos(pi/4), cos(pi/8) and sin(pi/8) to the printed precision;
 * kWeight0 is 0 and is only used to negate via sub_float(kWeight0, x).
 *
 * NOTE(review): the output appears to hold real parts in the first nine
 * slots and imaginary parts in the remaining seven -- confirm against
 * fft_common.h.
 * NOTE(review): the ';' after the closing brace is an empty file-scope
 * declaration, presumably left by the generating macro.
 */
void aom_fft1d_16_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float w0 = add_float(i0, i8); const float w1 = sub_float(i0, i8); const float w2 = add_float(i4, i12); const float w3 = sub_float(i4, i12); const float w4 = add_float(w0, w2); const float w5 = sub_float(w0, w2); const float w7 = add_float(i2, i10); const float w8 = sub_float(i2, i10); const float w9 = add_float(i6, i14); const float w10 = sub_float(i6, i14); const float w11 = add_float(w7, w9); const float w12 = sub_float(w7, w9); const float w14 = add_float(w4, w11); const float w15 = sub_float(w4, w11); const float w16[2] = { add_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(sub_float(kWeight0, w3), mul_float(kWeight2, add_float(w10, w8))) }; const float w18[2] = { sub_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(w3, mul_float(kWeight2, add_float(w10, w8))) }; const float w19 = add_float(i1, i9); const float w20 = sub_float(i1, i9); const float w21 = add_float(i5, i13); const float w22 = sub_float(i5, i13); const float w23 = add_float(w19, w21); const float w24 = sub_float(w19, w21); const float w26 = add_float(i3, i11); const float w27 = 
sub_float(i3, i11); const float w28 = add_float(i7, i15); const float w29 = sub_float(i7, i15); const float w30 = add_float(w26, w28); const float w31 = sub_float(w26, w28); const float w33 = add_float(w23, w30); const float w34 = sub_float(w23, w30); const float w35[2] = { add_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(sub_float(kWeight0, w22), mul_float(kWeight2, add_float(w29, w27))) }; const float w37[2] = { sub_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(w22, mul_float(kWeight2, add_float(w29, w27))) }; store_float(output + 0 * stride, add_float(w14, w33)); store_float(output + 1 * stride, add_float(w16[0], add_float(mul_float(kWeight3, w35[0]), mul_float(kWeight4, w35[1])))); store_float(output + 2 * stride, add_float(w5, mul_float(kWeight2, sub_float(w24, w31)))); store_float(output + 3 * stride, add_float(w18[0], add_float(mul_float(kWeight4, w37[0]), mul_float(kWeight3, w37[1])))); store_float(output + 4 * stride, w15); store_float(output + 5 * stride, add_float(w18[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w37[0])), mul_float(kWeight3, w37[1])))); store_float(output + 6 * stride, sub_float(w5, mul_float(kWeight2, sub_float(w24, w31)))); store_float(output + 7 * stride, add_float(w16[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w35[0])), mul_float(kWeight4, w35[1])))); store_float(output + 8 * stride, sub_float(w14, w33)); store_float(output + 9 * stride, add_float(w16[1], sub_float(mul_float(kWeight3, w35[1]), mul_float(kWeight4, w35[0])))); store_float(output + 10 * stride, sub_float(sub_float(kWeight0, w12), mul_float(kWeight2, add_float(w31, w24)))); store_float(output + 11 * stride, add_float(w18[1], sub_float(mul_float(kWeight4, w37[1]), mul_float(kWeight3, w37[0])))); store_float(output + 12 * stride, sub_float(kWeight0, w34)); store_float(output + 13 * stride, sub_float(sub_float(kWeight0, w18[1]), sub_float(mul_float(kWeight3, w37[0]), mul_float(kWeight4, w37[1])))); store_float(output 
+ 14 * stride, sub_float(w12, mul_float(kWeight2, add_float(w31, w24)))); store_float(output + 15 * stride, sub_float(sub_float(kWeight0, w16[1]), sub_float(mul_float(kWeight4, w35[0]), mul_float(kWeight3, w35[1])))); };
/*
 * 32-point transform on strided data, written as one straight-line sequence
 * of add/sub/mul helper calls.
 *
 * Reads input[k * stride] for k = 0..31 (into i0..i31) and writes
 * output[k * stride] for k = 0..31. All thirty-two inputs are loaded before
 * the first store_float() call.
 *
 * output[0] is the sum of all inputs (w38 is the even-indexed sum, w85 the
 * odd-indexed sum); output[16] is w38 - w85.
 *
 * Constants, matched to the printed precision:
 *   kWeight2 0.707107 ~ cos(pi/4)
 *   kWeight3 0.92388  ~ cos(pi/8),    kWeight4 0.382683 ~ sin(pi/8)
 *   kWeight5 0.980785 ~ cos(pi/16),   kWeight6 0.19509  ~ sin(pi/16)
 *   kWeight7 0.83147  ~ cos(3*pi/16), kWeight8 0.55557  ~ sin(3*pi/16)
 * kWeight0 is 0 and is only used to negate via sub_float(kWeight0, x).
 *
 * NOTE(review): the output appears to hold real parts in the first seventeen
 * slots and imaginary parts in the remaining fifteen -- confirm against
 * fft_common.h.
 * NOTE(review): the ';' after the closing brace is an empty file-scope
 * declaration, presumably left by the generating macro.
 */
void aom_fft1d_32_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float kWeight5 = (float)(0.980785f); const float kWeight6 = (float)(0.19509f); const float kWeight7 = (float)(0.83147f); const float kWeight8 = (float)(0.55557f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float i16 = *(input + 16 * stride); const float i17 = *(input + 17 * stride); const float i18 = *(input + 18 * stride); const float i19 = *(input + 19 * stride); const float i20 = *(input + 20 * stride); const float i21 = *(input + 21 * stride); const float i22 = *(input + 22 * stride); const float i23 = *(input + 23 * stride); const float i24 = *(input + 24 * stride); const float i25 = *(input + 25 * stride); const float i26 = *(input + 26 * stride); const float i27 = *(input + 27 * stride); const float i28 = *(input + 28 * stride); const float i29 = *(input + 29 * stride); const float i30 = *(input + 30 * stride); const float i31 = *(input + 31 * stride); const float w0 = add_float(i0, i16); const float w1 = sub_float(i0, i16); const float w2 = add_float(i8, i24); const float w3 = sub_float(i8, i24); const float w4 = add_float(w0, w2); const float w5 = sub_float(w0, w2); const float w7 = add_float(i4, i20); const 
float w8 = sub_float(i4, i20); const float w9 = add_float(i12, i28); const float w10 = sub_float(i12, i28); const float w11 = add_float(w7, w9); const float w12 = sub_float(w7, w9); const float w14 = add_float(w4, w11); const float w15 = sub_float(w4, w11); const float w16[2] = { add_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(sub_float(kWeight0, w3), mul_float(kWeight2, add_float(w10, w8))) }; const float w18[2] = { sub_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(w3, mul_float(kWeight2, add_float(w10, w8))) }; const float w19 = add_float(i2, i18); const float w20 = sub_float(i2, i18); const float w21 = add_float(i10, i26); const float w22 = sub_float(i10, i26); const float w23 = add_float(w19, w21); const float w24 = sub_float(w19, w21); const float w26 = add_float(i6, i22); const float w27 = sub_float(i6, i22); const float w28 = add_float(i14, i30); const float w29 = sub_float(i14, i30); const float w30 = add_float(w26, w28); const float w31 = sub_float(w26, w28); const float w33 = add_float(w23, w30); const float w34 = sub_float(w23, w30); const float w35[2] = { add_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(sub_float(kWeight0, w22), mul_float(kWeight2, add_float(w29, w27))) }; const float w37[2] = { sub_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(w22, mul_float(kWeight2, add_float(w29, w27))) }; const float w38 = add_float(w14, w33); const float w39 = sub_float(w14, w33); const float w40[2] = { add_float(w16[0], add_float(mul_float(kWeight3, w35[0]), mul_float(kWeight4, w35[1]))), add_float(w16[1], sub_float(mul_float(kWeight3, w35[1]), mul_float(kWeight4, w35[0]))) }; const float w41[2] = { add_float(w5, mul_float(kWeight2, sub_float(w24, w31))), sub_float(sub_float(kWeight0, w12), mul_float(kWeight2, add_float(w31, w24))) }; const float w42[2] = { add_float(w18[0], add_float(mul_float(kWeight4, w37[0]), mul_float(kWeight3, w37[1]))), add_float(w18[1], sub_float(mul_float(kWeight4, 
w37[1]), mul_float(kWeight3, w37[0]))) }; const float w44[2] = { add_float(w18[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w37[0])), mul_float(kWeight3, w37[1]))), sub_float(sub_float(kWeight0, w18[1]), sub_float(mul_float(kWeight3, w37[0]), mul_float(kWeight4, w37[1]))) }; const float w45[2] = { sub_float(w5, mul_float(kWeight2, sub_float(w24, w31))), sub_float(w12, mul_float(kWeight2, add_float(w31, w24))) }; const float w46[2] = { add_float(w16[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w35[0])), mul_float(kWeight4, w35[1]))), sub_float(sub_float(kWeight0, w16[1]), sub_float(mul_float(kWeight4, w35[0]), mul_float(kWeight3, w35[1]))) }; const float w47 = add_float(i1, i17); const float w48 = sub_float(i1, i17); const float w49 = add_float(i9, i25); const float w50 = sub_float(i9, i25); const float w51 = add_float(w47, w49); const float w52 = sub_float(w47, w49); const float w54 = add_float(i5, i21); const float w55 = sub_float(i5, i21); const float w56 = add_float(i13, i29); const float w57 = sub_float(i13, i29); const float w58 = add_float(w54, w56); const float w59 = sub_float(w54, w56); const float w61 = add_float(w51, w58); const float w62 = sub_float(w51, w58); const float w63[2] = { add_float(w48, mul_float(kWeight2, sub_float(w55, w57))), sub_float(sub_float(kWeight0, w50), mul_float(kWeight2, add_float(w57, w55))) }; const float w65[2] = { sub_float(w48, mul_float(kWeight2, sub_float(w55, w57))), sub_float(w50, mul_float(kWeight2, add_float(w57, w55))) }; const float w66 = add_float(i3, i19); const float w67 = sub_float(i3, i19); const float w68 = add_float(i11, i27); const float w69 = sub_float(i11, i27); const float w70 = add_float(w66, w68); const float w71 = sub_float(w66, w68); const float w73 = add_float(i7, i23); const float w74 = sub_float(i7, i23); const float w75 = add_float(i15, i31); const float w76 = sub_float(i15, i31); const float w77 = add_float(w73, w75); const float w78 = sub_float(w73, w75); const float w80 = 
add_float(w70, w77); const float w81 = sub_float(w70, w77); const float w82[2] = { add_float(w67, mul_float(kWeight2, sub_float(w74, w76))), sub_float(sub_float(kWeight0, w69), mul_float(kWeight2, add_float(w76, w74))) }; const float w84[2] = { sub_float(w67, mul_float(kWeight2, sub_float(w74, w76))), sub_float(w69, mul_float(kWeight2, add_float(w76, w74))) }; const float w85 = add_float(w61, w80); const float w86 = sub_float(w61, w80); const float w87[2] = { add_float(w63[0], add_float(mul_float(kWeight3, w82[0]), mul_float(kWeight4, w82[1]))), add_float(w63[1], sub_float(mul_float(kWeight3, w82[1]), mul_float(kWeight4, w82[0]))) }; const float w88[2] = { add_float(w52, mul_float(kWeight2, sub_float(w71, w78))), sub_float(sub_float(kWeight0, w59), mul_float(kWeight2, add_float(w78, w71))) }; const float w89[2] = { add_float(w65[0], add_float(mul_float(kWeight4, w84[0]), mul_float(kWeight3, w84[1]))), add_float(w65[1], sub_float(mul_float(kWeight4, w84[1]), mul_float(kWeight3, w84[0]))) }; const float w91[2] = { add_float(w65[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w84[0])), mul_float(kWeight3, w84[1]))), sub_float(sub_float(kWeight0, w65[1]), sub_float(mul_float(kWeight3, w84[0]), mul_float(kWeight4, w84[1]))) }; const float w92[2] = { sub_float(w52, mul_float(kWeight2, sub_float(w71, w78))), sub_float(w59, mul_float(kWeight2, add_float(w78, w71))) }; const float w93[2] = { add_float(w63[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w82[0])), mul_float(kWeight4, w82[1]))), sub_float(sub_float(kWeight0, w63[1]), sub_float(mul_float(kWeight4, w82[0]), mul_float(kWeight3, w82[1]))) }; store_float(output + 0 * stride, add_float(w38, w85)); store_float(output + 1 * stride, add_float(w40[0], add_float(mul_float(kWeight5, w87[0]), mul_float(kWeight6, w87[1])))); store_float(output + 2 * stride, add_float(w41[0], add_float(mul_float(kWeight3, w88[0]), mul_float(kWeight4, w88[1])))); store_float(output + 3 * stride, add_float(w42[0], 
add_float(mul_float(kWeight7, w89[0]), mul_float(kWeight8, w89[1])))); store_float(output + 4 * stride, add_float(w15, mul_float(kWeight2, sub_float(w62, w81)))); store_float(output + 5 * stride, add_float(w44[0], add_float(mul_float(kWeight8, w91[0]), mul_float(kWeight7, w91[1])))); store_float(output + 6 * stride, add_float(w45[0], add_float(mul_float(kWeight4, w92[0]), mul_float(kWeight3, w92[1])))); store_float(output + 7 * stride, add_float(w46[0], add_float(mul_float(kWeight6, w93[0]), mul_float(kWeight5, w93[1])))); store_float(output + 8 * stride, w39); store_float(output + 9 * stride, add_float(w46[0], sub_float(sub_float(kWeight0, mul_float(kWeight6, w93[0])), mul_float(kWeight5, w93[1])))); store_float(output + 10 * stride, add_float(w45[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w92[0])), mul_float(kWeight3, w92[1])))); store_float(output + 11 * stride, add_float(w44[0], sub_float(sub_float(kWeight0, mul_float(kWeight8, w91[0])), mul_float(kWeight7, w91[1])))); store_float(output + 12 * stride, sub_float(w15, mul_float(kWeight2, sub_float(w62, w81)))); store_float(output + 13 * stride, add_float(w42[0], sub_float(sub_float(kWeight0, mul_float(kWeight7, w89[0])), mul_float(kWeight8, w89[1])))); store_float(output + 14 * stride, add_float(w41[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w88[0])), mul_float(kWeight4, w88[1])))); store_float(output + 15 * stride, add_float(w40[0], sub_float(sub_float(kWeight0, mul_float(kWeight5, w87[0])), mul_float(kWeight6, w87[1])))); store_float(output + 16 * stride, sub_float(w38, w85)); store_float(output + 17 * stride, add_float(w40[1], sub_float(mul_float(kWeight5, w87[1]), mul_float(kWeight6, w87[0])))); store_float(output + 18 * stride, add_float(w41[1], sub_float(mul_float(kWeight3, w88[1]), mul_float(kWeight4, w88[0])))); store_float(output + 19 * stride, add_float(w42[1], sub_float(mul_float(kWeight7, w89[1]), mul_float(kWeight8, w89[0])))); store_float(output + 20 * stride, 
sub_float(sub_float(kWeight0, w34), mul_float(kWeight2, add_float(w81, w62)))); store_float(output + 21 * stride, add_float(w44[1], sub_float(mul_float(kWeight8, w91[1]), mul_float(kWeight7, w91[0])))); store_float(output + 22 * stride, add_float(w45[1], sub_float(mul_float(kWeight4, w92[1]), mul_float(kWeight3, w92[0])))); store_float(output + 23 * stride, add_float(w46[1], sub_float(mul_float(kWeight6, w93[1]), mul_float(kWeight5, w93[0])))); store_float(output + 24 * stride, sub_float(kWeight0, w86)); store_float(output + 25 * stride, sub_float(sub_float(kWeight0, w46[1]), sub_float(mul_float(kWeight5, w93[0]), mul_float(kWeight6, w93[1])))); store_float(output + 26 * stride, sub_float(sub_float(kWeight0, w45[1]), sub_float(mul_float(kWeight3, w92[0]), mul_float(kWeight4, w92[1])))); store_float(output + 27 * stride, sub_float(sub_float(kWeight0, w44[1]), sub_float(mul_float(kWeight7, w91[0]), mul_float(kWeight8, w91[1])))); store_float(output + 28 * stride, sub_float(w34, mul_float(kWeight2, add_float(w81, w62)))); store_float(output + 29 * stride, sub_float(sub_float(kWeight0, w42[1]), sub_float(mul_float(kWeight8, w89[0]), mul_float(kWeight7, w89[1])))); store_float(output + 30 * stride, sub_float(sub_float(kWeight0, w41[1]), sub_float(mul_float(kWeight4, w88[0]), mul_float(kWeight3, w88[1])))); store_float(output + 31 * stride, sub_float(sub_float(kWeight0, w40[1]), sub_float(mul_float(kWeight6, w87[0]), mul_float(kWeight5, w87[1])))); };
/*
 * 2x2 float forward-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_fft_2d_gen
 * (defined elsewhere in this file) together with the size 2, the 2-point
 * 1-D kernel aom_fft1d_2_float, simple_transpose and unpack_2d_output.
 * The trailing 1 is presumably a vec_size-style column step, as in
 * aom_ifft_2d_gen -- confirm against aom_fft_2d_gen's signature.
 */
void aom_fft2x2_float_c(const float *input, float *temp, float *output) {
  enum { kDim = 2, kVec = 1 };

  aom_fft_2d_gen(input, temp, output, kDim,
                 aom_fft1d_2_float,
                 simple_transpose,
                 unpack_2d_output,
                 kVec);
}
/*
 * 4x4 float forward-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_fft_2d_gen
 * (defined elsewhere in this file) together with the size 4, the 4-point
 * 1-D kernel aom_fft1d_4_float, simple_transpose and unpack_2d_output.
 * The trailing 1 is presumably a vec_size-style column step, as in
 * aom_ifft_2d_gen -- confirm against aom_fft_2d_gen's signature.
 */
void aom_fft4x4_float_c(const float *input, float *temp, float *output) {
  enum { kDim = 4, kVec = 1 };

  aom_fft_2d_gen(input, temp, output, kDim,
                 aom_fft1d_4_float,
                 simple_transpose,
                 unpack_2d_output,
                 kVec);
}
/*
 * 8x8 float forward-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_fft_2d_gen
 * (defined elsewhere in this file) together with the size 8, the 8-point
 * 1-D kernel aom_fft1d_8_float, simple_transpose and unpack_2d_output.
 * The trailing 1 is presumably a vec_size-style column step, as in
 * aom_ifft_2d_gen -- confirm against aom_fft_2d_gen's signature.
 */
void aom_fft8x8_float_c(const float *input, float *temp, float *output) {
  enum { kDim = 8, kVec = 1 };

  aom_fft_2d_gen(input, temp, output, kDim,
                 aom_fft1d_8_float,
                 simple_transpose,
                 unpack_2d_output,
                 kVec);
}
/*
 * 16x16 float forward-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_fft_2d_gen
 * (defined elsewhere in this file) together with the size 16, the 16-point
 * 1-D kernel aom_fft1d_16_float, simple_transpose and unpack_2d_output.
 * The trailing 1 is presumably a vec_size-style column step, as in
 * aom_ifft_2d_gen -- confirm against aom_fft_2d_gen's signature.
 */
void aom_fft16x16_float_c(const float *input, float *temp, float *output) {
  enum { kDim = 16, kVec = 1 };

  aom_fft_2d_gen(input, temp, output, kDim,
                 aom_fft1d_16_float,
                 simple_transpose,
                 unpack_2d_output,
                 kVec);
}
/*
 * 32x32 float forward-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_fft_2d_gen
 * (defined elsewhere in this file) together with the size 32, the 32-point
 * 1-D kernel aom_fft1d_32_float, simple_transpose and unpack_2d_output.
 * The trailing 1 is presumably a vec_size-style column step, as in
 * aom_ifft_2d_gen -- confirm against aom_fft_2d_gen's signature.
 */
void aom_fft32x32_float_c(const float *input, float *temp, float *output) {
  enum { kDim = 32, kVec = 1 };

  aom_fft_2d_gen(input, temp, output, kDim,
                 aom_fft1d_32_float,
                 simple_transpose,
                 unpack_2d_output,
                 kVec);
}
/*
 * aom_ifft_2d_gen: shared driver behind the aom_ifftNxN_float_c entry points.
 *
 * Parameters:
 *   input      - source buffer; read at even indices 2*k and odd indices
 *                2*k + 1 (presumably interleaved real/imaginary parts --
 *                confirm against the forward transform's output layout).
 *   temp       - scratch buffer, indexed as an n-by-n array with row stride n.
 *   output     - working and result buffer, indexed the same way.
 *   n          - transform size per dimension.
 *   fft_single - 1-D callback run one column at a time on columns
 *                2 .. vec_size - 1.
 *   fft_multi  - 1-D callback run on every vec_size-th column starting at
 *                max(2, vec_size).
 *   ifft_multi - 1-D callback run on columns 0..1 first and on every
 *                vec_size-th column of the whole array at the end.
 *   transpose  - called last as transpose(temp, output, n).
 *   vec_size   - column step of the *_multi loops (presumably the number of
 *                columns one *_multi call processes -- confirm). Must be
 *                >= 1: the loops that advance by vec_size do not terminate
 *                otherwise.
 *
 * Every 1-D callback is invoked as cb(output + col, temp + col, n); the
 * aom_ifft1d_N_float kernels in this file name those arguments
 * (input, output, stride), so each call reads a column of `output` and
 * writes the same column of `temp` -- assuming all callbacks follow that
 * convention.
 */
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
aom_fft_1d_func_t ifft_multi,
aom_fft_transpose_func_t transpose, int vec_size) {
/* Build columns 0 and 1 of `output` from input columns 0 and n/2:
 * rows 0 .. n/2 take the even-index (2*k) element of each pair. */
for (int y = 0; y <= n / 2; ++y) {
output[y * n] = input[2 * y * n];
output[y * n + 1] = input[2 * (y * n + n / 2)];
}
/* Rows n/2+1 .. n-1 take the odd-index (2*k + 1) element of input rows
 * 1 .. n/2-1 (row y - n/2), same two input columns. */
for (int y = n / 2 + 1; y < n; ++y) {
output[y * n] = input[2 * (y - n / 2) * n + 1];
output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
}
/* Run ifft_multi on columns 0 and 1 (a single call when vec_size >= 2). */
for (int i = 0; i < 2; i += vec_size) {
ifft_multi(output + i, temp + i, n);
}
/* Fill the remaining columns of `output` from input columns 1 .. n/2-1:
 * the even-index elements go to columns 2 .. n/2, the odd-index elements
 * to columns n/2+1 .. n-1. */
for (int y = 0; y < n; ++y) {
for (int x = 1; x < n / 2; ++x) {
output[y * n + (x + 1)] = input[2 * (y * n + x)];
}
for (int x = 1; x < n / 2; ++x) {
output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
}
}
/* Columns 2 .. vec_size-1 are handled one at a time with fft_single
 * (this loop is empty when vec_size <= 2). */
for (int y = 2; y < vec_size; y++) {
fft_single(output + y, temp + y, n);
}
/* The remaining columns, from max(2, vec_size) on, go through fft_multi
 * in steps of vec_size. Note these are the forward-named callbacks. */
for (int y = (((2) > (vec_size)) ? (2) : (vec_size)); y < n; y += vec_size) {
fft_multi(output + y, temp + y, n);
}
/* Copy temp column 0 into output row 0 and temp column 1 into output
 * row n/2 (a transposing copy). */
for (int x = 0; x < n; ++x) {
output[x] = temp[x * n];
output[(n / 2) * n + x] = temp[x * n + 1];
}
/* Rebuild output rows y and y + n/2 (y = 1 .. n/2-1) from temp columns
 * y + 1 and y + n/2, again transposing while copying. */
for (int y = 1; y < n / 2; ++y) {
/* Row y, columns 0 .. n/2: temp[y+1] term plus the temp[y+n/2] term from
 * row x + n/2; the second term is dropped (0) at x == 0 and x == n/2. */
for (int x = 0; x <= n / 2; ++x) {
output[x + y * n] =
temp[(y + 1) + x * n] +
((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
}
/* Row y, columns n/2+1 .. n-1: mirrored source row (n - x), difference
 * of the two terms. */
for (int x = n / 2 + 1; x < n; ++x) {
output[x + y * n] = temp[(y + 1) + (n - x) * n] -
temp[(y + n / 2) + ((n - x) + n / 2) * n];
}
/* Row y + n/2, columns 0 .. n/2: temp[y+n/2] term minus the temp[y+1]
 * term from row x + n/2 (dropped at x == 0 and x == n/2). */
for (int x = 0; x <= n / 2; ++x) {
output[x + (y + n / 2) * n] =
temp[(y + n / 2) + x * n] -
((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
}
/* Row y + n/2, columns n/2+1 .. n-1: mirrored source row (n - x), sum of
 * the two terms. */
for (int x = n / 2 + 1; x < n; ++x) {
output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
temp[(y + n / 2) + (n - x) * n];
}
}
/* Final pass: ifft_multi over every column of `output`, results in `temp`. */
for (int y = 0; y < n; y += vec_size) {
ifft_multi(output + y, temp + y, n);
}
/* Hand temp to the transpose callback with `output` as second argument
 * (presumably source then destination -- confirm against the callback). */
transpose(temp, output, n);
}
/*
 * 2-point 1-D kernel with the (input, output, stride) signature; used as the
 * ifft_multi callback of aom_ifft2x2_float_c.
 *
 * Reads input[0] and input[stride], then stores their sum at output[0] and
 * their difference (first minus second) at output[stride] via store_float
 * (defined elsewhere in this file). Both inputs are loaded before the first
 * store.
 */
void aom_ifft1d_2_float(const float *input, float *output, int stride) {
  const float first = input[0 * stride];
  const float second = input[1 * stride];

  const float sum = first + second;
  const float difference = first - second;

  store_float(&output[0 * stride], sum);
  store_float(&output[1 * stride], difference);
}
/*
 * 4-point 1-D kernel with the (input, output, stride) signature; used as the
 * ifft_multi callback of aom_ifft4x4_float_c.
 *
 * Reads input[k * stride] for k = 0..3 before storing anything, then writes
 * output[k * stride] for k = 0..3 through store_float. All arithmetic goes
 * through add_float / sub_float, which are defined elsewhere in this file
 * (presumably plain float + and - -- confirm). Under that assumption:
 *
 *   output[0]          = (in0 + in2) + 2 * in1
 *   output[stride]     = (in0 - in2) - 2 * in3
 *   output[2 * stride] = (in0 + in2) - 2 * in1
 *   output[3 * stride] = (in0 - in2) + 2 * in3
 */
void aom_ifft1d_4_float(const float *input, float *output, int stride) {
  const float zero = (float)(0.0f);

  /* Load all inputs up front. */
  const float in0 = input[0 * stride];
  const float in1 = input[1 * stride];
  const float in2 = input[2 * stride];
  const float in3 = input[3 * stride];

  const float outer_sum = add_float(in0, in2);
  const float outer_diff = sub_float(in0, in2);

  /* Two-element pairs as in the generated original; only doubled[0] and
   * negated[1] are consumed below. */
  const float doubled[2] = {
    add_float(in1, in1),
    sub_float(in3, in3)
  };
  const float negated[2] = {
    sub_float(in1, in1),
    sub_float(sub_float(zero, in3), in3) /* (0 - in3) - in3 */
  };

  store_float(&output[0 * stride], add_float(outer_sum, doubled[0]));
  store_float(&output[1 * stride], add_float(outer_diff, negated[1]));
  store_float(&output[2 * stride], sub_float(outer_sum, doubled[0]));
  store_float(&output[3 * stride], sub_float(outer_diff, negated[1]));
}
/*
 * aom_ifft1d_8_float: 8-point 1-D kernel with the (input, output, stride)
 * signature; used as the ifft_multi callback of aom_ifft8x8_float_c.
 *
 * - Reads input[k * stride] for k = 0..7 into i0..i7 before anything is
 *   stored, then writes output[k * stride] for k = 0..7 via store_float.
 * - All arithmetic goes through add_float / sub_float / mul_float, defined
 *   elsewhere in this file (presumably plain float +, -, * -- confirm).
 * - kWeight0 is 0.0f and serves as the left operand of sub_float to negate a
 *   value; kWeight2 is 0.707107f (numerically about 1/sqrt(2)).
 * - The w* temporaries are mostly two-element arrays. NOTE(review): they look
 *   like {real, imaginary} pairs of a butterfly network -- confirm against
 *   the generator. Some elements (e.g. w10[1]) are computed but never read.
 * - The ';' after the closing brace is an empty file-scope declaration,
 *   presumably left over from macro expansion.
 */
void aom_ifft1d_8_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float w6 = add_float(i0, i4); const float w7 = sub_float(i0, i4); const float w8[2] = { add_float(i2, i2), sub_float(i6, i6) }; const float w9[2] = { sub_float(i2, i2), sub_float(sub_float(kWeight0, i6), i6) }; const float w10[2] = { add_float(w6, w8[0]), w8[1] }; const float w11[2] = { sub_float(w6, w8[0]), sub_float(kWeight0, w8[1]) }; const float w12[2] = { add_float(w7, w9[1]), sub_float(kWeight0, w9[0]) }; const float w13[2] = { sub_float(w7, w9[1]), w9[0] }; const float w14[2] = { add_float(i1, i3), sub_float(i7, i5) }; const float w15[2] = { sub_float(i1, i3), sub_float(sub_float(kWeight0, i5), i7) }; const float w16[2] = { add_float(i3, i1), sub_float(i5, i7) }; const float w17[2] = { sub_float(i3, i1), sub_float(sub_float(kWeight0, i7), i5) }; const float w18[2] = { add_float(w14[0], w16[0]), add_float(w14[1], w16[1]) }; const float w19[2] = { sub_float(w14[0], w16[0]), sub_float(w14[1], w16[1]) }; const float w20[2] = { add_float(w15[0], w17[1]), sub_float(w15[1], w17[0]) }; const float w21[2] = { sub_float(w15[0], w17[1]), add_float(w15[1], w17[0]) }; store_float(output + 0 * stride, add_float(w10[0], w18[0])); store_float(output + 1 * stride, add_float(w12[0], mul_float(kWeight2, add_float(w20[0], w20[1])))); store_float(output + 2 * stride, add_float(w11[0], w19[1])); store_float(output + 3 * stride, sub_float(w13[0], mul_float(kWeight2, sub_float(w21[0], w21[1])))); store_float(output + 4 * stride, sub_float(w10[0], w18[0])); store_float(output + 5 * stride, add_float(w12[0], 
sub_float(sub_float(kWeight0, mul_float(kWeight2, w20[0])), mul_float(kWeight2, w20[1])))); store_float(output + 6 * stride, sub_float(w11[0], w19[1])); store_float(output + 7 * stride, add_float(w13[0], mul_float(kWeight2, sub_float(w21[0], w21[1])))); };
/*
 * aom_ifft1d_16_float: 16-point 1-D kernel with the (input, output, stride)
 * signature; used as the ifft_multi callback of aom_ifft16x16_float_c.
 *
 * - Reads input[k * stride] for k = 0..15 into i0..i15 before anything is
 *   stored, then writes output[k * stride] for k = 0..15 via store_float.
 * - All arithmetic goes through add_float / sub_float / mul_float, defined
 *   elsewhere in this file (presumably plain float +, -, * -- confirm).
 * - kWeight0 is 0.0f and serves as the left operand of sub_float to negate a
 *   value. The other constants are numerically about:
 *     kWeight2 = 0.707107f ~ 1/sqrt(2)
 *     kWeight3 = 0.92388f  ~ cos(pi/8)
 *     kWeight4 = 0.382683f ~ sin(pi/8)
 * - The w* temporaries are mostly two-element arrays. NOTE(review): they look
 *   like {real, imaginary} pairs of a butterfly network -- confirm against
 *   the generator. Some elements (e.g. w30[1]) are computed but never read.
 * - The ';' after the closing brace is an empty file-scope declaration,
 *   presumably left over from macro expansion.
 */
void aom_ifft1d_16_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float w14 = add_float(i0, i8); const float w15 = sub_float(i0, i8); const float w16[2] = { add_float(i4, i4), sub_float(i12, i12) }; const float w17[2] = { sub_float(i4, i4), sub_float(sub_float(kWeight0, i12), i12) }; const float w18[2] = { add_float(w14, w16[0]), w16[1] }; const float w19[2] = { sub_float(w14, w16[0]), sub_float(kWeight0, w16[1]) }; const float w20[2] = { add_float(w15, w17[1]), sub_float(kWeight0, w17[0]) }; const float w21[2] = { sub_float(w15, w17[1]), w17[0] }; const float w22[2] = { add_float(i2, i6), sub_float(i14, i10) }; const float w23[2] = { sub_float(i2, i6), sub_float(sub_float(kWeight0, i10), i14) }; const float w24[2] = { add_float(i6, i2), sub_float(i10, i14) }; const float w25[2] = { sub_float(i6, i2), sub_float(sub_float(kWeight0, i14), i10) }; const float w26[2] = { add_float(w22[0], w24[0]), add_float(w22[1], w24[1]) }; const float w27[2] = { sub_float(w22[0], w24[0]), sub_float(w22[1], w24[1]) }; const float w28[2] = { add_float(w23[0], w25[1]), sub_float(w23[1], w25[0]) }; const float w29[2] = { sub_float(w23[0], w25[1]), add_float(w23[1], 
w25[0]) }; const float w30[2] = { add_float(w18[0], w26[0]), add_float(w18[1], w26[1]) }; const float w31[2] = { sub_float(w18[0], w26[0]), sub_float(w18[1], w26[1]) }; const float w32[2] = { add_float(w20[0], mul_float(kWeight2, add_float(w28[0], w28[1]))), add_float(w20[1], mul_float(kWeight2, sub_float(w28[1], w28[0]))) }; const float w33[2] = { add_float(w20[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w28[0])), mul_float(kWeight2, w28[1]))), add_float(w20[1], mul_float(kWeight2, sub_float(w28[0], w28[1]))) }; const float w34[2] = { add_float(w19[0], w27[1]), sub_float(w19[1], w27[0]) }; const float w35[2] = { sub_float(w19[0], w27[1]), add_float(w19[1], w27[0]) }; const float w36[2] = { sub_float(w21[0], mul_float(kWeight2, sub_float(w29[0], w29[1]))), sub_float(w21[1], mul_float(kWeight2, add_float(w29[1], w29[0]))) }; const float w37[2] = { add_float(w21[0], mul_float(kWeight2, sub_float(w29[0], w29[1]))), add_float(w21[1], mul_float(kWeight2, add_float(w29[1], w29[0]))) }; const float w38[2] = { add_float(i1, i7), sub_float(i15, i9) }; const float w39[2] = { sub_float(i1, i7), sub_float(sub_float(kWeight0, i9), i15) }; const float w40[2] = { add_float(i5, i3), sub_float(i11, i13) }; const float w41[2] = { sub_float(i5, i3), sub_float(sub_float(kWeight0, i13), i11) }; const float w42[2] = { add_float(w38[0], w40[0]), add_float(w38[1], w40[1]) }; const float w43[2] = { sub_float(w38[0], w40[0]), sub_float(w38[1], w40[1]) }; const float w44[2] = { add_float(w39[0], w41[1]), sub_float(w39[1], w41[0]) }; const float w45[2] = { sub_float(w39[0], w41[1]), add_float(w39[1], w41[0]) }; const float w46[2] = { add_float(i3, i5), sub_float(i13, i11) }; const float w47[2] = { sub_float(i3, i5), sub_float(sub_float(kWeight0, i11), i13) }; const float w48[2] = { add_float(i7, i1), sub_float(i9, i15) }; const float w49[2] = { sub_float(i7, i1), sub_float(sub_float(kWeight0, i15), i9) }; const float w50[2] = { add_float(w46[0], w48[0]), add_float(w46[1], w48[1]) }; 
const float w51[2] = { sub_float(w46[0], w48[0]), sub_float(w46[1], w48[1]) }; const float w52[2] = { add_float(w47[0], w49[1]), sub_float(w47[1], w49[0]) }; const float w53[2] = { sub_float(w47[0], w49[1]), add_float(w47[1], w49[0]) }; const float w54[2] = { add_float(w42[0], w50[0]), add_float(w42[1], w50[1]) }; const float w55[2] = { sub_float(w42[0], w50[0]), sub_float(w42[1], w50[1]) }; const float w56[2] = { add_float(w44[0], mul_float(kWeight2, add_float(w52[0], w52[1]))), add_float(w44[1], mul_float(kWeight2, sub_float(w52[1], w52[0]))) }; const float w57[2] = { add_float(w44[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w52[0])), mul_float(kWeight2, w52[1]))), add_float(w44[1], mul_float(kWeight2, sub_float(w52[0], w52[1]))) }; const float w58[2] = { add_float(w43[0], w51[1]), sub_float(w43[1], w51[0]) }; const float w59[2] = { sub_float(w43[0], w51[1]), add_float(w43[1], w51[0]) }; const float w60[2] = { sub_float(w45[0], mul_float(kWeight2, sub_float(w53[0], w53[1]))), sub_float(w45[1], mul_float(kWeight2, add_float(w53[1], w53[0]))) }; const float w61[2] = { add_float(w45[0], mul_float(kWeight2, sub_float(w53[0], w53[1]))), add_float(w45[1], mul_float(kWeight2, add_float(w53[1], w53[0]))) }; store_float(output + 0 * stride, add_float(w30[0], w54[0])); store_float(output + 1 * stride, add_float(w32[0], add_float(mul_float(kWeight3, w56[0]), mul_float(kWeight4, w56[1])))); store_float(output + 2 * stride, add_float(w34[0], mul_float(kWeight2, add_float(w58[0], w58[1])))); store_float(output + 3 * stride, add_float(w36[0], add_float(mul_float(kWeight4, w60[0]), mul_float(kWeight3, w60[1])))); store_float(output + 4 * stride, add_float(w31[0], w55[1])); store_float(output + 5 * stride, sub_float(w33[0], sub_float(mul_float(kWeight4, w57[0]), mul_float(kWeight3, w57[1])))); store_float(output + 6 * stride, sub_float(w35[0], mul_float(kWeight2, sub_float(w59[0], w59[1])))); store_float(output + 7 * stride, sub_float(w37[0], 
sub_float(mul_float(kWeight3, w61[0]), mul_float(kWeight4, w61[1])))); store_float(output + 8 * stride, sub_float(w30[0], w54[0])); store_float(output + 9 * stride, add_float(w32[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w56[0])), mul_float(kWeight4, w56[1])))); store_float(output + 10 * stride, add_float(w34[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w58[0])), mul_float(kWeight2, w58[1])))); store_float(output + 11 * stride, add_float(w36[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w60[0])), mul_float(kWeight3, w60[1])))); store_float(output + 12 * stride, sub_float(w31[0], w55[1])); store_float(output + 13 * stride, add_float(w33[0], sub_float(mul_float(kWeight4, w57[0]), mul_float(kWeight3, w57[1])))); store_float(output + 14 * stride, add_float(w35[0], mul_float(kWeight2, sub_float(w59[0], w59[1])))); store_float(output + 15 * stride, add_float(w37[0], sub_float(mul_float(kWeight3, w61[0]), mul_float(kWeight4, w61[1])))); };
/*
 * aom_ifft1d_32_float: 32-point 1-D kernel with the (input, output, stride)
 * signature; used as the ifft_multi callback of aom_ifft32x32_float_c.
 *
 * - Reads input[k * stride] for k = 0..31 into i0..i31 before anything is
 *   stored, then writes output[k * stride] for k = 0..31 via store_float.
 * - All arithmetic goes through add_float / sub_float / mul_float, defined
 *   elsewhere in this file (presumably plain float +, -, * -- confirm).
 * - kWeight0 is 0.0f and serves as the left operand of sub_float to negate a
 *   value. The other constants are numerically about:
 *     kWeight2 = 0.707107f ~ 1/sqrt(2)
 *     kWeight3 = 0.92388f  ~ cos(pi/8)      kWeight4 = 0.382683f ~ sin(pi/8)
 *     kWeight5 = 0.980785f ~ cos(pi/16)     kWeight6 = 0.19509f  ~ sin(pi/16)
 *     kWeight7 = 0.83147f  ~ cos(3*pi/16)   kWeight8 = 0.55557f  ~ sin(3*pi/16)
 * - The w* temporaries are mostly two-element arrays. NOTE(review): they look
 *   like {real, imaginary} pairs of a butterfly network -- confirm against
 *   the generator. Some elements (e.g. w78[1]) are computed but never read.
 * - The ';' after the closing brace is an empty file-scope declaration,
 *   presumably left over from macro expansion.
 */
void aom_ifft1d_32_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float kWeight5 = (float)(0.980785f); const float kWeight6 = (float)(0.19509f); const float kWeight7 = (float)(0.83147f); const float kWeight8 = (float)(0.55557f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float i16 = *(input + 16 * stride); const float i17 = *(input + 17 * stride); const float i18 = *(input + 18 * stride); const float i19 = *(input + 19 * stride); const float i20 = *(input + 20 * stride); const float i21 = *(input + 21 * stride); const float i22 = *(input + 22 * stride); const float i23 = *(input + 23 * stride); const float i24 = *(input + 24 * stride); const float i25 = *(input + 25 * stride); const float i26 = *(input + 26 * stride); const float i27 = *(input + 27 * stride); const float i28 = *(input + 28 * stride); const float i29 = *(input + 29 * stride); const float i30 = *(input + 30 * stride); const float i31 = *(input + 31 * stride); const float w30 = add_float(i0, i16); const float w31 = sub_float(i0, i16); const float w32[2] = { add_float(i8, i8), sub_float(i24, i24) }; const float w33[2] = { sub_float(i8, i8), sub_float(sub_float(kWeight0, i24), i24) }; const float w34[2] = { add_float(w30, 
w32[0]), w32[1] }; const float w35[2] = { sub_float(w30, w32[0]), sub_float(kWeight0, w32[1]) }; const float w36[2] = { add_float(w31, w33[1]), sub_float(kWeight0, w33[0]) }; const float w37[2] = { sub_float(w31, w33[1]), w33[0] }; const float w38[2] = { add_float(i4, i12), sub_float(i28, i20) }; const float w39[2] = { sub_float(i4, i12), sub_float(sub_float(kWeight0, i20), i28) }; const float w40[2] = { add_float(i12, i4), sub_float(i20, i28) }; const float w41[2] = { sub_float(i12, i4), sub_float(sub_float(kWeight0, i28), i20) }; const float w42[2] = { add_float(w38[0], w40[0]), add_float(w38[1], w40[1]) }; const float w43[2] = { sub_float(w38[0], w40[0]), sub_float(w38[1], w40[1]) }; const float w44[2] = { add_float(w39[0], w41[1]), sub_float(w39[1], w41[0]) }; const float w45[2] = { sub_float(w39[0], w41[1]), add_float(w39[1], w41[0]) }; const float w46[2] = { add_float(w34[0], w42[0]), add_float(w34[1], w42[1]) }; const float w47[2] = { sub_float(w34[0], w42[0]), sub_float(w34[1], w42[1]) }; const float w48[2] = { add_float(w36[0], mul_float(kWeight2, add_float(w44[0], w44[1]))), add_float(w36[1], mul_float(kWeight2, sub_float(w44[1], w44[0]))) }; const float w49[2] = { add_float(w36[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w44[0])), mul_float(kWeight2, w44[1]))), add_float(w36[1], mul_float(kWeight2, sub_float(w44[0], w44[1]))) }; const float w50[2] = { add_float(w35[0], w43[1]), sub_float(w35[1], w43[0]) }; const float w51[2] = { sub_float(w35[0], w43[1]), add_float(w35[1], w43[0]) }; const float w52[2] = { sub_float(w37[0], mul_float(kWeight2, sub_float(w45[0], w45[1]))), sub_float(w37[1], mul_float(kWeight2, add_float(w45[1], w45[0]))) }; const float w53[2] = { add_float(w37[0], mul_float(kWeight2, sub_float(w45[0], w45[1]))), add_float(w37[1], mul_float(kWeight2, add_float(w45[1], w45[0]))) }; const float w54[2] = { add_float(i2, i14), sub_float(i30, i18) }; const float w55[2] = { sub_float(i2, i14), sub_float(sub_float(kWeight0, i18), i30) 
}; const float w56[2] = { add_float(i10, i6), sub_float(i22, i26) }; const float w57[2] = { sub_float(i10, i6), sub_float(sub_float(kWeight0, i26), i22) }; const float w58[2] = { add_float(w54[0], w56[0]), add_float(w54[1], w56[1]) }; const float w59[2] = { sub_float(w54[0], w56[0]), sub_float(w54[1], w56[1]) }; const float w60[2] = { add_float(w55[0], w57[1]), sub_float(w55[1], w57[0]) }; const float w61[2] = { sub_float(w55[0], w57[1]), add_float(w55[1], w57[0]) }; const float w62[2] = { add_float(i6, i10), sub_float(i26, i22) }; const float w63[2] = { sub_float(i6, i10), sub_float(sub_float(kWeight0, i22), i26) }; const float w64[2] = { add_float(i14, i2), sub_float(i18, i30) }; const float w65[2] = { sub_float(i14, i2), sub_float(sub_float(kWeight0, i30), i18) }; const float w66[2] = { add_float(w62[0], w64[0]), add_float(w62[1], w64[1]) }; const float w67[2] = { sub_float(w62[0], w64[0]), sub_float(w62[1], w64[1]) }; const float w68[2] = { add_float(w63[0], w65[1]), sub_float(w63[1], w65[0]) }; const float w69[2] = { sub_float(w63[0], w65[1]), add_float(w63[1], w65[0]) }; const float w70[2] = { add_float(w58[0], w66[0]), add_float(w58[1], w66[1]) }; const float w71[2] = { sub_float(w58[0], w66[0]), sub_float(w58[1], w66[1]) }; const float w72[2] = { add_float(w60[0], mul_float(kWeight2, add_float(w68[0], w68[1]))), add_float(w60[1], mul_float(kWeight2, sub_float(w68[1], w68[0]))) }; const float w73[2] = { add_float(w60[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w68[0])), mul_float(kWeight2, w68[1]))), add_float(w60[1], mul_float(kWeight2, sub_float(w68[0], w68[1]))) }; const float w74[2] = { add_float(w59[0], w67[1]), sub_float(w59[1], w67[0]) }; const float w75[2] = { sub_float(w59[0], w67[1]), add_float(w59[1], w67[0]) }; const float w76[2] = { sub_float(w61[0], mul_float(kWeight2, sub_float(w69[0], w69[1]))), sub_float(w61[1], mul_float(kWeight2, add_float(w69[1], w69[0]))) }; const float w77[2] = { add_float(w61[0], mul_float(kWeight2, 
sub_float(w69[0], w69[1]))), add_float(w61[1], mul_float(kWeight2, add_float(w69[1], w69[0]))) }; const float w78[2] = { add_float(w46[0], w70[0]), add_float(w46[1], w70[1]) }; const float w79[2] = { sub_float(w46[0], w70[0]), sub_float(w46[1], w70[1]) }; const float w80[2] = { add_float(w48[0], add_float(mul_float(kWeight3, w72[0]), mul_float(kWeight4, w72[1]))), add_float(w48[1], sub_float(mul_float(kWeight3, w72[1]), mul_float(kWeight4, w72[0]))) }; const float w81[2] = { add_float(w48[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w72[0])), mul_float(kWeight4, w72[1]))), add_float(w48[1], sub_float(mul_float(kWeight4, w72[0]), mul_float(kWeight3, w72[1]))) }; const float w82[2] = { add_float(w50[0], mul_float(kWeight2, add_float(w74[0], w74[1]))), add_float(w50[1], mul_float(kWeight2, sub_float(w74[1], w74[0]))) }; const float w83[2] = { add_float(w50[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w74[0])), mul_float(kWeight2, w74[1]))), add_float(w50[1], mul_float(kWeight2, sub_float(w74[0], w74[1]))) }; const float w84[2] = { add_float(w52[0], add_float(mul_float(kWeight4, w76[0]), mul_float(kWeight3, w76[1]))), add_float(w52[1], sub_float(mul_float(kWeight4, w76[1]), mul_float(kWeight3, w76[0]))) }; const float w85[2] = { add_float(w52[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w76[0])), mul_float(kWeight3, w76[1]))), add_float(w52[1], sub_float(mul_float(kWeight3, w76[0]), mul_float(kWeight4, w76[1]))) }; const float w86[2] = { add_float(w47[0], w71[1]), sub_float(w47[1], w71[0]) }; const float w87[2] = { sub_float(w47[0], w71[1]), add_float(w47[1], w71[0]) }; const float w88[2] = { sub_float(w49[0], sub_float(mul_float(kWeight4, w73[0]), mul_float(kWeight3, w73[1]))), add_float(w49[1], sub_float(sub_float(kWeight0, mul_float(kWeight4, w73[1])), mul_float(kWeight3, w73[0]))) }; const float w89[2] = { add_float(w49[0], sub_float(mul_float(kWeight4, w73[0]), mul_float(kWeight3, w73[1]))), add_float(w49[1], 
add_float(mul_float(kWeight4, w73[1]), mul_float(kWeight3, w73[0]))) }; const float w90[2] = { sub_float(w51[0], mul_float(kWeight2, sub_float(w75[0], w75[1]))), sub_float(w51[1], mul_float(kWeight2, add_float(w75[1], w75[0]))) }; const float w91[2] = { add_float(w51[0], mul_float(kWeight2, sub_float(w75[0], w75[1]))), add_float(w51[1], mul_float(kWeight2, add_float(w75[1], w75[0]))) }; const float w92[2] = { sub_float(w53[0], sub_float(mul_float(kWeight3, w77[0]), mul_float(kWeight4, w77[1]))), add_float(w53[1], sub_float(sub_float(kWeight0, mul_float(kWeight3, w77[1])), mul_float(kWeight4, w77[0]))) }; const float w93[2] = { add_float(w53[0], sub_float(mul_float(kWeight3, w77[0]), mul_float(kWeight4, w77[1]))), add_float(w53[1], add_float(mul_float(kWeight3, w77[1]), mul_float(kWeight4, w77[0]))) }; const float w94[2] = { add_float(i1, i15), sub_float(i31, i17) }; const float w95[2] = { sub_float(i1, i15), sub_float(sub_float(kWeight0, i17), i31) }; const float w96[2] = { add_float(i9, i7), sub_float(i23, i25) }; const float w97[2] = { sub_float(i9, i7), sub_float(sub_float(kWeight0, i25), i23) }; const float w98[2] = { add_float(w94[0], w96[0]), add_float(w94[1], w96[1]) }; const float w99[2] = { sub_float(w94[0], w96[0]), sub_float(w94[1], w96[1]) }; const float w100[2] = { add_float(w95[0], w97[1]), sub_float(w95[1], w97[0]) }; const float w101[2] = { sub_float(w95[0], w97[1]), add_float(w95[1], w97[0]) }; const float w102[2] = { add_float(i5, i11), sub_float(i27, i21) }; const float w103[2] = { sub_float(i5, i11), sub_float(sub_float(kWeight0, i21), i27) }; const float w104[2] = { add_float(i13, i3), sub_float(i19, i29) }; const float w105[2] = { sub_float(i13, i3), sub_float(sub_float(kWeight0, i29), i19) }; const float w106[2] = { add_float(w102[0], w104[0]), add_float(w102[1], w104[1]) }; const float w107[2] = { sub_float(w102[0], w104[0]), sub_float(w102[1], w104[1]) }; const float w108[2] = { add_float(w103[0], w105[1]), sub_float(w103[1], w105[0]) }; 
const float w109[2] = { sub_float(w103[0], w105[1]), add_float(w103[1], w105[0]) }; const float w110[2] = { add_float(w98[0], w106[0]), add_float(w98[1], w106[1]) }; const float w111[2] = { sub_float(w98[0], w106[0]), sub_float(w98[1], w106[1]) }; const float w112[2] = { add_float(w100[0], mul_float(kWeight2, add_float(w108[0], w108[1]))), add_float(w100[1], mul_float(kWeight2, sub_float(w108[1], w108[0]))) }; const float w113[2] = { add_float(w100[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w108[0])), mul_float(kWeight2, w108[1]))), add_float(w100[1], mul_float(kWeight2, sub_float(w108[0], w108[1]))) }; const float w114[2] = { add_float(w99[0], w107[1]), sub_float(w99[1], w107[0]) }; const float w115[2] = { sub_float(w99[0], w107[1]), add_float(w99[1], w107[0]) }; const float w116[2] = { sub_float(w101[0], mul_float(kWeight2, sub_float(w109[0], w109[1]))), sub_float(w101[1], mul_float(kWeight2, add_float(w109[1], w109[0]))) }; const float w117[2] = { add_float(w101[0], mul_float(kWeight2, sub_float(w109[0], w109[1]))), add_float(w101[1], mul_float(kWeight2, add_float(w109[1], w109[0]))) }; const float w118[2] = { add_float(i3, i13), sub_float(i29, i19) }; const float w119[2] = { sub_float(i3, i13), sub_float(sub_float(kWeight0, i19), i29) }; const float w120[2] = { add_float(i11, i5), sub_float(i21, i27) }; const float w121[2] = { sub_float(i11, i5), sub_float(sub_float(kWeight0, i27), i21) }; const float w122[2] = { add_float(w118[0], w120[0]), add_float(w118[1], w120[1]) }; const float w123[2] = { sub_float(w118[0], w120[0]), sub_float(w118[1], w120[1]) }; const float w124[2] = { add_float(w119[0], w121[1]), sub_float(w119[1], w121[0]) }; const float w125[2] = { sub_float(w119[0], w121[1]), add_float(w119[1], w121[0]) }; const float w126[2] = { add_float(i7, i9), sub_float(i25, i23) }; const float w127[2] = { sub_float(i7, i9), sub_float(sub_float(kWeight0, i23), i25) }; const float w128[2] = { add_float(i15, i1), sub_float(i17, i31) }; const float 
w129[2] = { sub_float(i15, i1), sub_float(sub_float(kWeight0, i31), i17) }; const float w130[2] = { add_float(w126[0], w128[0]), add_float(w126[1], w128[1]) }; const float w131[2] = { sub_float(w126[0], w128[0]), sub_float(w126[1], w128[1]) }; const float w132[2] = { add_float(w127[0], w129[1]), sub_float(w127[1], w129[0]) }; const float w133[2] = { sub_float(w127[0], w129[1]), add_float(w127[1], w129[0]) }; const float w134[2] = { add_float(w122[0], w130[0]), add_float(w122[1], w130[1]) }; const float w135[2] = { sub_float(w122[0], w130[0]), sub_float(w122[1], w130[1]) }; const float w136[2] = { add_float(w124[0], mul_float(kWeight2, add_float(w132[0], w132[1]))), add_float(w124[1], mul_float(kWeight2, sub_float(w132[1], w132[0]))) }; const float w137[2] = { add_float(w124[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w132[0])), mul_float(kWeight2, w132[1]))), add_float(w124[1], mul_float(kWeight2, sub_float(w132[0], w132[1]))) }; const float w138[2] = { add_float(w123[0], w131[1]), sub_float(w123[1], w131[0]) }; const float w139[2] = { sub_float(w123[0], w131[1]), add_float(w123[1], w131[0]) }; const float w140[2] = { sub_float(w125[0], mul_float(kWeight2, sub_float(w133[0], w133[1]))), sub_float(w125[1], mul_float(kWeight2, add_float(w133[1], w133[0]))) }; const float w141[2] = { add_float(w125[0], mul_float(kWeight2, sub_float(w133[0], w133[1]))), add_float(w125[1], mul_float(kWeight2, add_float(w133[1], w133[0]))) }; const float w142[2] = { add_float(w110[0], w134[0]), add_float(w110[1], w134[1]) }; const float w143[2] = { sub_float(w110[0], w134[0]), sub_float(w110[1], w134[1]) }; const float w144[2] = { add_float(w112[0], add_float(mul_float(kWeight3, w136[0]), mul_float(kWeight4, w136[1]))), add_float(w112[1], sub_float(mul_float(kWeight3, w136[1]), mul_float(kWeight4, w136[0]))) }; const float w145[2] = { add_float(w112[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w136[0])), mul_float(kWeight4, w136[1]))), add_float(w112[1], 
sub_float(mul_float(kWeight4, w136[0]), mul_float(kWeight3, w136[1]))) }; const float w146[2] = { add_float(w114[0], mul_float(kWeight2, add_float(w138[0], w138[1]))), add_float(w114[1], mul_float(kWeight2, sub_float(w138[1], w138[0]))) }; const float w147[2] = { add_float(w114[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w138[0])), mul_float(kWeight2, w138[1]))), add_float(w114[1], mul_float(kWeight2, sub_float(w138[0], w138[1]))) }; const float w148[2] = { add_float(w116[0], add_float(mul_float(kWeight4, w140[0]), mul_float(kWeight3, w140[1]))), add_float(w116[1], sub_float(mul_float(kWeight4, w140[1]), mul_float(kWeight3, w140[0]))) }; const float w149[2] = { add_float(w116[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w140[0])), mul_float(kWeight3, w140[1]))), add_float(w116[1], sub_float(mul_float(kWeight3, w140[0]), mul_float(kWeight4, w140[1]))) }; const float w150[2] = { add_float(w111[0], w135[1]), sub_float(w111[1], w135[0]) }; const float w151[2] = { sub_float(w111[0], w135[1]), add_float(w111[1], w135[0]) }; const float w152[2] = { sub_float(w113[0], sub_float(mul_float(kWeight4, w137[0]), mul_float(kWeight3, w137[1]))), add_float(w113[1], sub_float(sub_float(kWeight0, mul_float(kWeight4, w137[1])), mul_float(kWeight3, w137[0]))) }; const float w153[2] = { add_float(w113[0], sub_float(mul_float(kWeight4, w137[0]), mul_float(kWeight3, w137[1]))), add_float(w113[1], add_float(mul_float(kWeight4, w137[1]), mul_float(kWeight3, w137[0]))) }; const float w154[2] = { sub_float(w115[0], mul_float(kWeight2, sub_float(w139[0], w139[1]))), sub_float(w115[1], mul_float(kWeight2, add_float(w139[1], w139[0]))) }; const float w155[2] = { add_float(w115[0], mul_float(kWeight2, sub_float(w139[0], w139[1]))), add_float(w115[1], mul_float(kWeight2, add_float(w139[1], w139[0]))) }; const float w156[2] = { sub_float(w117[0], sub_float(mul_float(kWeight3, w141[0]), mul_float(kWeight4, w141[1]))), add_float(w117[1], sub_float(sub_float(kWeight0, 
mul_float(kWeight3, w141[1])), mul_float(kWeight4, w141[0]))) }; const float w157[2] = { add_float(w117[0], sub_float(mul_float(kWeight3, w141[0]), mul_float(kWeight4, w141[1]))), add_float(w117[1], add_float(mul_float(kWeight3, w141[1]), mul_float(kWeight4, w141[0]))) }; store_float(output + 0 * stride, add_float(w78[0], w142[0])); store_float(output + 1 * stride, add_float(w80[0], add_float(mul_float(kWeight5, w144[0]), mul_float(kWeight6, w144[1])))); store_float(output + 2 * stride, add_float(w82[0], add_float(mul_float(kWeight3, w146[0]), mul_float(kWeight4, w146[1])))); store_float(output + 3 * stride, add_float(w84[0], add_float(mul_float(kWeight7, w148[0]), mul_float(kWeight8, w148[1])))); store_float(output + 4 * stride, add_float(w86[0], mul_float(kWeight2, add_float(w150[0], w150[1])))); store_float(output + 5 * stride, add_float(w88[0], add_float(mul_float(kWeight8, w152[0]), mul_float(kWeight7, w152[1])))); store_float(output + 6 * stride, add_float(w90[0], add_float(mul_float(kWeight4, w154[0]), mul_float(kWeight3, w154[1])))); store_float(output + 7 * stride, add_float(w92[0], add_float(mul_float(kWeight6, w156[0]), mul_float(kWeight5, w156[1])))); store_float(output + 8 * stride, add_float(w79[0], w143[1])); store_float(output + 9 * stride, sub_float(w81[0], sub_float(mul_float(kWeight6, w145[0]), mul_float(kWeight5, w145[1])))); store_float(output + 10 * stride, sub_float(w83[0], sub_float(mul_float(kWeight4, w147[0]), mul_float(kWeight3, w147[1])))); store_float(output + 11 * stride, sub_float(w85[0], sub_float(mul_float(kWeight8, w149[0]), mul_float(kWeight7, w149[1])))); store_float(output + 12 * stride, sub_float(w87[0], mul_float(kWeight2, sub_float(w151[0], w151[1])))); store_float(output + 13 * stride, sub_float(w89[0], sub_float(mul_float(kWeight7, w153[0]), mul_float(kWeight8, w153[1])))); store_float(output + 14 * stride, sub_float(w91[0], sub_float(mul_float(kWeight3, w155[0]), mul_float(kWeight4, w155[1])))); store_float(output + 15 * 
stride, sub_float(w93[0], sub_float(mul_float(kWeight5, w157[0]), mul_float(kWeight6, w157[1])))); store_float(output + 16 * stride, sub_float(w78[0], w142[0])); store_float(output + 17 * stride, add_float(w80[0], sub_float(sub_float(kWeight0, mul_float(kWeight5, w144[0])), mul_float(kWeight6, w144[1])))); store_float(output + 18 * stride, add_float(w82[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w146[0])), mul_float(kWeight4, w146[1])))); store_float(output + 19 * stride, add_float(w84[0], sub_float(sub_float(kWeight0, mul_float(kWeight7, w148[0])), mul_float(kWeight8, w148[1])))); store_float(output + 20 * stride, add_float(w86[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w150[0])), mul_float(kWeight2, w150[1])))); store_float(output + 21 * stride, add_float(w88[0], sub_float(sub_float(kWeight0, mul_float(kWeight8, w152[0])), mul_float(kWeight7, w152[1])))); store_float(output + 22 * stride, add_float(w90[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w154[0])), mul_float(kWeight3, w154[1])))); store_float(output + 23 * stride, add_float(w92[0], sub_float(sub_float(kWeight0, mul_float(kWeight6, w156[0])), mul_float(kWeight5, w156[1])))); store_float(output + 24 * stride, sub_float(w79[0], w143[1])); store_float(output + 25 * stride, add_float(w81[0], sub_float(mul_float(kWeight6, w145[0]), mul_float(kWeight5, w145[1])))); store_float(output + 26 * stride, add_float(w83[0], sub_float(mul_float(kWeight4, w147[0]), mul_float(kWeight3, w147[1])))); store_float(output + 27 * stride, add_float(w85[0], sub_float(mul_float(kWeight8, w149[0]), mul_float(kWeight7, w149[1])))); store_float(output + 28 * stride, add_float(w87[0], mul_float(kWeight2, sub_float(w151[0], w151[1])))); store_float(output + 29 * stride, add_float(w89[0], sub_float(mul_float(kWeight7, w153[0]), mul_float(kWeight8, w153[1])))); store_float(output + 30 * stride, add_float(w91[0], sub_float(mul_float(kWeight3, w155[0]), mul_float(kWeight4, w155[1])))); store_float(output + 31 
* stride, add_float(w93[0], sub_float(mul_float(kWeight5, w157[0]), mul_float(kWeight6, w157[1])))); };
/*
 * 2x2 float inverse-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_ifft_2d_gen
 * with n = 2 and vec_size = 1. The forward 2-point kernel aom_fft1d_2_float
 * is supplied for both fft_single and fft_multi, aom_ifft1d_2_float for
 * ifft_multi, and simple_transpose for the transpose step.
 */
void aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
  enum { kSize = 2, kVecSize = 1 };

  aom_ifft_2d_gen(input, temp, output, kSize,
                  aom_fft1d_2_float,  /* fft_single */
                  aom_fft1d_2_float,  /* fft_multi */
                  aom_ifft1d_2_float, /* ifft_multi */
                  simple_transpose,   /* transpose */
                  kVecSize);
}
/*
 * 4x4 float inverse-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_ifft_2d_gen
 * with n = 4 and vec_size = 1. The forward 4-point kernel aom_fft1d_4_float
 * is supplied for both fft_single and fft_multi, aom_ifft1d_4_float for
 * ifft_multi, and simple_transpose for the transpose step.
 */
void aom_ifft4x4_float_c(const float *input, float *temp, float *output) {
  enum { kSize = 4, kVecSize = 1 };

  aom_ifft_2d_gen(input, temp, output, kSize,
                  aom_fft1d_4_float,  /* fft_single */
                  aom_fft1d_4_float,  /* fft_multi */
                  aom_ifft1d_4_float, /* ifft_multi */
                  simple_transpose,   /* transpose */
                  kVecSize);
}
/*
 * 8x8 float inverse-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_ifft_2d_gen
 * with n = 8 and vec_size = 1. The forward 8-point kernel aom_fft1d_8_float
 * is supplied for both fft_single and fft_multi, aom_ifft1d_8_float for
 * ifft_multi, and simple_transpose for the transpose step.
 */
void aom_ifft8x8_float_c(const float *input, float *temp, float *output) {
  enum { kSize = 8, kVecSize = 1 };

  aom_ifft_2d_gen(input, temp, output, kSize,
                  aom_fft1d_8_float,  /* fft_single */
                  aom_fft1d_8_float,  /* fft_multi */
                  aom_ifft1d_8_float, /* ifft_multi */
                  simple_transpose,   /* transpose */
                  kVecSize);
}
/*
 * 16x16 float inverse-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_ifft_2d_gen
 * with n = 16 and vec_size = 1. The forward 16-point kernel
 * aom_fft1d_16_float is supplied for both fft_single and fft_multi,
 * aom_ifft1d_16_float for ifft_multi, and simple_transpose for the
 * transpose step.
 */
void aom_ifft16x16_float_c(const float *input, float *temp, float *output) {
  enum { kSize = 16, kVecSize = 1 };

  aom_ifft_2d_gen(input, temp, output, kSize,
                  aom_fft1d_16_float,  /* fft_single */
                  aom_fft1d_16_float,  /* fft_multi */
                  aom_ifft1d_16_float, /* ifft_multi */
                  simple_transpose,    /* transpose */
                  kVecSize);
}
/*
 * 32x32 float inverse-FFT entry point.
 *
 * Forwards the caller's input, temp and output buffers to aom_ifft_2d_gen
 * with n = 32 and vec_size = 1. The forward 32-point kernel
 * aom_fft1d_32_float is supplied for both fft_single and fft_multi,
 * aom_ifft1d_32_float for ifft_multi, and simple_transpose for the
 * transpose step.
 */
void aom_ifft32x32_float_c(const float *input, float *temp, float *output) {
  enum { kSize = 32, kVecSize = 1 };

  aom_ifft_2d_gen(input, temp, output, kSize,
                  aom_fft1d_32_float,  /* fft_single */
                  aom_fft1d_32_float,  /* fft_multi */
                  aom_ifft1d_32_float, /* ifft_multi */
                  simple_transpose,    /* transpose */
                  kVecSize);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment