-
-
Save glandium/273f01e7c825750854e68d9bf6049d4f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clang -std=gnu99 --target=arm-linux-androideabi -o fft.o -c -march=armv7-a -mthumb -Oz fft.i
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" | |
# 1 "<built-in>" 1 | |
# 1 "<built-in>" 3 | |
# 388 "<built-in>" 3 | |
# 1 "<command line>" 1 | |
# 1 "<built-in>" 2 | |
# 1 "/builds/worker/checkouts/gecko/config/gcc_hidden.h" 1 | |
#pragma GCC visibility push(hidden) | |
# 2 "<built-in>" 2 | |
# 1 "/builds/worker/workspace/obj-build/mozilla-config.h" 1 | |
# 10 "/builds/worker/workspace/obj-build/mozilla-config.h" | |
#pragma clang diagnostic push | |
#pragma clang diagnostic ignored "-Wreserved-id-macro" | |
# 215 "/builds/worker/workspace/obj-build/mozilla-config.h" | |
#pragma clang diagnostic pop | |
# 3 "<built-in>" 2 | |
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" 2 | |
# 12 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" | |
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 1 | |
# 15 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" | |
# 1 "/builds/worker/checkouts/gecko/media/libaom/config/linux/arm/config/aom_config.h" 1 | |
# 16 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 2 | |
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 1 | |
# 15 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 1 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 3 | |
#pragma GCC visibility push(default) | |
# 1 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 1 3 | |
# 35 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 3 | |
typedef int ptrdiff_t; | |
# 46 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 3 | |
typedef unsigned int size_t; | |
# 74 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 3 | |
typedef unsigned int wchar_t; | |
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 2 3 | |
#pragma GCC visibility pop | |
# 16 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 2 | |
# 52 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 1 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 3 | |
#pragma GCC visibility push(default) | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 1 3 | |
# 32 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 3 | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/wchar_limits.h" 1 3 | |
# 36 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/wchar_limits.h" 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3 | |
#pragma GCC visibility push(default) | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 1 3 | |
# 356 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 3 | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/versioning.h" 1 3 | |
# 357 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 2 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/android/api-level.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/android/api-level.h" 3 | |
#pragma GCC visibility push(default) | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 1 3 | |
# 39 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3 | |
#pragma GCC visibility push(default) | |
#pragma GCC visibility pop | |
# 40 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 2 3 | |
# 150 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 3 | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/get_device_api_level_inlines.h" 1 3 | |
# 33 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/get_device_api_level_inlines.h" 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3 | |
#pragma GCC visibility push(default) | |
#pragma GCC visibility pop | |
# 34 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/get_device_api_level_inlines.h" 2 3 | |
int __system_property_get(const char* __name, char* __value); | |
int atoi(const char* __s) __attribute__((__pure__)); | |
/*
 * Reads the "ro.build.version.sdk" system property and returns it as a
 * positive integer. Returns -1 when the property lookup reports fewer than
 * one character or when the parsed value is not greater than zero.
 */
static __inline int android_get_device_api_level() {
  char sdk_text[92] = { 0 };
  const int sdk_text_len =
      __system_property_get("ro.build.version.sdk", sdk_text);
  if (sdk_text_len < 1) {
    return -1;
  }

  const int parsed_level = atoi(sdk_text);
  if (parsed_level > 0) {
    return parsed_level;
  }
  return -1;
}
# 151 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/api-level.h" 2 3 | |
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/android/api-level.h" 2 3 | |
#pragma GCC visibility pop | |
# 358 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 2 3 | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/android/ndk-version.h" 1 3 | |
# 360 "/builds/worker/fetches/android-ndk/sysroot/usr/include/sys/cdefs.h" 2 3 | |
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 2 3 | |
#pragma GCC visibility pop | |
# 37 "/builds/worker/fetches/android-ndk/sysroot/usr/include/bits/wchar_limits.h" 2 3 | |
# 33 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 2 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 3 | |
#pragma GCC visibility push(default) | |
# 1 "/builds/worker/fetches/clang/lib/clang/16.0.0/include/stddef.h" 1 3 | |
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/stddef.h" 2 3 | |
#pragma GCC visibility pop | |
# 34 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 2 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3 | |
#pragma GCC visibility push(default) | |
#pragma GCC visibility pop | |
# 35 "/builds/worker/fetches/android-ndk/sysroot/usr/include/stdint.h" 2 3 | |
typedef signed char __int8_t; | |
typedef unsigned char __uint8_t; | |
typedef short __int16_t; | |
typedef unsigned short __uint16_t; | |
typedef int __int32_t; | |
typedef unsigned int __uint32_t; | |
typedef long long __int64_t; | |
typedef unsigned long long __uint64_t; | |
typedef int __intptr_t; | |
typedef unsigned int __uintptr_t; | |
typedef __int8_t int8_t; | |
typedef __uint8_t uint8_t; | |
typedef __int16_t int16_t; | |
typedef __uint16_t uint16_t; | |
typedef __int32_t int32_t; | |
typedef __uint32_t uint32_t; | |
typedef __int64_t int64_t; | |
typedef __uint64_t uint64_t; | |
typedef __intptr_t intptr_t; | |
typedef __uintptr_t uintptr_t; | |
typedef int8_t int_least8_t; | |
typedef uint8_t uint_least8_t; | |
typedef int16_t int_least16_t; | |
typedef uint16_t uint_least16_t; | |
typedef int32_t int_least32_t; | |
typedef uint32_t uint_least32_t; | |
typedef int64_t int_least64_t; | |
typedef uint64_t uint_least64_t; | |
typedef int8_t int_fast8_t; | |
typedef uint8_t uint_fast8_t; | |
typedef int64_t int_fast64_t; | |
typedef uint64_t uint_fast64_t; | |
typedef int32_t int_fast16_t; | |
typedef uint32_t uint_fast16_t; | |
typedef int32_t int_fast32_t; | |
typedef uint32_t uint_fast32_t; | |
typedef uint64_t uintmax_t; | |
typedef int64_t intmax_t; | |
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 2 3 | |
#pragma GCC visibility pop | |
# 53 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 2 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/inttypes.h" 1 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/inttypes.h" 3 | |
#pragma GCC visibility push(default) | |
# 1 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 1 3 | |
# 22 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/stdint.h" 3 | |
#pragma GCC visibility push(default) | |
#pragma GCC visibility pop | |
# 23 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 2 3 | |
# 1 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 1 3 | |
# 2 "/builds/worker/workspace/obj-build/dist/system_wrappers/sys/cdefs.h" 3 | |
#pragma GCC visibility push(default) | |
#pragma GCC visibility pop | |
# 24 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 2 3 | |
# 252 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 3 | |
/*
 * Pair of intmax_t fields named quot and rem.
 * NOTE(review): presumably the quotient/remainder result type of the standard
 * imaxdiv() function; that function is not declared in this excerpt.
 */
typedef struct { | |
intmax_t quot; | |
intmax_t rem; | |
} imaxdiv_t; | |
# 264 "/builds/worker/fetches/android-ndk/sysroot/usr/include/inttypes.h" 3 | |
intmax_t strtoimax(const char* __s, char** __end_ptr, int __base); | |
uintmax_t strtoumax(const char* __s, char** __end_ptr, int __base); | |
# 4 "/builds/worker/workspace/obj-build/dist/system_wrappers/inttypes.h" 2 3 | |
#pragma GCC visibility pop | |
# 61 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" 2 | |
# 82 "/builds/worker/checkouts/gecko/third_party/aom/aom/aom_integer.h" | |
size_t aom_uleb_size_in_bytes(uint64_t value); | |
int aom_uleb_decode(const uint8_t *buffer, size_t available, uint64_t *value, | |
size_t *length); | |
int aom_uleb_encode(uint64_t value, size_t available, uint8_t *coded_value, | |
size_t *coded_size); | |
int aom_uleb_encode_fixed_size(uint64_t value, size_t available, | |
size_t pad_to_size, uint8_t *coded_value, | |
size_t *coded_size); | |
# 18 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 2 | |
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_ports/mem.h" 1 | |
# 19 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" 2 | |
# 52 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/aom_dsp_common.h" | |
typedef uint8_t qm_val_t; | |
typedef int64_t tran_high_t; | |
typedef int32_t tran_low_t; | |
/* Saturates val to the range [0, 255] and returns it as an 8-bit value. */
static inline uint8_t clip_pixel(int val) {
  if (val > 255) {
    return 255;
  }
  if (val < 0) {
    return 0;
  }
  return (uint8_t)val;
}
/*
 * Limits value to [low, high]. The lower bound is tested first, so when
 * low > high any value below low yields low.
 */
static inline int clamp(int value, int low, int high) {
  if (value < low) {
    return low;
  }
  if (value > high) {
    return high;
  }
  return value;
}
/*
 * 64-bit variant of clamp: limits value to [low, high], testing the lower
 * bound first.
 */
static inline int64_t clamp64(int64_t value, int64_t low, int64_t high) {
  if (value < low) {
    return low;
  }
  if (value > high) {
    return high;
  }
  return value;
}
/*
 * Limits value to [low, high] for doubles, testing the lower bound first.
 * Both tests are ordered comparisons, so a value for which neither holds
 * is returned unchanged.
 */
static inline double fclamp(double value, double low, double high) {
  if (value < low) {
    return low;
  }
  if (value > high) {
    return high;
  }
  return value;
}
/*
 * Clamps val to [0, max] where max depends on bd: 1023 for bd == 10,
 * 4095 for bd == 12, and 255 for bd == 8 or any other value.
 */
static inline uint16_t clip_pixel_highbd(int val, int bd) {
  int max_pixel;
  if (bd == 10) {
    max_pixel = 1023;
  } else if (bd == 12) {
    max_pixel = 4095;
  } else {
    /* bd == 8 and every unrecognised depth share the 8-bit limit. */
    max_pixel = 255;
  }
  return (uint16_t)clamp(val, 0, max_pixel);
}
/*
 * Returns value when it is positive and 0 otherwise.
 *
 * Written as a plain comparison rather than masking with an arithmetic
 * right shift of the sign bit: right-shifting a negative signed integer is
 * implementation-defined in C, so the shift-based form is not portable.
 */
static inline unsigned int negative_to_zero(int value) {
  return value > 0 ? (unsigned int)value : 0u;
}
# 13 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" 2 | |
# 1 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h" 1 | |
# 45 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h" | |
typedef void (*aom_fft_1d_func_t)(const float *input, float *output, | |
int stride); | |
void aom_fft1d_4_float(const float *input, float *output, int stride); | |
void aom_fft1d_8_float(const float *input, float *output, int stride); | |
void aom_fft1d_16_float(const float *input, float *output, int stride); | |
void aom_fft1d_32_float(const float *input, float *output, int stride); | |
typedef void (*aom_fft_transpose_func_t)(const float *input, float *output, | |
int n); | |
# 73 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h" | |
typedef void (*aom_fft_unpack_func_t)(const float *input, float *output, int n); | |
# 90 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h" | |
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n, | |
aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose, | |
aom_fft_unpack_func_t unpack, int vec_size); | |
# 106 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft_common.h" | |
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n, | |
aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi, | |
aom_fft_1d_func_t ifft_multi, | |
aom_fft_transpose_func_t transpose, int vec_size); | |
# 14 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" 2 | |
/*
 * Writes the transpose of the n x n row-major matrix A into B:
 * B[row * n + col] = A[col * n + row].
 */
static inline void simple_transpose(const float *A, float *B, int n) {
  for (int row = 0; row < n; ++row) {
    const int row_base = row * n;
    for (int col = 0; col < n; ++col) {
      const int src_index = col * n + row;
      B[row_base + col] = A[src_index];
    }
  }
}
# 33 "/builds/worker/checkouts/gecko/third_party/aom/aom_dsp/fft.c" | |
/*
 * Expands an n x n float array (col_fft) into interleaved value pairs in
 * output, where element (row, col) occupies output[2 * (row * n + col)] and
 * the slot after it.
 *
 * Only rows and columns 0..n/2 are visited. For each visited (row, col) the
 * pair is built from col_fft[row][col] and, when the index shifted by n/2
 * lies strictly between n/2 and n, from the shifted row and/or column
 * entries. When the shifted row exists, a second pair is also written to
 * row (n - row) with the signs of the shifted-row terms flipped.
 * NOTE(review): presumably this unpacks a packed real-input 2D FFT into
 * (real, imaginary) pairs plus the mirrored rows - confirm against the
 * fft_common.h documentation.
 */
static inline void unpack_2d_output(const float *col_fft, float *output,
                                    int n) {
  const int half = n / 2;
  for (int row = 0; row <= half; ++row) {
    const int row_hi = row + half;
    const int has_row_hi = (row_hi > half) && (row_hi < n);
    const int lo_base = row * n;
    const int hi_base = row_hi * n;
    for (int col = 0; col <= half; ++col) {
      const int col_hi = col + half;
      const int has_col_hi = (col_hi > half) && (col_hi < n);
      const int has_both = has_col_hi && has_row_hi;
      const int dst = 2 * (lo_base + col);

      output[dst] =
          col_fft[lo_base + col] - (has_both ? col_fft[hi_base + col_hi] : 0);
      output[dst + 1] = (has_row_hi ? col_fft[hi_base + col] : 0) +
                        (has_col_hi ? col_fft[lo_base + col_hi] : 0);

      if (!has_row_hi) {
        continue;
      }

      /* Mirrored row: the shifted-row terms enter with opposite sign. */
      const int mirror = 2 * ((n - row) * n + col);
      output[mirror] = col_fft[lo_base + col] +
                       (has_col_hi ? col_fft[hi_base + col_hi] : 0);
      output[mirror + 1] = -col_fft[hi_base + col] +
                           (has_col_hi ? col_fft[lo_base + col_hi] : 0);
    }
  }
}
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n, | |
aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose, | |
aom_fft_unpack_func_t unpack, int vec_size) { | |
for (int x = 0; x < n; x += vec_size) { | |
tform(input + x, output + x, n); | |
} | |
transpose(output, temp, n); | |
for (int x = 0; x < n; x += vec_size) { | |
tform(temp + x, output + x, n); | |
} | |
transpose(output, temp, n); | |
unpack(temp, output, n); | |
} | |
/* Stores input into the float that output points to. */
static inline void store_float(float *output, float input) {
  output[0] = input;
}
/* Returns a + b. */
static inline float add_float(float a, float b) {
  const float sum = a + b;
  return sum;
}
/* Returns a - b. */
static inline float sub_float(float a, float b) {
  const float difference = a - b;
  return difference;
}
/* Returns a * b. */
static inline float mul_float(float a, float b) {
  const float product = a * b;
  return product;
}
/*
 * 2-point 1-D transform over strided float data: writes the sum of
 * input[0] and input[stride] to output[0] and their difference to
 * output[stride]. Both inputs are read before either store.
 */
void aom_fft1d_2_float(const float *input, float *output, int stride) {
  const float first = input[0 * stride];
  const float second = input[1 * stride];

  store_float(&output[0 * stride], first + second);
  store_float(&output[1 * stride], first - second);
}
/*
 * 4-point 1-D transform over strided float data.
 *
 * Reads input[k * stride] for k = 0..3 before storing anything, then writes
 * four results to output[k * stride]:
 *   k = 0: (x0 + x2) + (x1 + x3)
 *   k = 1:  x0 - x2
 *   k = 2: (x0 + x2) - (x1 + x3)
 *   k = 3:  0 - (x1 - x3)
 */
void aom_fft1d_4_float(const float *input, float *output, int stride) {
  const float zero = (float)(0.0f);

  const float x0 = input[0 * stride];
  const float x1 = input[1 * stride];
  const float x2 = input[2 * stride];
  const float x3 = input[3 * stride];

  const float sum02 = add_float(x0, x2);
  const float diff02 = sub_float(x0, x2);
  const float sum13 = add_float(x1, x3);
  const float diff13 = sub_float(x1, x3);

  store_float(&output[0 * stride], add_float(sum02, sum13));
  store_float(&output[1 * stride], diff02);
  store_float(&output[2 * stride], sub_float(sum02, sum13));
  /* Negation is done as 0 - x to match the helper-based arithmetic. */
  store_float(&output[3 * stride], sub_float(zero, diff13));
}
/*
 * 8-point 1-D transform over strided float data.
 *
 * Reads input[k * stride] for k = 0..7 before storing anything, combines
 * them with add_float/sub_float/mul_float and the constant 0.707107f
 * (numerically about 1/sqrt(2)), and writes eight results to
 * output[k * stride].
 * NOTE(review): presumably a real-input FFT with a packed output layout -
 * confirm against the fft_common.h documentation.
 */
void aom_fft1d_8_float(const float *input, float *output, int stride) {
  const float zero = (float)(0.0f);
  const float scale = (float)(0.707107f);

  const float x0 = input[0 * stride];
  const float x1 = input[1 * stride];
  const float x2 = input[2 * stride];
  const float x3 = input[3 * stride];
  const float x4 = input[4 * stride];
  const float x5 = input[5 * stride];
  const float x6 = input[6 * stride];
  const float x7 = input[7 * stride];

  /* Even-indexed inputs. */
  const float sum04 = add_float(x0, x4);
  const float diff04 = sub_float(x0, x4);
  const float sum26 = add_float(x2, x6);
  const float diff26 = sub_float(x2, x6);
  const float even_sum = add_float(sum04, sum26);
  const float even_diff = sub_float(sum04, sum26);

  /* Odd-indexed inputs. */
  const float sum15 = add_float(x1, x5);
  const float diff15 = sub_float(x1, x5);
  const float sum37 = add_float(x3, x7);
  const float diff37 = sub_float(x3, x7);
  const float odd_sum = add_float(sum15, sum37);
  const float odd_diff = sub_float(sum15, sum37);

  /* Scaled terms shared by two outputs each. */
  const float scaled_diff = mul_float(scale, sub_float(diff15, diff37));
  const float scaled_sum = mul_float(scale, add_float(diff37, diff15));

  store_float(&output[0 * stride], add_float(even_sum, odd_sum));
  store_float(&output[1 * stride], add_float(diff04, scaled_diff));
  store_float(&output[2 * stride], even_diff);
  store_float(&output[3 * stride], sub_float(diff04, scaled_diff));
  store_float(&output[4 * stride], sub_float(even_sum, odd_sum));
  store_float(&output[5 * stride],
              sub_float(sub_float(zero, diff26), scaled_sum));
  store_float(&output[6 * stride], sub_float(zero, odd_diff));
  store_float(&output[7 * stride], sub_float(diff26, scaled_sum));
}
/*
 * 16-point 1-D transform over strided float data.
 *
 * Reads the sixteen values input[k * stride] (k = 0..15) into i0..i15 before
 * any store, combines them through add_float/sub_float/mul_float using the
 * constants 0.707107f, 0.92388f and 0.382683f, and stores sixteen results to
 * output[k * stride] via store_float.
 * kWeight0 is 0.0f, so sub_float(kWeight0, x) is how this code negates x.
 * The two-element arrays (w16, w18, w35, w37) each hold a pair of
 * intermediate values that are later mixed with kWeight3/kWeight4.
 * NOTE(review): the single-line layout and the gaps in the wN numbering look
 * machine-generated (presumably a macro expansion) - confirm before
 * hand-editing.
 */
void aom_fft1d_16_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float w0 = add_float(i0, i8); const float w1 = sub_float(i0, i8); const float w2 = add_float(i4, i12); const float w3 = sub_float(i4, i12); const float w4 = add_float(w0, w2); const float w5 = sub_float(w0, w2); const float w7 = add_float(i2, i10); const float w8 = sub_float(i2, i10); const float w9 = add_float(i6, i14); const float w10 = sub_float(i6, i14); const float w11 = add_float(w7, w9); const float w12 = sub_float(w7, w9); const float w14 = add_float(w4, w11); const float w15 = sub_float(w4, w11); const float w16[2] = { add_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(sub_float(kWeight0, w3), mul_float(kWeight2, add_float(w10, w8))) }; const float w18[2] = { sub_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(w3, mul_float(kWeight2, add_float(w10, w8))) }; const float w19 = add_float(i1, i9); const float w20 = sub_float(i1, i9); const float w21 = add_float(i5, i13); const float w22 = sub_float(i5, i13); const float w23 = add_float(w19, w21); const float w24 = sub_float(w19, w21); const float w26 = add_float(i3, i11); const float w27 = 
sub_float(i3, i11); const float w28 = add_float(i7, i15); const float w29 = sub_float(i7, i15); const float w30 = add_float(w26, w28); const float w31 = sub_float(w26, w28); const float w33 = add_float(w23, w30); const float w34 = sub_float(w23, w30); const float w35[2] = { add_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(sub_float(kWeight0, w22), mul_float(kWeight2, add_float(w29, w27))) }; const float w37[2] = { sub_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(w22, mul_float(kWeight2, add_float(w29, w27))) }; store_float(output + 0 * stride, add_float(w14, w33)); store_float(output + 1 * stride, add_float(w16[0], add_float(mul_float(kWeight3, w35[0]), mul_float(kWeight4, w35[1])))); store_float(output + 2 * stride, add_float(w5, mul_float(kWeight2, sub_float(w24, w31)))); store_float(output + 3 * stride, add_float(w18[0], add_float(mul_float(kWeight4, w37[0]), mul_float(kWeight3, w37[1])))); store_float(output + 4 * stride, w15); store_float(output + 5 * stride, add_float(w18[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w37[0])), mul_float(kWeight3, w37[1])))); store_float(output + 6 * stride, sub_float(w5, mul_float(kWeight2, sub_float(w24, w31)))); store_float(output + 7 * stride, add_float(w16[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w35[0])), mul_float(kWeight4, w35[1])))); store_float(output + 8 * stride, sub_float(w14, w33)); store_float(output + 9 * stride, add_float(w16[1], sub_float(mul_float(kWeight3, w35[1]), mul_float(kWeight4, w35[0])))); store_float(output + 10 * stride, sub_float(sub_float(kWeight0, w12), mul_float(kWeight2, add_float(w31, w24)))); store_float(output + 11 * stride, add_float(w18[1], sub_float(mul_float(kWeight4, w37[1]), mul_float(kWeight3, w37[0])))); store_float(output + 12 * stride, sub_float(kWeight0, w34)); store_float(output + 13 * stride, sub_float(sub_float(kWeight0, w18[1]), sub_float(mul_float(kWeight3, w37[0]), mul_float(kWeight4, w37[1])))); store_float(output 
+ 14 * stride, sub_float(w12, mul_float(kWeight2, add_float(w31, w24)))); store_float(output + 15 * stride, sub_float(sub_float(kWeight0, w16[1]), sub_float(mul_float(kWeight4, w35[0]), mul_float(kWeight3, w35[1])))); }; | |
/*
 * 32-point 1-D transform over strided float data.
 *
 * Reads the thirty-two values input[k * stride] (k = 0..31) into i0..i31
 * before any store, combines them through add_float/sub_float/mul_float
 * using the constants 0.707107f, 0.92388f, 0.382683f, 0.980785f, 0.19509f,
 * 0.83147f and 0.55557f, and stores thirty-two results to output[k * stride]
 * via store_float.
 * kWeight0 is 0.0f, so sub_float(kWeight0, x) is how this code negates x.
 * w0..w46 are derived from the even-indexed inputs and w47..w93 from the
 * odd-indexed inputs; the final stores mix the two groups.
 * NOTE(review): the single-line layout and the gaps in the wN numbering look
 * machine-generated (presumably a macro expansion) - confirm before
 * hand-editing.
 */
void aom_fft1d_32_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float kWeight5 = (float)(0.980785f); const float kWeight6 = (float)(0.19509f); const float kWeight7 = (float)(0.83147f); const float kWeight8 = (float)(0.55557f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float i16 = *(input + 16 * stride); const float i17 = *(input + 17 * stride); const float i18 = *(input + 18 * stride); const float i19 = *(input + 19 * stride); const float i20 = *(input + 20 * stride); const float i21 = *(input + 21 * stride); const float i22 = *(input + 22 * stride); const float i23 = *(input + 23 * stride); const float i24 = *(input + 24 * stride); const float i25 = *(input + 25 * stride); const float i26 = *(input + 26 * stride); const float i27 = *(input + 27 * stride); const float i28 = *(input + 28 * stride); const float i29 = *(input + 29 * stride); const float i30 = *(input + 30 * stride); const float i31 = *(input + 31 * stride); const float w0 = add_float(i0, i16); const float w1 = sub_float(i0, i16); const float w2 = add_float(i8, i24); const float w3 = sub_float(i8, i24); const float w4 = add_float(w0, w2); const float w5 = sub_float(w0, w2); const float w7 = add_float(i4, i20); const 
float w8 = sub_float(i4, i20); const float w9 = add_float(i12, i28); const float w10 = sub_float(i12, i28); const float w11 = add_float(w7, w9); const float w12 = sub_float(w7, w9); const float w14 = add_float(w4, w11); const float w15 = sub_float(w4, w11); const float w16[2] = { add_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(sub_float(kWeight0, w3), mul_float(kWeight2, add_float(w10, w8))) }; const float w18[2] = { sub_float(w1, mul_float(kWeight2, sub_float(w8, w10))), sub_float(w3, mul_float(kWeight2, add_float(w10, w8))) }; const float w19 = add_float(i2, i18); const float w20 = sub_float(i2, i18); const float w21 = add_float(i10, i26); const float w22 = sub_float(i10, i26); const float w23 = add_float(w19, w21); const float w24 = sub_float(w19, w21); const float w26 = add_float(i6, i22); const float w27 = sub_float(i6, i22); const float w28 = add_float(i14, i30); const float w29 = sub_float(i14, i30); const float w30 = add_float(w26, w28); const float w31 = sub_float(w26, w28); const float w33 = add_float(w23, w30); const float w34 = sub_float(w23, w30); const float w35[2] = { add_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(sub_float(kWeight0, w22), mul_float(kWeight2, add_float(w29, w27))) }; const float w37[2] = { sub_float(w20, mul_float(kWeight2, sub_float(w27, w29))), sub_float(w22, mul_float(kWeight2, add_float(w29, w27))) }; const float w38 = add_float(w14, w33); const float w39 = sub_float(w14, w33); const float w40[2] = { add_float(w16[0], add_float(mul_float(kWeight3, w35[0]), mul_float(kWeight4, w35[1]))), add_float(w16[1], sub_float(mul_float(kWeight3, w35[1]), mul_float(kWeight4, w35[0]))) }; const float w41[2] = { add_float(w5, mul_float(kWeight2, sub_float(w24, w31))), sub_float(sub_float(kWeight0, w12), mul_float(kWeight2, add_float(w31, w24))) }; const float w42[2] = { add_float(w18[0], add_float(mul_float(kWeight4, w37[0]), mul_float(kWeight3, w37[1]))), add_float(w18[1], sub_float(mul_float(kWeight4, 
w37[1]), mul_float(kWeight3, w37[0]))) }; const float w44[2] = { add_float(w18[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w37[0])), mul_float(kWeight3, w37[1]))), sub_float(sub_float(kWeight0, w18[1]), sub_float(mul_float(kWeight3, w37[0]), mul_float(kWeight4, w37[1]))) }; const float w45[2] = { sub_float(w5, mul_float(kWeight2, sub_float(w24, w31))), sub_float(w12, mul_float(kWeight2, add_float(w31, w24))) }; const float w46[2] = { add_float(w16[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w35[0])), mul_float(kWeight4, w35[1]))), sub_float(sub_float(kWeight0, w16[1]), sub_float(mul_float(kWeight4, w35[0]), mul_float(kWeight3, w35[1]))) }; const float w47 = add_float(i1, i17); const float w48 = sub_float(i1, i17); const float w49 = add_float(i9, i25); const float w50 = sub_float(i9, i25); const float w51 = add_float(w47, w49); const float w52 = sub_float(w47, w49); const float w54 = add_float(i5, i21); const float w55 = sub_float(i5, i21); const float w56 = add_float(i13, i29); const float w57 = sub_float(i13, i29); const float w58 = add_float(w54, w56); const float w59 = sub_float(w54, w56); const float w61 = add_float(w51, w58); const float w62 = sub_float(w51, w58); const float w63[2] = { add_float(w48, mul_float(kWeight2, sub_float(w55, w57))), sub_float(sub_float(kWeight0, w50), mul_float(kWeight2, add_float(w57, w55))) }; const float w65[2] = { sub_float(w48, mul_float(kWeight2, sub_float(w55, w57))), sub_float(w50, mul_float(kWeight2, add_float(w57, w55))) }; const float w66 = add_float(i3, i19); const float w67 = sub_float(i3, i19); const float w68 = add_float(i11, i27); const float w69 = sub_float(i11, i27); const float w70 = add_float(w66, w68); const float w71 = sub_float(w66, w68); const float w73 = add_float(i7, i23); const float w74 = sub_float(i7, i23); const float w75 = add_float(i15, i31); const float w76 = sub_float(i15, i31); const float w77 = add_float(w73, w75); const float w78 = sub_float(w73, w75); const float w80 = 
add_float(w70, w77); const float w81 = sub_float(w70, w77); const float w82[2] = { add_float(w67, mul_float(kWeight2, sub_float(w74, w76))), sub_float(sub_float(kWeight0, w69), mul_float(kWeight2, add_float(w76, w74))) }; const float w84[2] = { sub_float(w67, mul_float(kWeight2, sub_float(w74, w76))), sub_float(w69, mul_float(kWeight2, add_float(w76, w74))) }; const float w85 = add_float(w61, w80); const float w86 = sub_float(w61, w80); const float w87[2] = { add_float(w63[0], add_float(mul_float(kWeight3, w82[0]), mul_float(kWeight4, w82[1]))), add_float(w63[1], sub_float(mul_float(kWeight3, w82[1]), mul_float(kWeight4, w82[0]))) }; const float w88[2] = { add_float(w52, mul_float(kWeight2, sub_float(w71, w78))), sub_float(sub_float(kWeight0, w59), mul_float(kWeight2, add_float(w78, w71))) }; const float w89[2] = { add_float(w65[0], add_float(mul_float(kWeight4, w84[0]), mul_float(kWeight3, w84[1]))), add_float(w65[1], sub_float(mul_float(kWeight4, w84[1]), mul_float(kWeight3, w84[0]))) }; const float w91[2] = { add_float(w65[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w84[0])), mul_float(kWeight3, w84[1]))), sub_float(sub_float(kWeight0, w65[1]), sub_float(mul_float(kWeight3, w84[0]), mul_float(kWeight4, w84[1]))) }; const float w92[2] = { sub_float(w52, mul_float(kWeight2, sub_float(w71, w78))), sub_float(w59, mul_float(kWeight2, add_float(w78, w71))) }; const float w93[2] = { add_float(w63[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w82[0])), mul_float(kWeight4, w82[1]))), sub_float(sub_float(kWeight0, w63[1]), sub_float(mul_float(kWeight4, w82[0]), mul_float(kWeight3, w82[1]))) }; store_float(output + 0 * stride, add_float(w38, w85)); store_float(output + 1 * stride, add_float(w40[0], add_float(mul_float(kWeight5, w87[0]), mul_float(kWeight6, w87[1])))); store_float(output + 2 * stride, add_float(w41[0], add_float(mul_float(kWeight3, w88[0]), mul_float(kWeight4, w88[1])))); store_float(output + 3 * stride, add_float(w42[0], 
add_float(mul_float(kWeight7, w89[0]), mul_float(kWeight8, w89[1])))); store_float(output + 4 * stride, add_float(w15, mul_float(kWeight2, sub_float(w62, w81)))); store_float(output + 5 * stride, add_float(w44[0], add_float(mul_float(kWeight8, w91[0]), mul_float(kWeight7, w91[1])))); store_float(output + 6 * stride, add_float(w45[0], add_float(mul_float(kWeight4, w92[0]), mul_float(kWeight3, w92[1])))); store_float(output + 7 * stride, add_float(w46[0], add_float(mul_float(kWeight6, w93[0]), mul_float(kWeight5, w93[1])))); store_float(output + 8 * stride, w39); store_float(output + 9 * stride, add_float(w46[0], sub_float(sub_float(kWeight0, mul_float(kWeight6, w93[0])), mul_float(kWeight5, w93[1])))); store_float(output + 10 * stride, add_float(w45[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w92[0])), mul_float(kWeight3, w92[1])))); store_float(output + 11 * stride, add_float(w44[0], sub_float(sub_float(kWeight0, mul_float(kWeight8, w91[0])), mul_float(kWeight7, w91[1])))); store_float(output + 12 * stride, sub_float(w15, mul_float(kWeight2, sub_float(w62, w81)))); store_float(output + 13 * stride, add_float(w42[0], sub_float(sub_float(kWeight0, mul_float(kWeight7, w89[0])), mul_float(kWeight8, w89[1])))); store_float(output + 14 * stride, add_float(w41[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w88[0])), mul_float(kWeight4, w88[1])))); store_float(output + 15 * stride, add_float(w40[0], sub_float(sub_float(kWeight0, mul_float(kWeight5, w87[0])), mul_float(kWeight6, w87[1])))); store_float(output + 16 * stride, sub_float(w38, w85)); store_float(output + 17 * stride, add_float(w40[1], sub_float(mul_float(kWeight5, w87[1]), mul_float(kWeight6, w87[0])))); store_float(output + 18 * stride, add_float(w41[1], sub_float(mul_float(kWeight3, w88[1]), mul_float(kWeight4, w88[0])))); store_float(output + 19 * stride, add_float(w42[1], sub_float(mul_float(kWeight7, w89[1]), mul_float(kWeight8, w89[0])))); store_float(output + 20 * stride, 
sub_float(sub_float(kWeight0, w34), mul_float(kWeight2, add_float(w81, w62)))); store_float(output + 21 * stride, add_float(w44[1], sub_float(mul_float(kWeight8, w91[1]), mul_float(kWeight7, w91[0])))); store_float(output + 22 * stride, add_float(w45[1], sub_float(mul_float(kWeight4, w92[1]), mul_float(kWeight3, w92[0])))); store_float(output + 23 * stride, add_float(w46[1], sub_float(mul_float(kWeight6, w93[1]), mul_float(kWeight5, w93[0])))); store_float(output + 24 * stride, sub_float(kWeight0, w86)); store_float(output + 25 * stride, sub_float(sub_float(kWeight0, w46[1]), sub_float(mul_float(kWeight5, w93[0]), mul_float(kWeight6, w93[1])))); store_float(output + 26 * stride, sub_float(sub_float(kWeight0, w45[1]), sub_float(mul_float(kWeight3, w92[0]), mul_float(kWeight4, w92[1])))); store_float(output + 27 * stride, sub_float(sub_float(kWeight0, w44[1]), sub_float(mul_float(kWeight7, w91[0]), mul_float(kWeight8, w91[1])))); store_float(output + 28 * stride, sub_float(w34, mul_float(kWeight2, add_float(w81, w62)))); store_float(output + 29 * stride, sub_float(sub_float(kWeight0, w42[1]), sub_float(mul_float(kWeight8, w89[0]), mul_float(kWeight7, w89[1])))); store_float(output + 30 * stride, sub_float(sub_float(kWeight0, w41[1]), sub_float(mul_float(kWeight4, w88[0]), mul_float(kWeight3, w88[1])))); store_float(output + 31 * stride, sub_float(sub_float(kWeight0, w40[1]), sub_float(mul_float(kWeight6, w87[0]), mul_float(kWeight5, w87[1])))); }; | |
void aom_fft2x2_float_c(const float *input, float *temp, float *output) { | |
aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose, | |
unpack_2d_output, 1); | |
} | |
void aom_fft4x4_float_c(const float *input, float *temp, float *output) { | |
aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose, | |
unpack_2d_output, 1); | |
} | |
void aom_fft8x8_float_c(const float *input, float *temp, float *output) { | |
aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose, | |
unpack_2d_output, 1); | |
} | |
void aom_fft16x16_float_c(const float *input, float *temp, float *output) { | |
aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose, | |
unpack_2d_output, 1); | |
} | |
void aom_fft32x32_float_c(const float *input, float *temp, float *output) { | |
aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose, | |
unpack_2d_output, 1); | |
} | |
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n, | |
aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi, | |
aom_fft_1d_func_t ifft_multi, | |
aom_fft_transpose_func_t transpose, int vec_size) { | |
for (int y = 0; y <= n / 2; ++y) { | |
output[y * n] = input[2 * y * n]; | |
output[y * n + 1] = input[2 * (y * n + n / 2)]; | |
} | |
for (int y = n / 2 + 1; y < n; ++y) { | |
output[y * n] = input[2 * (y - n / 2) * n + 1]; | |
output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1]; | |
} | |
for (int i = 0; i < 2; i += vec_size) { | |
ifft_multi(output + i, temp + i, n); | |
} | |
for (int y = 0; y < n; ++y) { | |
for (int x = 1; x < n / 2; ++x) { | |
output[y * n + (x + 1)] = input[2 * (y * n + x)]; | |
} | |
for (int x = 1; x < n / 2; ++x) { | |
output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1]; | |
} | |
} | |
for (int y = 2; y < vec_size; y++) { | |
fft_single(output + y, temp + y, n); | |
} | |
for (int y = (((2) > (vec_size)) ? (2) : (vec_size)); y < n; y += vec_size) { | |
fft_multi(output + y, temp + y, n); | |
} | |
for (int x = 0; x < n; ++x) { | |
output[x] = temp[x * n]; | |
output[(n / 2) * n + x] = temp[x * n + 1]; | |
} | |
for (int y = 1; y < n / 2; ++y) { | |
for (int x = 0; x <= n / 2; ++x) { | |
output[x + y * n] = | |
temp[(y + 1) + x * n] + | |
((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0); | |
} | |
for (int x = n / 2 + 1; x < n; ++x) { | |
output[x + y * n] = temp[(y + 1) + (n - x) * n] - | |
temp[(y + n / 2) + ((n - x) + n / 2) * n]; | |
} | |
for (int x = 0; x <= n / 2; ++x) { | |
output[x + (y + n / 2) * n] = | |
temp[(y + n / 2) + x * n] - | |
((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0); | |
} | |
for (int x = n / 2 + 1; x < n; ++x) { | |
output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] + | |
temp[(y + n / 2) + (n - x) * n]; | |
} | |
} | |
for (int y = 0; y < n; y += vec_size) { | |
ifft_multi(output + y, temp + y, n); | |
} | |
transpose(temp, output, n); | |
} | |
/*
 * 2-point 1-D inverse kernel.
 *
 * Loads input[0] and input[stride], then hands their sum to store_float for
 * output + 0 and their difference (first minus second) for output + stride.
 * Both loads happen before the first store_float call.
 */
void aom_ifft1d_2_float(const float *input, float *output, int stride) {
  const float first = input[0];
  const float second = input[stride];
  store_float(output, first + second);
  store_float(output + stride, first - second);
}
/*
 * 4-point 1-D inverse kernel, fully unrolled.
 *
 * The four samples input[k * stride] (k = 0..3) are loaded before the first
 * store_float call; results go to output + k * stride.  Negation is spelled
 * sub_float(kZero, x).  The second element of `doubled` and the first
 * element of `twisted` are computed but never consumed, as before.
 * NOTE(review): add_float / sub_float / store_float are defined elsewhere in
 * the file -- presumably thin wrappers around +, - and a plain store;
 * confirm.
 */
void aom_ifft1d_4_float(const float *input, float *output, int stride) {
  const float kZero = (float)(0.0f);

  float in[4];
  for (int k = 0; k < 4; ++k) {
    in[k] = input[k * stride];
  }

  const float even_sum = add_float(in[0], in[2]);
  const float even_diff = sub_float(in[0], in[2]);
  const float doubled[2] = { add_float(in[1], in[1]),
                             sub_float(in[3], in[3]) };
  const float twisted[2] = { sub_float(in[1], in[1]),
                             sub_float(sub_float(kZero, in[3]), in[3]) };

  store_float(output + 0 * stride, add_float(even_sum, doubled[0]));
  store_float(output + 1 * stride, add_float(even_diff, twisted[1]));
  store_float(output + 2 * stride, sub_float(even_sum, doubled[0]));
  store_float(output + 3 * stride, sub_float(even_diff, twisted[1]));
}
/*
 * 8-point 1-D inverse kernel, fully unrolled.
 *
 * All eight samples input[k * stride] (k = 0..7) are loaded before the first
 * store_float call; results go to output + k * stride.  Negation is spelled
 * sub_float(kZero, x), and 0.707107f is 1/sqrt(2) rounded to six digits.
 * Intermediates keep the wNN numbering of the unrolled form; several pair
 * elements are computed but never consumed, exactly as before.
 * NOTE(review): add_float / sub_float / mul_float / store_float are defined
 * elsewhere in the file -- presumably thin wrappers around +, -, * and a
 * plain store; confirm.
 */
void aom_ifft1d_8_float(const float *input, float *output, int stride) {
  const float kZero = (float)(0.0f);
  const float kInvSqrt2 = (float)(0.707107f);

  float in[8];
  for (int k = 0; k < 8; ++k) {
    in[k] = input[k * stride];
  }

  /* Stage built from samples 0, 2, 4, 6. */
  const float w6 = add_float(in[0], in[4]);
  const float w7 = sub_float(in[0], in[4]);
  const float w8[2] = { add_float(in[2], in[2]), sub_float(in[6], in[6]) };
  const float w9[2] = { sub_float(in[2], in[2]),
                        sub_float(sub_float(kZero, in[6]), in[6]) };
  const float w10[2] = { add_float(w6, w8[0]), w8[1] };
  const float w11[2] = { sub_float(w6, w8[0]), sub_float(kZero, w8[1]) };
  const float w12[2] = { add_float(w7, w9[1]), sub_float(kZero, w9[0]) };
  const float w13[2] = { sub_float(w7, w9[1]), w9[0] };

  /* Stage built from samples 1, 3, 5, 7. */
  const float w14[2] = { add_float(in[1], in[3]), sub_float(in[7], in[5]) };
  const float w15[2] = { sub_float(in[1], in[3]),
                         sub_float(sub_float(kZero, in[5]), in[7]) };
  const float w16[2] = { add_float(in[3], in[1]), sub_float(in[5], in[7]) };
  const float w17[2] = { sub_float(in[3], in[1]),
                         sub_float(sub_float(kZero, in[7]), in[5]) };
  const float w18[2] = { add_float(w14[0], w16[0]),
                         add_float(w14[1], w16[1]) };
  const float w19[2] = { sub_float(w14[0], w16[0]),
                         sub_float(w14[1], w16[1]) };
  const float w20[2] = { add_float(w15[0], w17[1]),
                         sub_float(w15[1], w17[0]) };
  const float w21[2] = { sub_float(w15[0], w17[1]),
                         add_float(w15[1], w17[0]) };

  /* Combine both stages into the eight results. */
  store_float(output + 0 * stride, add_float(w10[0], w18[0]));
  store_float(output + 1 * stride,
              add_float(w12[0],
                        mul_float(kInvSqrt2, add_float(w20[0], w20[1]))));
  store_float(output + 2 * stride, add_float(w11[0], w19[1]));
  store_float(output + 3 * stride,
              sub_float(w13[0],
                        mul_float(kInvSqrt2, sub_float(w21[0], w21[1]))));
  store_float(output + 4 * stride, sub_float(w10[0], w18[0]));
  store_float(output + 5 * stride,
              add_float(w12[0],
                        sub_float(sub_float(kZero,
                                            mul_float(kInvSqrt2, w20[0])),
                                  mul_float(kInvSqrt2, w20[1]))));
  store_float(output + 6 * stride, sub_float(w11[0], w19[1]));
  store_float(output + 7 * stride,
              add_float(w13[0],
                        mul_float(kInvSqrt2, sub_float(w21[0], w21[1]))));
}
/*
 * 16-point 1-D inverse kernel in fully unrolled form.
 *
 * Loads input[k * stride] for k = 0..15 into i0..i15 before anything else,
 * builds the intermediates w14..w61 from them with add_float / sub_float /
 * mul_float, and finally passes sixteen results to store_float at
 * output + k * stride.  No store_float call precedes the last load.
 *
 * Constants: kWeight0 is 0.0f and is only used to negate a value via
 * sub_float(kWeight0, x); kWeight2 = 0.707107f, kWeight3 = 0.92388f and
 * kWeight4 = 0.382683f numerically match 1/sqrt(2), cos(pi/8) and sin(pi/8).
 *
 * NOTE(review): the *_float helpers are defined elsewhere in the file --
 * presumably thin wrappers around +, -, * and a plain store; confirm.  The
 * operation order below determines the rounded float results, so do not
 * re-associate or factor the expressions.
 */
void aom_ifft1d_16_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float w14 = add_float(i0, i8); const float w15 = sub_float(i0, i8); const float w16[2] = { add_float(i4, i4), sub_float(i12, i12) }; const float w17[2] = { sub_float(i4, i4), sub_float(sub_float(kWeight0, i12), i12) }; const float w18[2] = { add_float(w14, w16[0]), w16[1] }; const float w19[2] = { sub_float(w14, w16[0]), sub_float(kWeight0, w16[1]) }; const float w20[2] = { add_float(w15, w17[1]), sub_float(kWeight0, w17[0]) }; const float w21[2] = { sub_float(w15, w17[1]), w17[0] }; const float w22[2] = { add_float(i2, i6), sub_float(i14, i10) }; const float w23[2] = { sub_float(i2, i6), sub_float(sub_float(kWeight0, i10), i14) }; const float w24[2] = { add_float(i6, i2), sub_float(i10, i14) }; const float w25[2] = { sub_float(i6, i2), sub_float(sub_float(kWeight0, i14), i10) }; const float w26[2] = { add_float(w22[0], w24[0]), add_float(w22[1], w24[1]) }; const float w27[2] = { sub_float(w22[0], w24[0]), sub_float(w22[1], w24[1]) }; const float w28[2] = { add_float(w23[0], w25[1]), sub_float(w23[1], w25[0]) }; const float w29[2] = { sub_float(w23[0], w25[1]), add_float(w23[1], 
w25[0]) }; const float w30[2] = { add_float(w18[0], w26[0]), add_float(w18[1], w26[1]) }; const float w31[2] = { sub_float(w18[0], w26[0]), sub_float(w18[1], w26[1]) }; const float w32[2] = { add_float(w20[0], mul_float(kWeight2, add_float(w28[0], w28[1]))), add_float(w20[1], mul_float(kWeight2, sub_float(w28[1], w28[0]))) }; const float w33[2] = { add_float(w20[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w28[0])), mul_float(kWeight2, w28[1]))), add_float(w20[1], mul_float(kWeight2, sub_float(w28[0], w28[1]))) }; const float w34[2] = { add_float(w19[0], w27[1]), sub_float(w19[1], w27[0]) }; const float w35[2] = { sub_float(w19[0], w27[1]), add_float(w19[1], w27[0]) }; const float w36[2] = { sub_float(w21[0], mul_float(kWeight2, sub_float(w29[0], w29[1]))), sub_float(w21[1], mul_float(kWeight2, add_float(w29[1], w29[0]))) }; const float w37[2] = { add_float(w21[0], mul_float(kWeight2, sub_float(w29[0], w29[1]))), add_float(w21[1], mul_float(kWeight2, add_float(w29[1], w29[0]))) }; const float w38[2] = { add_float(i1, i7), sub_float(i15, i9) }; const float w39[2] = { sub_float(i1, i7), sub_float(sub_float(kWeight0, i9), i15) }; const float w40[2] = { add_float(i5, i3), sub_float(i11, i13) }; const float w41[2] = { sub_float(i5, i3), sub_float(sub_float(kWeight0, i13), i11) }; const float w42[2] = { add_float(w38[0], w40[0]), add_float(w38[1], w40[1]) }; const float w43[2] = { sub_float(w38[0], w40[0]), sub_float(w38[1], w40[1]) }; const float w44[2] = { add_float(w39[0], w41[1]), sub_float(w39[1], w41[0]) }; const float w45[2] = { sub_float(w39[0], w41[1]), add_float(w39[1], w41[0]) }; const float w46[2] = { add_float(i3, i5), sub_float(i13, i11) }; const float w47[2] = { sub_float(i3, i5), sub_float(sub_float(kWeight0, i11), i13) }; const float w48[2] = { add_float(i7, i1), sub_float(i9, i15) }; const float w49[2] = { sub_float(i7, i1), sub_float(sub_float(kWeight0, i15), i9) }; const float w50[2] = { add_float(w46[0], w48[0]), add_float(w46[1], w48[1]) }; 
const float w51[2] = { sub_float(w46[0], w48[0]), sub_float(w46[1], w48[1]) }; const float w52[2] = { add_float(w47[0], w49[1]), sub_float(w47[1], w49[0]) }; const float w53[2] = { sub_float(w47[0], w49[1]), add_float(w47[1], w49[0]) }; const float w54[2] = { add_float(w42[0], w50[0]), add_float(w42[1], w50[1]) }; const float w55[2] = { sub_float(w42[0], w50[0]), sub_float(w42[1], w50[1]) }; const float w56[2] = { add_float(w44[0], mul_float(kWeight2, add_float(w52[0], w52[1]))), add_float(w44[1], mul_float(kWeight2, sub_float(w52[1], w52[0]))) }; const float w57[2] = { add_float(w44[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w52[0])), mul_float(kWeight2, w52[1]))), add_float(w44[1], mul_float(kWeight2, sub_float(w52[0], w52[1]))) }; const float w58[2] = { add_float(w43[0], w51[1]), sub_float(w43[1], w51[0]) }; const float w59[2] = { sub_float(w43[0], w51[1]), add_float(w43[1], w51[0]) }; const float w60[2] = { sub_float(w45[0], mul_float(kWeight2, sub_float(w53[0], w53[1]))), sub_float(w45[1], mul_float(kWeight2, add_float(w53[1], w53[0]))) }; const float w61[2] = { add_float(w45[0], mul_float(kWeight2, sub_float(w53[0], w53[1]))), add_float(w45[1], mul_float(kWeight2, add_float(w53[1], w53[0]))) }; store_float(output + 0 * stride, add_float(w30[0], w54[0])); store_float(output + 1 * stride, add_float(w32[0], add_float(mul_float(kWeight3, w56[0]), mul_float(kWeight4, w56[1])))); store_float(output + 2 * stride, add_float(w34[0], mul_float(kWeight2, add_float(w58[0], w58[1])))); store_float(output + 3 * stride, add_float(w36[0], add_float(mul_float(kWeight4, w60[0]), mul_float(kWeight3, w60[1])))); store_float(output + 4 * stride, add_float(w31[0], w55[1])); store_float(output + 5 * stride, sub_float(w33[0], sub_float(mul_float(kWeight4, w57[0]), mul_float(kWeight3, w57[1])))); store_float(output + 6 * stride, sub_float(w35[0], mul_float(kWeight2, sub_float(w59[0], w59[1])))); store_float(output + 7 * stride, sub_float(w37[0], 
sub_float(mul_float(kWeight3, w61[0]), mul_float(kWeight4, w61[1])))); store_float(output + 8 * stride, sub_float(w30[0], w54[0])); store_float(output + 9 * stride, add_float(w32[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w56[0])), mul_float(kWeight4, w56[1])))); store_float(output + 10 * stride, add_float(w34[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w58[0])), mul_float(kWeight2, w58[1])))); store_float(output + 11 * stride, add_float(w36[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w60[0])), mul_float(kWeight3, w60[1])))); store_float(output + 12 * stride, sub_float(w31[0], w55[1])); store_float(output + 13 * stride, add_float(w33[0], sub_float(mul_float(kWeight4, w57[0]), mul_float(kWeight3, w57[1])))); store_float(output + 14 * stride, add_float(w35[0], mul_float(kWeight2, sub_float(w59[0], w59[1])))); store_float(output + 15 * stride, add_float(w37[0], sub_float(mul_float(kWeight3, w61[0]), mul_float(kWeight4, w61[1])))); }; | |
/*
 * 32-point 1-D inverse kernel in fully unrolled form.
 *
 * Loads input[k * stride] for k = 0..31 into i0..i31 before anything else,
 * builds the intermediates w30..w157 from them with add_float / sub_float /
 * mul_float, and finally passes thirty-two results to store_float at
 * output + k * stride.  No store_float call precedes the last load.
 *
 * Constants: kWeight0 is 0.0f and is only used to negate a value via
 * sub_float(kWeight0, x).  kWeight2 = 0.707107f numerically matches
 * 1/sqrt(2); kWeight3 / kWeight4 (0.92388f, 0.382683f) match cos / sin of
 * pi/8; kWeight5 / kWeight6 (0.980785f, 0.19509f) match cos / sin of pi/16;
 * kWeight7 / kWeight8 (0.83147f, 0.55557f) match cos / sin of 3*pi/16.
 *
 * NOTE(review): the *_float helpers are defined elsewhere in the file --
 * presumably thin wrappers around +, -, * and a plain store; confirm.  The
 * operation order below determines the rounded float results, so do not
 * re-associate or factor the expressions.
 */
void aom_ifft1d_32_float(const float *input, float *output, int stride) { const float kWeight0 = (float)(0.0f); const float kWeight2 = (float)(0.707107f); const float kWeight3 = (float)(0.92388f); const float kWeight4 = (float)(0.382683f); const float kWeight5 = (float)(0.980785f); const float kWeight6 = (float)(0.19509f); const float kWeight7 = (float)(0.83147f); const float kWeight8 = (float)(0.55557f); const float i0 = *(input + 0 * stride); const float i1 = *(input + 1 * stride); const float i2 = *(input + 2 * stride); const float i3 = *(input + 3 * stride); const float i4 = *(input + 4 * stride); const float i5 = *(input + 5 * stride); const float i6 = *(input + 6 * stride); const float i7 = *(input + 7 * stride); const float i8 = *(input + 8 * stride); const float i9 = *(input + 9 * stride); const float i10 = *(input + 10 * stride); const float i11 = *(input + 11 * stride); const float i12 = *(input + 12 * stride); const float i13 = *(input + 13 * stride); const float i14 = *(input + 14 * stride); const float i15 = *(input + 15 * stride); const float i16 = *(input + 16 * stride); const float i17 = *(input + 17 * stride); const float i18 = *(input + 18 * stride); const float i19 = *(input + 19 * stride); const float i20 = *(input + 20 * stride); const float i21 = *(input + 21 * stride); const float i22 = *(input + 22 * stride); const float i23 = *(input + 23 * stride); const float i24 = *(input + 24 * stride); const float i25 = *(input + 25 * stride); const float i26 = *(input + 26 * stride); const float i27 = *(input + 27 * stride); const float i28 = *(input + 28 * stride); const float i29 = *(input + 29 * stride); const float i30 = *(input + 30 * stride); const float i31 = *(input + 31 * stride); const float w30 = add_float(i0, i16); const float w31 = sub_float(i0, i16); const float w32[2] = { add_float(i8, i8), sub_float(i24, i24) }; const float w33[2] = { sub_float(i8, i8), sub_float(sub_float(kWeight0, i24), i24) }; const float w34[2] = { add_float(w30, 
w32[0]), w32[1] }; const float w35[2] = { sub_float(w30, w32[0]), sub_float(kWeight0, w32[1]) }; const float w36[2] = { add_float(w31, w33[1]), sub_float(kWeight0, w33[0]) }; const float w37[2] = { sub_float(w31, w33[1]), w33[0] }; const float w38[2] = { add_float(i4, i12), sub_float(i28, i20) }; const float w39[2] = { sub_float(i4, i12), sub_float(sub_float(kWeight0, i20), i28) }; const float w40[2] = { add_float(i12, i4), sub_float(i20, i28) }; const float w41[2] = { sub_float(i12, i4), sub_float(sub_float(kWeight0, i28), i20) }; const float w42[2] = { add_float(w38[0], w40[0]), add_float(w38[1], w40[1]) }; const float w43[2] = { sub_float(w38[0], w40[0]), sub_float(w38[1], w40[1]) }; const float w44[2] = { add_float(w39[0], w41[1]), sub_float(w39[1], w41[0]) }; const float w45[2] = { sub_float(w39[0], w41[1]), add_float(w39[1], w41[0]) }; const float w46[2] = { add_float(w34[0], w42[0]), add_float(w34[1], w42[1]) }; const float w47[2] = { sub_float(w34[0], w42[0]), sub_float(w34[1], w42[1]) }; const float w48[2] = { add_float(w36[0], mul_float(kWeight2, add_float(w44[0], w44[1]))), add_float(w36[1], mul_float(kWeight2, sub_float(w44[1], w44[0]))) }; const float w49[2] = { add_float(w36[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w44[0])), mul_float(kWeight2, w44[1]))), add_float(w36[1], mul_float(kWeight2, sub_float(w44[0], w44[1]))) }; const float w50[2] = { add_float(w35[0], w43[1]), sub_float(w35[1], w43[0]) }; const float w51[2] = { sub_float(w35[0], w43[1]), add_float(w35[1], w43[0]) }; const float w52[2] = { sub_float(w37[0], mul_float(kWeight2, sub_float(w45[0], w45[1]))), sub_float(w37[1], mul_float(kWeight2, add_float(w45[1], w45[0]))) }; const float w53[2] = { add_float(w37[0], mul_float(kWeight2, sub_float(w45[0], w45[1]))), add_float(w37[1], mul_float(kWeight2, add_float(w45[1], w45[0]))) }; const float w54[2] = { add_float(i2, i14), sub_float(i30, i18) }; const float w55[2] = { sub_float(i2, i14), sub_float(sub_float(kWeight0, i18), i30) 
}; const float w56[2] = { add_float(i10, i6), sub_float(i22, i26) }; const float w57[2] = { sub_float(i10, i6), sub_float(sub_float(kWeight0, i26), i22) }; const float w58[2] = { add_float(w54[0], w56[0]), add_float(w54[1], w56[1]) }; const float w59[2] = { sub_float(w54[0], w56[0]), sub_float(w54[1], w56[1]) }; const float w60[2] = { add_float(w55[0], w57[1]), sub_float(w55[1], w57[0]) }; const float w61[2] = { sub_float(w55[0], w57[1]), add_float(w55[1], w57[0]) }; const float w62[2] = { add_float(i6, i10), sub_float(i26, i22) }; const float w63[2] = { sub_float(i6, i10), sub_float(sub_float(kWeight0, i22), i26) }; const float w64[2] = { add_float(i14, i2), sub_float(i18, i30) }; const float w65[2] = { sub_float(i14, i2), sub_float(sub_float(kWeight0, i30), i18) }; const float w66[2] = { add_float(w62[0], w64[0]), add_float(w62[1], w64[1]) }; const float w67[2] = { sub_float(w62[0], w64[0]), sub_float(w62[1], w64[1]) }; const float w68[2] = { add_float(w63[0], w65[1]), sub_float(w63[1], w65[0]) }; const float w69[2] = { sub_float(w63[0], w65[1]), add_float(w63[1], w65[0]) }; const float w70[2] = { add_float(w58[0], w66[0]), add_float(w58[1], w66[1]) }; const float w71[2] = { sub_float(w58[0], w66[0]), sub_float(w58[1], w66[1]) }; const float w72[2] = { add_float(w60[0], mul_float(kWeight2, add_float(w68[0], w68[1]))), add_float(w60[1], mul_float(kWeight2, sub_float(w68[1], w68[0]))) }; const float w73[2] = { add_float(w60[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w68[0])), mul_float(kWeight2, w68[1]))), add_float(w60[1], mul_float(kWeight2, sub_float(w68[0], w68[1]))) }; const float w74[2] = { add_float(w59[0], w67[1]), sub_float(w59[1], w67[0]) }; const float w75[2] = { sub_float(w59[0], w67[1]), add_float(w59[1], w67[0]) }; const float w76[2] = { sub_float(w61[0], mul_float(kWeight2, sub_float(w69[0], w69[1]))), sub_float(w61[1], mul_float(kWeight2, add_float(w69[1], w69[0]))) }; const float w77[2] = { add_float(w61[0], mul_float(kWeight2, 
sub_float(w69[0], w69[1]))), add_float(w61[1], mul_float(kWeight2, add_float(w69[1], w69[0]))) }; const float w78[2] = { add_float(w46[0], w70[0]), add_float(w46[1], w70[1]) }; const float w79[2] = { sub_float(w46[0], w70[0]), sub_float(w46[1], w70[1]) }; const float w80[2] = { add_float(w48[0], add_float(mul_float(kWeight3, w72[0]), mul_float(kWeight4, w72[1]))), add_float(w48[1], sub_float(mul_float(kWeight3, w72[1]), mul_float(kWeight4, w72[0]))) }; const float w81[2] = { add_float(w48[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w72[0])), mul_float(kWeight4, w72[1]))), add_float(w48[1], sub_float(mul_float(kWeight4, w72[0]), mul_float(kWeight3, w72[1]))) }; const float w82[2] = { add_float(w50[0], mul_float(kWeight2, add_float(w74[0], w74[1]))), add_float(w50[1], mul_float(kWeight2, sub_float(w74[1], w74[0]))) }; const float w83[2] = { add_float(w50[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w74[0])), mul_float(kWeight2, w74[1]))), add_float(w50[1], mul_float(kWeight2, sub_float(w74[0], w74[1]))) }; const float w84[2] = { add_float(w52[0], add_float(mul_float(kWeight4, w76[0]), mul_float(kWeight3, w76[1]))), add_float(w52[1], sub_float(mul_float(kWeight4, w76[1]), mul_float(kWeight3, w76[0]))) }; const float w85[2] = { add_float(w52[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w76[0])), mul_float(kWeight3, w76[1]))), add_float(w52[1], sub_float(mul_float(kWeight3, w76[0]), mul_float(kWeight4, w76[1]))) }; const float w86[2] = { add_float(w47[0], w71[1]), sub_float(w47[1], w71[0]) }; const float w87[2] = { sub_float(w47[0], w71[1]), add_float(w47[1], w71[0]) }; const float w88[2] = { sub_float(w49[0], sub_float(mul_float(kWeight4, w73[0]), mul_float(kWeight3, w73[1]))), add_float(w49[1], sub_float(sub_float(kWeight0, mul_float(kWeight4, w73[1])), mul_float(kWeight3, w73[0]))) }; const float w89[2] = { add_float(w49[0], sub_float(mul_float(kWeight4, w73[0]), mul_float(kWeight3, w73[1]))), add_float(w49[1], 
add_float(mul_float(kWeight4, w73[1]), mul_float(kWeight3, w73[0]))) }; const float w90[2] = { sub_float(w51[0], mul_float(kWeight2, sub_float(w75[0], w75[1]))), sub_float(w51[1], mul_float(kWeight2, add_float(w75[1], w75[0]))) }; const float w91[2] = { add_float(w51[0], mul_float(kWeight2, sub_float(w75[0], w75[1]))), add_float(w51[1], mul_float(kWeight2, add_float(w75[1], w75[0]))) }; const float w92[2] = { sub_float(w53[0], sub_float(mul_float(kWeight3, w77[0]), mul_float(kWeight4, w77[1]))), add_float(w53[1], sub_float(sub_float(kWeight0, mul_float(kWeight3, w77[1])), mul_float(kWeight4, w77[0]))) }; const float w93[2] = { add_float(w53[0], sub_float(mul_float(kWeight3, w77[0]), mul_float(kWeight4, w77[1]))), add_float(w53[1], add_float(mul_float(kWeight3, w77[1]), mul_float(kWeight4, w77[0]))) }; const float w94[2] = { add_float(i1, i15), sub_float(i31, i17) }; const float w95[2] = { sub_float(i1, i15), sub_float(sub_float(kWeight0, i17), i31) }; const float w96[2] = { add_float(i9, i7), sub_float(i23, i25) }; const float w97[2] = { sub_float(i9, i7), sub_float(sub_float(kWeight0, i25), i23) }; const float w98[2] = { add_float(w94[0], w96[0]), add_float(w94[1], w96[1]) }; const float w99[2] = { sub_float(w94[0], w96[0]), sub_float(w94[1], w96[1]) }; const float w100[2] = { add_float(w95[0], w97[1]), sub_float(w95[1], w97[0]) }; const float w101[2] = { sub_float(w95[0], w97[1]), add_float(w95[1], w97[0]) }; const float w102[2] = { add_float(i5, i11), sub_float(i27, i21) }; const float w103[2] = { sub_float(i5, i11), sub_float(sub_float(kWeight0, i21), i27) }; const float w104[2] = { add_float(i13, i3), sub_float(i19, i29) }; const float w105[2] = { sub_float(i13, i3), sub_float(sub_float(kWeight0, i29), i19) }; const float w106[2] = { add_float(w102[0], w104[0]), add_float(w102[1], w104[1]) }; const float w107[2] = { sub_float(w102[0], w104[0]), sub_float(w102[1], w104[1]) }; const float w108[2] = { add_float(w103[0], w105[1]), sub_float(w103[1], w105[0]) }; 
const float w109[2] = { sub_float(w103[0], w105[1]), add_float(w103[1], w105[0]) }; const float w110[2] = { add_float(w98[0], w106[0]), add_float(w98[1], w106[1]) }; const float w111[2] = { sub_float(w98[0], w106[0]), sub_float(w98[1], w106[1]) }; const float w112[2] = { add_float(w100[0], mul_float(kWeight2, add_float(w108[0], w108[1]))), add_float(w100[1], mul_float(kWeight2, sub_float(w108[1], w108[0]))) }; const float w113[2] = { add_float(w100[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w108[0])), mul_float(kWeight2, w108[1]))), add_float(w100[1], mul_float(kWeight2, sub_float(w108[0], w108[1]))) }; const float w114[2] = { add_float(w99[0], w107[1]), sub_float(w99[1], w107[0]) }; const float w115[2] = { sub_float(w99[0], w107[1]), add_float(w99[1], w107[0]) }; const float w116[2] = { sub_float(w101[0], mul_float(kWeight2, sub_float(w109[0], w109[1]))), sub_float(w101[1], mul_float(kWeight2, add_float(w109[1], w109[0]))) }; const float w117[2] = { add_float(w101[0], mul_float(kWeight2, sub_float(w109[0], w109[1]))), add_float(w101[1], mul_float(kWeight2, add_float(w109[1], w109[0]))) }; const float w118[2] = { add_float(i3, i13), sub_float(i29, i19) }; const float w119[2] = { sub_float(i3, i13), sub_float(sub_float(kWeight0, i19), i29) }; const float w120[2] = { add_float(i11, i5), sub_float(i21, i27) }; const float w121[2] = { sub_float(i11, i5), sub_float(sub_float(kWeight0, i27), i21) }; const float w122[2] = { add_float(w118[0], w120[0]), add_float(w118[1], w120[1]) }; const float w123[2] = { sub_float(w118[0], w120[0]), sub_float(w118[1], w120[1]) }; const float w124[2] = { add_float(w119[0], w121[1]), sub_float(w119[1], w121[0]) }; const float w125[2] = { sub_float(w119[0], w121[1]), add_float(w119[1], w121[0]) }; const float w126[2] = { add_float(i7, i9), sub_float(i25, i23) }; const float w127[2] = { sub_float(i7, i9), sub_float(sub_float(kWeight0, i23), i25) }; const float w128[2] = { add_float(i15, i1), sub_float(i17, i31) }; const float 
w129[2] = { sub_float(i15, i1), sub_float(sub_float(kWeight0, i31), i17) }; const float w130[2] = { add_float(w126[0], w128[0]), add_float(w126[1], w128[1]) }; const float w131[2] = { sub_float(w126[0], w128[0]), sub_float(w126[1], w128[1]) }; const float w132[2] = { add_float(w127[0], w129[1]), sub_float(w127[1], w129[0]) }; const float w133[2] = { sub_float(w127[0], w129[1]), add_float(w127[1], w129[0]) }; const float w134[2] = { add_float(w122[0], w130[0]), add_float(w122[1], w130[1]) }; const float w135[2] = { sub_float(w122[0], w130[0]), sub_float(w122[1], w130[1]) }; const float w136[2] = { add_float(w124[0], mul_float(kWeight2, add_float(w132[0], w132[1]))), add_float(w124[1], mul_float(kWeight2, sub_float(w132[1], w132[0]))) }; const float w137[2] = { add_float(w124[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w132[0])), mul_float(kWeight2, w132[1]))), add_float(w124[1], mul_float(kWeight2, sub_float(w132[0], w132[1]))) }; const float w138[2] = { add_float(w123[0], w131[1]), sub_float(w123[1], w131[0]) }; const float w139[2] = { sub_float(w123[0], w131[1]), add_float(w123[1], w131[0]) }; const float w140[2] = { sub_float(w125[0], mul_float(kWeight2, sub_float(w133[0], w133[1]))), sub_float(w125[1], mul_float(kWeight2, add_float(w133[1], w133[0]))) }; const float w141[2] = { add_float(w125[0], mul_float(kWeight2, sub_float(w133[0], w133[1]))), add_float(w125[1], mul_float(kWeight2, add_float(w133[1], w133[0]))) }; const float w142[2] = { add_float(w110[0], w134[0]), add_float(w110[1], w134[1]) }; const float w143[2] = { sub_float(w110[0], w134[0]), sub_float(w110[1], w134[1]) }; const float w144[2] = { add_float(w112[0], add_float(mul_float(kWeight3, w136[0]), mul_float(kWeight4, w136[1]))), add_float(w112[1], sub_float(mul_float(kWeight3, w136[1]), mul_float(kWeight4, w136[0]))) }; const float w145[2] = { add_float(w112[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w136[0])), mul_float(kWeight4, w136[1]))), add_float(w112[1], 
sub_float(mul_float(kWeight4, w136[0]), mul_float(kWeight3, w136[1]))) }; const float w146[2] = { add_float(w114[0], mul_float(kWeight2, add_float(w138[0], w138[1]))), add_float(w114[1], mul_float(kWeight2, sub_float(w138[1], w138[0]))) }; const float w147[2] = { add_float(w114[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w138[0])), mul_float(kWeight2, w138[1]))), add_float(w114[1], mul_float(kWeight2, sub_float(w138[0], w138[1]))) }; const float w148[2] = { add_float(w116[0], add_float(mul_float(kWeight4, w140[0]), mul_float(kWeight3, w140[1]))), add_float(w116[1], sub_float(mul_float(kWeight4, w140[1]), mul_float(kWeight3, w140[0]))) }; const float w149[2] = { add_float(w116[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w140[0])), mul_float(kWeight3, w140[1]))), add_float(w116[1], sub_float(mul_float(kWeight3, w140[0]), mul_float(kWeight4, w140[1]))) }; const float w150[2] = { add_float(w111[0], w135[1]), sub_float(w111[1], w135[0]) }; const float w151[2] = { sub_float(w111[0], w135[1]), add_float(w111[1], w135[0]) }; const float w152[2] = { sub_float(w113[0], sub_float(mul_float(kWeight4, w137[0]), mul_float(kWeight3, w137[1]))), add_float(w113[1], sub_float(sub_float(kWeight0, mul_float(kWeight4, w137[1])), mul_float(kWeight3, w137[0]))) }; const float w153[2] = { add_float(w113[0], sub_float(mul_float(kWeight4, w137[0]), mul_float(kWeight3, w137[1]))), add_float(w113[1], add_float(mul_float(kWeight4, w137[1]), mul_float(kWeight3, w137[0]))) }; const float w154[2] = { sub_float(w115[0], mul_float(kWeight2, sub_float(w139[0], w139[1]))), sub_float(w115[1], mul_float(kWeight2, add_float(w139[1], w139[0]))) }; const float w155[2] = { add_float(w115[0], mul_float(kWeight2, sub_float(w139[0], w139[1]))), add_float(w115[1], mul_float(kWeight2, add_float(w139[1], w139[0]))) }; const float w156[2] = { sub_float(w117[0], sub_float(mul_float(kWeight3, w141[0]), mul_float(kWeight4, w141[1]))), add_float(w117[1], sub_float(sub_float(kWeight0, 
mul_float(kWeight3, w141[1])), mul_float(kWeight4, w141[0]))) }; const float w157[2] = { add_float(w117[0], sub_float(mul_float(kWeight3, w141[0]), mul_float(kWeight4, w141[1]))), add_float(w117[1], add_float(mul_float(kWeight3, w141[1]), mul_float(kWeight4, w141[0]))) }; store_float(output + 0 * stride, add_float(w78[0], w142[0])); store_float(output + 1 * stride, add_float(w80[0], add_float(mul_float(kWeight5, w144[0]), mul_float(kWeight6, w144[1])))); store_float(output + 2 * stride, add_float(w82[0], add_float(mul_float(kWeight3, w146[0]), mul_float(kWeight4, w146[1])))); store_float(output + 3 * stride, add_float(w84[0], add_float(mul_float(kWeight7, w148[0]), mul_float(kWeight8, w148[1])))); store_float(output + 4 * stride, add_float(w86[0], mul_float(kWeight2, add_float(w150[0], w150[1])))); store_float(output + 5 * stride, add_float(w88[0], add_float(mul_float(kWeight8, w152[0]), mul_float(kWeight7, w152[1])))); store_float(output + 6 * stride, add_float(w90[0], add_float(mul_float(kWeight4, w154[0]), mul_float(kWeight3, w154[1])))); store_float(output + 7 * stride, add_float(w92[0], add_float(mul_float(kWeight6, w156[0]), mul_float(kWeight5, w156[1])))); store_float(output + 8 * stride, add_float(w79[0], w143[1])); store_float(output + 9 * stride, sub_float(w81[0], sub_float(mul_float(kWeight6, w145[0]), mul_float(kWeight5, w145[1])))); store_float(output + 10 * stride, sub_float(w83[0], sub_float(mul_float(kWeight4, w147[0]), mul_float(kWeight3, w147[1])))); store_float(output + 11 * stride, sub_float(w85[0], sub_float(mul_float(kWeight8, w149[0]), mul_float(kWeight7, w149[1])))); store_float(output + 12 * stride, sub_float(w87[0], mul_float(kWeight2, sub_float(w151[0], w151[1])))); store_float(output + 13 * stride, sub_float(w89[0], sub_float(mul_float(kWeight7, w153[0]), mul_float(kWeight8, w153[1])))); store_float(output + 14 * stride, sub_float(w91[0], sub_float(mul_float(kWeight3, w155[0]), mul_float(kWeight4, w155[1])))); store_float(output + 15 * 
stride, sub_float(w93[0], sub_float(mul_float(kWeight5, w157[0]), mul_float(kWeight6, w157[1])))); store_float(output + 16 * stride, sub_float(w78[0], w142[0])); store_float(output + 17 * stride, add_float(w80[0], sub_float(sub_float(kWeight0, mul_float(kWeight5, w144[0])), mul_float(kWeight6, w144[1])))); store_float(output + 18 * stride, add_float(w82[0], sub_float(sub_float(kWeight0, mul_float(kWeight3, w146[0])), mul_float(kWeight4, w146[1])))); store_float(output + 19 * stride, add_float(w84[0], sub_float(sub_float(kWeight0, mul_float(kWeight7, w148[0])), mul_float(kWeight8, w148[1])))); store_float(output + 20 * stride, add_float(w86[0], sub_float(sub_float(kWeight0, mul_float(kWeight2, w150[0])), mul_float(kWeight2, w150[1])))); store_float(output + 21 * stride, add_float(w88[0], sub_float(sub_float(kWeight0, mul_float(kWeight8, w152[0])), mul_float(kWeight7, w152[1])))); store_float(output + 22 * stride, add_float(w90[0], sub_float(sub_float(kWeight0, mul_float(kWeight4, w154[0])), mul_float(kWeight3, w154[1])))); store_float(output + 23 * stride, add_float(w92[0], sub_float(sub_float(kWeight0, mul_float(kWeight6, w156[0])), mul_float(kWeight5, w156[1])))); store_float(output + 24 * stride, sub_float(w79[0], w143[1])); store_float(output + 25 * stride, add_float(w81[0], sub_float(mul_float(kWeight6, w145[0]), mul_float(kWeight5, w145[1])))); store_float(output + 26 * stride, add_float(w83[0], sub_float(mul_float(kWeight4, w147[0]), mul_float(kWeight3, w147[1])))); store_float(output + 27 * stride, add_float(w85[0], sub_float(mul_float(kWeight8, w149[0]), mul_float(kWeight7, w149[1])))); store_float(output + 28 * stride, add_float(w87[0], mul_float(kWeight2, sub_float(w151[0], w151[1])))); store_float(output + 29 * stride, add_float(w89[0], sub_float(mul_float(kWeight7, w153[0]), mul_float(kWeight8, w153[1])))); store_float(output + 30 * stride, add_float(w91[0], sub_float(mul_float(kWeight3, w155[0]), mul_float(kWeight4, w155[1])))); store_float(output + 31 
* stride, add_float(w93[0], sub_float(mul_float(kWeight5, w157[0]), mul_float(kWeight6, w157[1])))); }; | |
void aom_ifft2x2_float_c(const float *input, float *temp, float *output) { | |
aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float, | |
aom_ifft1d_2_float, simple_transpose, 1); | |
} | |
void aom_ifft4x4_float_c(const float *input, float *temp, float *output) { | |
aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float, | |
aom_ifft1d_4_float, simple_transpose, 1); | |
} | |
void aom_ifft8x8_float_c(const float *input, float *temp, float *output) { | |
aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float, | |
aom_ifft1d_8_float, simple_transpose, 1); | |
} | |
void aom_ifft16x16_float_c(const float *input, float *temp, float *output) { | |
aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, | |
aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1); | |
} | |
void aom_ifft32x32_float_c(const float *input, float *temp, float *output) { | |
aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, | |
aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment