Created
July 1, 2020 21:36
-
-
Save Const-me/9ae1fda194a6f653a7a6a52fb6190d05 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Raw IEEE-754 single-precision bit pattern of 384.0f
// (see https://www.h-schmidt.net/FloatConverter/IEEE754.html).
// 384.0 was the bias value passed to a52_frame in the C# code.
// Floats in [256, 512) are spaced exactly 2^-15 apart, so subtracting this pattern from the
// raw bits of a biased sample yields its offset from 384.0 counted in steps of 2^-15.
constexpr int zeroLevel = 0x43c00000;
#ifdef _MSC_VER | |
using int16x8_t = __m128i; | |
using int32x4_t = __m128i; | |
__forceinline int32x4_t load4( const float* src ) | |
{ | |
return _mm_loadu_si128( ( const __m128i * )src ); | |
} | |
__forceinline int32x4_t zeroVector() | |
{ | |
return _mm_set1_epi32( zeroLevel ); | |
} | |
__forceinline void store8( int16_t* dest, int16x8_t v ) | |
{ | |
_mm_storeu_si128( ( __m128i* )dest, v ); | |
} | |
#else | |
__forceinline int32x4_t load4( const float* src ) | |
{ | |
return vld1q_s32( (const int*)(const void*)src ); | |
} | |
__forceinline int32x4_t zeroVector() | |
{ | |
return vdupq_n_s32( zeroLevel ); | |
} | |
__forceinline void store8( int16_t* dest, int16x8_t v ) | |
{ | |
vst1q_s16( dest, v ); | |
} | |
#endif | |
// Turn 8 floats into 8 int16 values by integer arithmetic on their raw bits; no numeric
// float-to-int conversion instruction is involved.
// Each float's bit pattern is loaded as a 32-bit integer, `zero` is subtracted lane-wise
// (wrapping 32-bit subtraction), and the differences are narrowed to 16 bits with signed saturation.
// src:  pointer to at least 8 readable floats, read through load4().
// zero: value subtracted from every lane.
//       NOTE(review): presumably zeroVector(), i.e. the bits of 384.0f — confirm at call sites.
// Returns: lanes 0-3 come from src[0..3], lanes 4-7 from src[4..7].
// The result is only meaningful when the inputs are already biased close to the value encoded
// in `zero`; the 32-bit subtraction can wrap for bit patterns far from it.
__forceinline int16x8_t convert8( const float* src, const int32x4_t zero )
{
	const int32x4_t rawLow = load4( src );
	const int32x4_t rawHigh = load4( src + 4 );
#ifdef _MSC_VER
	const int32x4_t low = _mm_sub_epi32( rawLow, zero );
	const int32x4_t high = _mm_sub_epi32( rawHigh, zero );
	// Pack both vectors into one, saturating each 32-bit lane to the int16 range.
	return _mm_packs_epi32( low, high );
#else
	const int32x4_t low = vsubq_s32( rawLow, zero );
	const int32x4_t high = vsubq_s32( rawHigh, zero );
	// Signed saturating extract narrow: every 32-bit lane is saturated to the int16 range,
	// producing a 4-lane half-width vector; the two halves are then joined, low half first.
	return vcombine_s16( vqmovn_s32( low ), vqmovn_s32( high ) );
#endif
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment