Skip to content

Instantly share code, notes, and snippets.

@Const-me
Created July 1, 2020 21:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Const-me/9ae1fda194a6f653a7a6a52fb6190d05 to your computer and use it in GitHub Desktop.
Save Const-me/9ae1fda194a6f653a7a6a52fb6190d05 to your computer and use it in GitHub Desktop.
// IEEE-754 single-precision bit pattern of 384.0f (sign 0, exponent 2^8, mantissa 1.5),
// see https://www.h-schmidt.net/FloatConverter/IEEE754.html
// 384.0 was the bias value passed to a52_frame in the C# code.
// Every float in [256, 512) shares the same exponent, so subtracting this constant from the raw bits of such a float
// yields its distance from 384.0 in units of 2^-15, as a signed integer.
constexpr int zeroLevel = 0x43c00000;
#ifdef _MSC_VER
// On this code path the NEON-style vector type names are plain aliases of the SSE integer register type,
// which lets the helpers below keep the same signatures as their counterparts in the #else branch.
using int16x8_t = __m128i;
using int32x4_t = __m128i;
// Load 16 bytes (4 floats) starting at src, without any alignment requirement,
// and return them unmodified as an integer vector, i.e. the raw bit patterns of the floats.
__forceinline int32x4_t load4( const float* src )
{
	const __m128i* const source = reinterpret_cast<const __m128i*>( src );
	return _mm_loadu_si128( source );
}
// Build a vector with every 32-bit lane set to zeroLevel
__forceinline int32x4_t zeroVector()
{
	const int32x4_t bias = _mm_set1_epi32( zeroLevel );
	return bias;
}
// Write the complete 16-byte vector v (8 lanes of int16_t) to dest; dest does not need to be aligned
__forceinline void store8( int16_t* dest, int16x8_t v )
{
	__m128i* const target = reinterpret_cast<__m128i*>( dest );
	_mm_storeu_si128( target, v );
}
#else
// Load 4 floats starting at src and return their raw bit patterns as 4 signed 32-bit lanes.
// The data is loaded with its real element type and then reinterpreted in the register:
// this produces exactly the same bits as an integer load, but avoids reading float storage through an integer pointer,
// and avoids the `const int*` cast which does not match `const int32_t*` on toolchains where int32_t is `long`.
__forceinline int32x4_t load4( const float* src )
{
	const float32x4_t asFloats = vld1q_f32( src );
	// Pure bit cast, no numeric conversion takes place
	return vreinterpretq_s32_f32( asFloats );
}
// Build a vector with every 32-bit lane set to zeroLevel
__forceinline int32x4_t zeroVector()
{
	const int32x4_t bias = vdupq_n_s32( zeroLevel );
	return bias;
}
// Write all 8 int16_t lanes of v to consecutive elements starting at dest
__forceinline void store8( int16_t* dest, int16x8_t v )
{
	int16_t* const target = dest;
	vst1q_s16( target, v );
}
#endif
// Convert 8 consecutive floats at src into 8 signed 16-bit integers.
// The floats are never converted numerically: their raw bit patterns are loaded as 32-bit integers,
// `zero` is subtracted from every lane, and the differences are narrowed to 16 bits with signed saturation.
// Elements src[0..3] end up in the lower 4 lanes of the result, src[4..7] in the upper 4 lanes.
__forceinline int16x8_t convert8( const float* src, const int32x4_t zero )
{
#ifdef _MSC_VER
	const int32x4_t first = _mm_sub_epi32( load4( src ), zero );
	const int32x4_t second = _mm_sub_epi32( load4( src + 4 ), zero );
	// Pack both vectors into a single one, saturating every 32-bit lane to the int16_t range
	return _mm_packs_epi32( first, second );
#else
	const int32x4_t first = vsubq_s32( load4( src ), zero );
	const int32x4_t second = vsubq_s32( load4( src + 4 ), zero );
	// vqmovn_s32 is "signed saturating extract narrow": every signed 32-bit lane is saturated to half its width,
	// producing a 64-bit vector with 4 signed 16-bit lanes. The two halves are then joined into one 128-bit vector.
	return vcombine_s16( vqmovn_s32( first ), vqmovn_s32( second ) );
#endif
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment