Skip to content

Instantly share code, notes, and snippets.

@nikreiman
Created December 12, 2013 09:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nikreiman/7925222 to your computer and use it in GitHub Desktop.
Save nikreiman/7925222 to your computer and use it in GitHub Desktop.
Converting 16-bit short samples to floating-point samples (and back) with SSE
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

#include <emmintrin.h>
#include <xmmintrin.h>
// Build configuration. Change the `#if 1` below to `#if 0` to switch from the
// small setup to the large one.
//   ITERATIONS - how many times main repeats each conversion loop.
//   BUFSIZE    - number of samples held in each buffer. main's SSE loops
//                advance four samples per step, so keep this a multiple of 4.
#if 1
// Small setup: a single pass over 32 samples.
#define ITERATIONS 1
#define BUFSIZE 32
#else
// Large setup: one million passes over 512 samples
// (presumably intended for timing runs - confirm).
#define ITERATIONS 1000000
#define BUFSIZE 512
#endif
// Section toggles: 1 compiles a section in, 0 leaves it out.
#define RUN_SHORT_TO_FLOAT 0 // 16-bit integer samples -> float samples
#define RUN_FLOAT_TO_SHORT 1 // float samples -> 16-bit integer samples
#define USE_SSE 1            // 1: SSE2 intrinsics, 0: plain scalar loops

// Converts a buffer of BUFSIZE samples between 16-bit integers and floats,
// repeating each enabled conversion ITERATIONS times.
//
// Both directions use 32767 as the scale between the integer range and the
// float range. The SSE2 loops process four samples per step, so BUFSIZE must
// be a multiple of 4, and both buffers are 16-byte aligned so that the aligned
// float load/store intrinsics may be used on them.
//
// NOTE(review): the converted values are not read back afterwards, so an
// optimizing compiler may drop the conversion loops; inspect or print the
// output buffer before relying on timings taken from this program.
//
// Returns 0.
int main(void) {
  // Visual studio: __declspec(align(16))
  // For heap buffers, request 16-byte alignment from posix_memalign
  // (Visual studio: _aligned_malloc) instead.
  short shortSamples[BUFSIZE] __attribute__((aligned(16)));
  float floatSamples[BUFSIZE] __attribute__((aligned(16)));

  // Keeps the buffers "used" when both sections are switched off.
  (void)shortSamples;
  (void)floatSamples;

  // Short -> float
#if RUN_SHORT_TO_FLOAT
  {
    for (int i = 0; i < BUFSIZE; ++i) {
      shortSamples[i] = (short)i;
      floatSamples[i] = 0.0f;
    }
#if USE_SSE
    const __m128 scale = _mm_set1_ps(1.0f / 32767.0f);
    for (int j = 0; j < ITERATIONS; j++) {
      for (int i = 0; i < BUFSIZE; i += 4) {
        // Load four shorts into the low 64 bits of the register.
        __m128i packed = _mm_loadl_epi64((const __m128i *)(shortSamples + i));
        // Interleaving the value with itself puts each short in the upper
        // half of a 32-bit lane; the arithmetic shift then sign-extends it.
        __m128i widened =
            _mm_srai_epi32(_mm_unpacklo_epi16(packed, packed), 16);
        _mm_store_ps(floatSamples + i,
                     _mm_mul_ps(_mm_cvtepi32_ps(widened), scale));
      }
    }
#else
    for (int j = 0; j < ITERATIONS; j++) {
      for (int i = 0; i < BUFSIZE; i++) {
        floatSamples[i] = (float)shortSamples[i] / 32767.0f;
      }
    }
#endif
  }
#endif

  // Float -> short
#if RUN_FLOAT_TO_SHORT
  {
    // Input values 1/1, 1/2, ... 1/BUFSIZE; i + 1 avoids dividing by zero.
    for (int i = 0; i < BUFSIZE; i++) {
      floatSamples[i] = 1.0f / (float)(i + 1);
      shortSamples[i] = 0;
    }
#if USE_SSE
    const __m128 scale = _mm_set1_ps(32767.0f);
    for (int j = 0; j < ITERATIONS; j++) {
      for (int i = 0; i < BUFSIZE; i += 4) {
        // Scale, then truncate toward zero like the scalar (short) cast.
        __m128i ints = _mm_cvttps_epi32(
            _mm_mul_ps(_mm_load_ps(floatSamples + i), scale));
        // Narrow 32 -> 16 bits with signed saturation and write the four
        // resulting shorts (the low 64 bits of the packed register).
        _mm_storel_epi64((__m128i *)(shortSamples + i),
                         _mm_packs_epi32(ints, ints));
      }
    }
#else
    for (int j = 0; j < ITERATIONS; j++) {
      for (int i = 0; i < BUFSIZE; i++) {
        shortSamples[i] = (short)(floatSamples[i] * 32767.0f);
        printf("%f, ", floatSamples[i]);
      }
    }
#endif
  }
#endif

  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment