Skip to content

Instantly share code, notes, and snippets.

@pkorpine
Last active October 8, 2018 08:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pkorpine/0b3a768e018827039f862cd6228e5e7e to your computer and use it in GitHub Desktop.
Save pkorpine/0b3a768e018827039f862cd6228e5e7e to your computer and use it in GitHub Desktop.
memcpy 16-bit values to 32-bit values (zero extend using SSE4.1)
#include <stddef.h>
#include <stdint.h>

#include <smmintrin.h>
/**
 * Copy 16-bit unsigned values into 32-bit unsigned values, zero-extending
 * each element.
 *
 * Neither buffer needs any alignment beyond the natural alignment of its
 * element type: the vector path uses unaligned loads and stores.
 *
 * @param pdst_ Destination array; receives n / sizeof(uint16_t) elements.
 * @param psrc_ Source array of 16-bit values.
 * @param n     Size of the source in BYTES (not elements). A trailing odd
 *              byte is ignored.
 */
void memcpy_16to32(uint32_t *pdst_, const uint16_t *psrc_, size_t n)
{
    size_t items = n / sizeof(uint16_t);

    /*
     * The vector path is only built when SSE4.1 code generation is enabled
     * (GCC/Clang define __SSE4_1__; MSVC signals it via __AVX__). Otherwise
     * the scalar loop below handles every element.
     */
#if defined(__SSE4_1__) || defined(__AVX__)
    while (items >= 8) {
        /* Read 8x 16-bit; the source may not be 16-byte aligned. */
        __m128i src = _mm_loadu_si128((const __m128i *)psrc_);

        /* Zero extend the low 4 elements from 16-bit to 32-bit. */
        _mm_storeu_si128((__m128i *)pdst_, _mm_cvtepu16_epi32(src));

        /* Move the high 4 elements (upper 8 bytes) into the low half. */
        src = _mm_srli_si128(src, 8);

        /* Zero extend the remaining 4 elements. */
        _mm_storeu_si128((__m128i *)(pdst_ + 4), _mm_cvtepu16_epi32(src));

        psrc_ += 8;
        pdst_ += 8;
        items -= 8;
    }
#endif

    /* Copy whatever is left (at most 7 items after the vector loop). */
    while (items-- > 0) {
        *pdst_++ = *psrc_++;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment