Skip to content

Instantly share code, notes, and snippets.

@astojanov
Created July 23, 2018 00:28
Show Gist options
  • Save astojanov/ab610f41aa77d5d0021930372e50d734 to your computer and use it in GitHub Desktop.
Save astojanov/ab610f41aa77d5d0021930372e50d734 to your computer and use it in GitHub Desktop.
#include <immintrin.h>
//
// Lane-wise signed 16-bit division (a / b) computed through single-precision
// floating point. Every 32-bit element of the inputs carries two 16-bit
// lanes; the upper and the lower lanes are widened to 32-bit integers
// separately, divided as floats, truncated back to integers and finally
// re-packed into their original 16-bit slots.
//
// NOTE(review): zero divisors are not guarded against. The float division
// yields inf/NaN for those lanes, and what ends up in the result depends on
// how _mm_cvttps_epi32 converts such values - confirm before relying on it.
//
static inline __m128i sse_mm_div_epi16_division(const __m128i &a_epi16, const __m128i &b_epi16) {
    //
    // Upper lane of each 32-bit element: an arithmetic right shift
    // sign-extends it into a full 32-bit integer.
    //
    const __m128i num_upper_i32 = _mm_srai_epi32(a_epi16, 16);
    const __m128i den_upper_i32 = _mm_srai_epi32(b_epi16, 16);
    //
    // Lower lane: move it to the top half first so that the same
    // arithmetic shift sign-extends it as well.
    //
    const __m128i num_lower_i32 = _mm_srai_epi32(_mm_slli_epi32(a_epi16, 16), 16);
    const __m128i den_lower_i32 = _mm_srai_epi32(_mm_slli_epi32(b_epi16, 16), 16);
    //
    // Divide in floating point, one quotient vector per lane position
    //
    const __m128 quot_upper_ps = _mm_div_ps(_mm_cvtepi32_ps(num_upper_i32),
                                            _mm_cvtepi32_ps(den_upper_i32));
    const __m128 quot_lower_ps = _mm_div_ps(_mm_cvtepi32_ps(num_lower_i32),
                                            _mm_cvtepi32_ps(den_lower_i32));
    //
    // Back to 32-bit integers, using the truncating conversion
    //
    const __m128i quot_upper_i32 = _mm_cvttps_epi32(quot_upper_ps);
    const __m128i quot_lower_i32 = _mm_cvttps_epi32(quot_lower_ps);
    //
    // Re-pack: upper quotients go back into bits 31..16, lower quotients
    // keep only bits 15..0 so their sign extension cannot spill over.
    //
    const __m128i keep_low16 = _mm_set1_epi32(0xFFFF);
    const __m128i packed_upper = _mm_slli_epi32(quot_upper_i32, 16);
    const __m128i packed_lower = _mm_and_si128(quot_lower_i32, keep_low16);
    return _mm_or_si128(packed_upper, packed_lower);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment