Created
July 23, 2018 00:28
-
-
Save astojanov/ab610f41aa77d5d0021930372e50d734 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h>
//
// Divides eight packed signed 16-bit integers lane by lane (a / b). Each lane
// is sign-extended to 32 bits, converted to single-precision float, divided,
// and the quotient is truncated toward zero before being narrowed back to
// 16 bits.
//
// a_epi16: eight signed 16-bit dividends.
// b_epi16: eight signed 16-bit divisors.
// Returns: eight signed 16-bit quotients (low 16 bits of each 32-bit result).
//
// Edge cases (assuming the default MXCSR state, i.e. FP exceptions masked):
//  - a divisor of 0 yields 0 in that lane: the float quotient is +/-inf or
//    NaN, _mm_cvttps_epi32 converts that to 0x80000000, and its low 16 bits
//    are zero;
//  - -32768 / -1 wraps around to -32768, because +32768 does not fit in a
//    signed 16-bit lane.
//
static inline __m128i sse_mm_div_epi16_division(const __m128i &a_epi16, const __m128i &b_epi16) {
    const __m128i lo_mask = _mm_set1_epi32(0xFFFF);
    //
    // Sign-extend the odd (upper) and even (lower) 16-bit halves of every
    // 32-bit lane into full 32-bit integers. The upper half only needs an
    // arithmetic right shift; the lower half is first moved into the upper
    // position so that the same arithmetic shift replicates its sign bit.
    //
    const __m128i a_hi_epi32       = _mm_srai_epi32(a_epi16, 16);
    const __m128i a_lo_epi32_shift = _mm_slli_epi32(a_epi16, 16);
    const __m128i a_lo_epi32       = _mm_srai_epi32(a_lo_epi32_shift, 16);
    const __m128i b_hi_epi32       = _mm_srai_epi32(b_epi16, 16);
    const __m128i b_lo_epi32_shift = _mm_slli_epi32(b_epi16, 16);
    const __m128i b_lo_epi32       = _mm_srai_epi32(b_lo_epi32_shift, 16);
    //
    // Convert to 32-bit floats (every 16-bit integer fits in the 24-bit
    // float significand, so this conversion is exact)
    //
    const __m128 a_hi = _mm_cvtepi32_ps(a_hi_epi32);
    const __m128 a_lo = _mm_cvtepi32_ps(a_lo_epi32);
    const __m128 b_hi = _mm_cvtepi32_ps(b_hi_epi32);
    const __m128 b_lo = _mm_cvtepi32_ps(b_lo_epi32);
    //
    // Perform the division in floating point
    //
    const __m128 hi = _mm_div_ps(a_hi, b_hi);
    const __m128 lo = _mm_div_ps(a_lo, b_lo);
    //
    // Convert back to integers, truncating toward zero
    //
    const __m128i hi_epi32 = _mm_cvttps_epi32(hi);
    const __m128i lo_epi32 = _mm_cvttps_epi32(lo);
    //
    // Move the upper quotients back into the upper 16 bits of each lane and
    // clear the sign-extension bits above the lower quotients
    //
    const __m128i hi_epi32_shift = _mm_slli_epi32(hi_epi32, 16);
    const __m128i lo_epi32_mask  = _mm_and_si128(lo_epi32, lo_mask);
    //
    // Mix together the bits, and return
    //
    return _mm_or_si128(hi_epi32_shift, lo_epi32_mask);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment