Created
July 23, 2018 00:53
-
-
Save astojanov/5dfcfdf01632f0bd48eba998c3017bf4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> | |
//
// Lane-wise signed division of sixteen 16-bit integers: result[i] = a[i] / b[i],
// with the quotient truncated toward zero.
//
// AVX2 has no integer division instruction, so each 16-bit lane is widened to
// 32 bits, converted to single-precision float, divided, and converted back.
// Every 16-bit integer is exactly representable as a float.
//
// Parameters:
//   a_epi16 - sixteen signed 16-bit dividends.
//   b_epi16 - sixteen signed 16-bit divisors.
//
// Returns:
//   Sixteen signed 16-bit quotients, in the same lane order as the inputs.
//
// Edge cases (these follow from the instruction semantics; nothing is checked):
//   - A zero divisor gives an infinite or NaN float quotient. The truncating
//     conversion then returns the "integer indefinite" value 0x80000000, whose
//     low 16 bits are zero, so that lane of the result is 0.
//   - -32768 / -1 gives 32768, which does not fit in 16 bits; only the low
//     16 bits are kept, so that lane of the result is -32768.
//
static inline __m256i avx_mm256_div_epi16_division(const __m256i &a_epi16, const __m256i &b_epi16) {
    //
    // Convert to two 32-bit integers.
    // Odd 16-bit lanes sit in the upper half of each 32-bit element, so one
    // arithmetic right shift sign-extends them. Even lanes are first moved to
    // the upper half and then shifted back down to pick up their sign.
    //
    const __m256i a_hi_epi32       = _mm256_srai_epi32(a_epi16, 16);
    const __m256i a_lo_epi32_shift = _mm256_slli_epi32(a_epi16, 16);
    const __m256i a_lo_epi32       = _mm256_srai_epi32(a_lo_epi32_shift, 16);
    const __m256i b_hi_epi32       = _mm256_srai_epi32(b_epi16, 16);
    const __m256i b_lo_epi32_shift = _mm256_slli_epi32(b_epi16, 16);
    const __m256i b_lo_epi32       = _mm256_srai_epi32(b_lo_epi32_shift, 16);
    //
    // Convert to 32-bit floats and divide
    //
    const __m256 a_hi = _mm256_cvtepi32_ps(a_hi_epi32);
    const __m256 a_lo = _mm256_cvtepi32_ps(a_lo_epi32);
    const __m256 b_hi = _mm256_cvtepi32_ps(b_hi_epi32);
    const __m256 b_lo = _mm256_cvtepi32_ps(b_lo_epi32);
    const __m256 hi   = _mm256_div_ps(a_hi, b_hi);
    const __m256 lo   = _mm256_div_ps(a_lo, b_lo);
    //
    // Convert back to integers (cvttps truncates toward zero)
    //
    const __m256i hi_epi32 = _mm256_cvttps_epi32(hi);
    const __m256i lo_epi32 = _mm256_cvttps_epi32(lo);
    //
    // Blend the low and the high parts.
    // The odd-lane quotients are moved back into the upper 16 bits of each
    // 32-bit element; mask 0xAA (binary 10101010) then takes the odd 16-bit
    // words from them and the even 16-bit words from the even-lane quotients.
    //
    const __m256i hi_epi32_shift = _mm256_slli_epi32(hi_epi32, 16);
    return _mm256_blend_epi16(lo_epi32, hi_epi32_shift, 0xAA);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment