Skip to content

Instantly share code, notes, and snippets.

@astojanov
Created July 23, 2018 00:53
Show Gist options
  • Save astojanov/5dfcfdf01632f0bd48eba998c3017bf4 to your computer and use it in GitHub Desktop.
Save astojanov/5dfcfdf01632f0bd48eba998c3017bf4 to your computer and use it in GitHub Desktop.
#include <immintrin.h>
static inline __m256i avx_mm256_div_epi16_division(const __m256i &a_epi16, const __m256i &b_epi16) {
//
// Convert to two 32-bit integers
//
const __m256i a_hi_epi32 = _mm256_srai_epi32(a_epi16, 16);
const __m256i a_lo_epi32_shift = _mm256_slli_epi32(a_epi16, 16);
const __m256i a_lo_epi32 = _mm256_srai_epi32(a_lo_epi32_shift, 16);
const __m256i b_hi_epi32 = _mm256_srai_epi32(b_epi16, 16);
const __m256i b_lo_epi32_shift = _mm256_slli_epi32(b_epi16, 16);
const __m256i b_lo_epi32 = _mm256_srai_epi32(b_lo_epi32_shift, 16);
//
// Convert to 32-bit floats
//
const __m256 a_hi = _mm256_cvtepi32_ps(a_hi_epi32);
const __m256 a_lo = _mm256_cvtepi32_ps(a_lo_epi32);
const __m256 b_hi = _mm256_cvtepi32_ps(b_hi_epi32);
const __m256 b_lo = _mm256_cvtepi32_ps(b_lo_epi32);
const __m256 hi = _mm256_div_ps(a_hi, b_hi);
const __m256 lo = _mm256_div_ps(a_lo, b_lo);
//
// Convert back to integers
//
const __m256i hi_epi32 = _mm256_cvttps_epi32(hi);
const __m256i lo_epi32 = _mm256_cvttps_epi32(lo);
//
// Blend the low and the high-parts
//
const __m256i hi_epi32_shift = _mm256_slli_epi32(hi_epi32, 16);
return _mm256_blend_epi16(lo_epi32, hi_epi32_shift, 0xAA);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment