Skip to content

Instantly share code, notes, and snippets.

@bave
Last active October 22, 2020 11:45
Show Gist options
  • Save bave/d65382f6f0641489f3d603265edc7e2b to your computer and use it in GitHub Desktop.
Save bave/d65382f6f0641489f3d603265edc7e2b to your computer and use it in GitHub Desktop.
tmp
// Requires: +avx2, +popcnt, +bmi2
/// Compress-stores the lanes of `src` selected by `mask` to `array`.
///
/// This is an AVX2 + BMI2 emulation of the AVX-512 compress-store operation: every lane `i` of
/// `src` whose bit `i` is set in `mask` is packed, in ascending lane order, into
/// `array[0..n]`, where `n` is the number of set bits in `mask`. Elements of `array` at index
/// `n` and above are not written.
///
/// # Safety
///
/// * The executing CPU must support AVX2, BMI2 and POPCNT.
/// * `array` must be valid for writes of `mask.count_ones()` consecutive `f32` values. The
///   store is an unaligned masked store, so no 32-byte alignment is required.
#[target_feature(enable = "avx2,bmi2,popcnt")]
unsafe fn mm256_compressstroeu_ps(array: *mut f32, mask: u8, src: __m256) {
    use std::arch::x86_64::{
        _mm256_cmpgt_epi32, _mm256_maskstore_ps, _mm256_set1_epi32, _mm256_setr_epi32,
    };

    // Spread each mask bit into the low bit of its own byte, then widen it to 0xFF so that
    // every selected lane owns a fully-set byte. The product cannot overflow: the largest
    // operand is 0x0101_0101_0101_0101 * 0xFF == u64::MAX.
    let lane_bytes = _pdep_u64(u64::from(mask), 0x0101_0101_0101_0101) * 0xFF;

    // Byte `i` of the constant holds the lane index `i`; extracting the selected bytes packs
    // the indices of the chosen lanes contiguously into the low bytes.
    let packed_indices = _pext_u64(0x0706_0504_0302_0100, lane_bytes);

    // Widen the eight byte-sized indices to 32-bit lanes and gather the chosen floats to the
    // front of the vector.
    let permute = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(packed_indices as i64));
    let compressed = _mm256_permutevar8x32_ps(src, permute);

    // Lane `i` of the store mask is all-ones exactly when `i < count`, i.e. for the leading
    // lanes that hold compressed output.
    let count = _popcnt64(i64::from(mask));
    let store_mask = _mm256_cmpgt_epi32(
        _mm256_set1_epi32(count),
        _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7),
    );

    // SAFETY: only the first `count` lanes are enabled in `store_mask`, and the caller
    // guarantees `array` is valid for writes of `count` floats. Masked-off lanes are not
    // accessed by the masked store, so nothing at or past `array[count]` is touched.
    unsafe { _mm256_maskstore_ps(array, store_mask, compressed) };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment