Last active
January 18, 2022 14:31
-
-
Save inv2004/099321a52b54a9410b54446b3ca9a9c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
proc counter*[T: byte](a: openArray[T]): Counter[T] =
  ## Builds a histogram of the byte values in `a`, accumulating into
  ## `result.flat256`, which is indexed by byte value.
  ##
  ## The input is consumed one vector of `avx.width` bytes at a time:
  ## * fast path - when every byte of the vector is in `0..3`, the four
  ##   counts are obtained from compare + movemask + popcount;
  ## * slow path - otherwise each lane is extracted and counted individually.
  ## Bytes left over after the last full vector are counted one by one.
  let mask0 = set1_epi8(0)
  let mask1 = set1_epi8(1)
  let mask2 = set1_epi8(2)
  let mask3 = set1_epi8(3)
  var i = 0
  # `<=` (not `<`): a final chunk of exactly `avx.width` bytes is still a
  # full in-bounds vector load, so it does not have to go through the
  # scalar tail loop. An input shorter than one vector skips this loop.
  while i <= a.len - avx.width:
    let ymm = loadu_byte(unsafeAddr a[i])
    # NOTE(review): `cmpgt_epi8` is assumed to wrap the signed AVX2 byte
    # compare, so bytes 128..255 look negative and are never reported as
    # "greater than 3". Without the extra sign-bit test below, a vector made
    # only of values in {0..3} and {128..255} would take the fast path and
    # its high bytes would be dropped from the histogram.
    let above3 = popcnt_u32(movemask_epi8(cmpgt_epi8(ymm, mask3)))
    # movemask gathers the top bit of every byte: non-zero iff a byte >= 128.
    let highBit = popcnt_u32(movemask_epi8(ymm))
    if 0 < above3 or 0 < highBit:
      # Slow path: at least one byte is outside 0..3, count every lane.
      unroll for off in 0..<(avx.width):
        result.flat256[extract_epi8(ymm, off)].inc
    else:
      # Fast path: all bytes are in 0..3; each popcount is the number of
      # lanes equal to the corresponding value.
      result.flat256[0] += popcnt_u32(movemask_epi8(cmpeq_epi8(ymm, mask0)))
      result.flat256[1] += popcnt_u32(movemask_epi8(cmpeq_epi8(ymm, mask1)))
      result.flat256[2] += popcnt_u32(movemask_epi8(cmpeq_epi8(ymm, mask2)))
      result.flat256[3] += popcnt_u32(movemask_epi8(cmpeq_epi8(ymm, mask3)))
    i += avx.width
  # Scalar tail: the remaining (fewer than `avx.width`) bytes.
  for j in i..<a.len:
    result.flat256[a[j]].inc
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment