Skip to content

Instantly share code, notes, and snippets.

@aqrit
Last active September 9, 2023 21:39
Show Gist options
  • Save aqrit/ebcbd13a43ac4ee4ef05578074ad3631 to your computer and use it in GitHub Desktop.
Save aqrit/ebcbd13a43ac4ee4ef05578074ad3631 to your computer and use it in GitHub Desktop.
Half-baked pseudocode (AVX2 UTF-8 to Latin-1 conversion)
if (*src < 0xC0) error; // stream starts with continuation byte
for(...){
__m256i v0 = _mm256_loadu_si256((__m256i const*)src);
__m256i v1 = _mm256_loadu_si256((__m256i const*)&src[1]); // peek 1 byte for validation of 2 byte sequence
src += 32;
// Classifications for utf-8 to latin1 (0x00..0xFF)
const __m256i xC2 = _mm256_set1_epi8(0xC2); // lead byte for 0b10xxxxxx codepoints
const __m256i xC3 = _mm256_set1_epi8(0xC3); // lead byte for 0b11xxxxxx codepoints
const __m256i xC0 = _mm256_set1_epi8(0xC0); // continuation byte: 0x80..0xBF
__m256i cmpeq_xC2 = _mm256_cmpeq_epi8(v0, xC2);
__m256i cmpeq_xC3 = _mm256_cmpeq_epi8(v0, xC3);
__m256i multibyte_lead = _mm256_or_si256(cmpeq_xC2, cmpeq_xC3);
__m256i multibyte_continuation0 = _mm256_cmpgt_epi8(xC0, v0);
__m256i multibyte_continuation1 = _mm256_cmpgt_epi8(xC0, v1);
__m256i bad_range = _mm256_xor_si256(_mm256_or_si256(multibyte_continuation0, multibyte_lead), v0);
__m256i bad_sequence = _mm256_xor_si256(multibyte_continuation1, multibyte_lead);
unsigned bad_mask = (unsigned)_mm256_movemask_epi8(_mm256_or_si256(bad_range, bad_sequence));
__m256i t = _mm256_blendv_epi8(v0, v1, v0); // move continuation_byte to the lead_byte postion
t = _mm256_or_si256(t, _mm256_and_si256(cmpeq_xC3, xC0)); // set bit6 if lead_byte was 0xC3
// strip `multibyte_continuation0` bytes (compress_store)
todo
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment