Skip to content

Instantly share code, notes, and snippets.

@skejeton
Last active June 19, 2022 17:08
Show Gist options
  • Save skejeton/88dc140ca3641d5ef1b595d7f05c3ea2 to your computer and use it in GitHub Desktop.
Save skejeton/88dc140ca3641d5ef1b595d7f05c3ea2 to your computer and use it in GitHub Desktop.
// Decodes a single UTF-8 encoded code point from the start of `s_`.
//
// Parameters:
//   out - receives the decoded code point, or the raw first byte when the
//         input at `s_` is not a well-formed multi-byte sequence.
//   s_  - pointer to the bytes to decode. Continuation bytes are inspected
//         one at a time and scanning stops at the first byte that is not a
//         continuation byte, so a NUL terminator is never read past.
//
// Returns the number of bytes consumed:
//   0    - the first byte is NUL (`*out` is set to 0),
//   1    - an ASCII byte, a stray continuation byte, or an invalid sequence
//          (`*out` is the raw first byte in all of these cases),
//   2..4 - a well-formed multi-byte sequence.
//
// Sequences that are structurally complete but ill-formed per RFC 3629
// (overlong encodings, UTF-16 surrogates U+D800..U+DFFF, values above
// U+10FFFF) are treated like any other invalid input: the lead byte is
// returned raw and a length of 1 is reported.
static inline size_t Utf8_Fetch(uint32_t *out, const char *s_)
{
    const unsigned char *s = (const unsigned char *)s_;
    const unsigned char lead = s[0];

    // Anything that is not a lead byte (ASCII 0xxxxxxx or a stray
    // continuation 10xxxxxx) is passed through as-is.
    if ((lead & 0xC0) != 0xC0) {
        *out = lead;
        return lead != 0;
    }

    size_t len;       // total sequence length announced by the lead byte
    uint32_t cp;      // code point accumulated so far
    uint32_t min_cp;  // smallest code point that legitimately needs `len` bytes
    if ((lead & 0xE0) == 0xC0) {
        len = 2; cp = lead & 0x1Fu; min_cp = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
        len = 3; cp = lead & 0x0Fu; min_cp = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {
        len = 4; cp = lead & 0x07u; min_cp = 0x10000;
    } else {
        // Lead bytes 0xF8..0xFF would announce 5+ byte forms, which are not
        // valid UTF-8; hand the byte back raw.
        *out = lead;
        return 1;
    }

    for (size_t i = 1; i < len; ++i) {
        // Every trailing byte must look like 10xxxxxx. This also catches a
        // NUL terminator in the middle of a truncated sequence.
        if ((s[i] & 0xC0) != 0x80) {
            *out = lead;
            return 1;
        }
        cp = (cp << 6) | (s[i] & 0x3Fu);
    }

    // Reject overlong forms, surrogates and values outside the Unicode range.
    if (cp < min_cp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
        *out = lead;
        return 1;
    }

    *out = cp;
    return len;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment