Skip to content

Instantly share code, notes, and snippets.

@irrationalRock
Created April 18, 2018 02:43
Show Gist options
  • Save irrationalRock/75eebdbf464e9b6ea1cc527af8bbff0e to your computer and use it in GitHub Desktop.
Save irrationalRock/75eebdbf464e9b6ea1cc527af8bbff0e to your computer and use it in GitHub Desktop.
BrotliParseAsUTF8(14.90)
/* Reads one symbol from the front of |input| and stores it in |*symbol|.

   |input| must point to at least one readable byte: the first byte is
   examined unconditionally, regardless of |size|. Bytes past the first are
   only read when |size| says they are available.

   When the leading bytes form a 1..4 byte UTF-8 sequence whose decoded value
   is not overlong (and, for 4-byte sequences, does not exceed 0x10FFFF),
   |*symbol| receives the decoded value and the sequence length is returned.
   A zero byte is not accepted as a 1-byte sequence. No check excludes the
   range 0xD800..0xDFFF from 3-byte sequences.

   In every other case |*symbol| receives 0x110000 | input[0], a value above
   the Unicode code space, and 1 is returned. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  const int lead = input[0];
  size_t expected = 0;  /* Length implied by the lead byte; 0 if none. */
  int code = 0;         /* Payload bits gathered so far. */
  int lowest = 0;       /* Smallest value that is not an overlong form. */

  if (lead < 0x80) {
    /* Single byte; accepted right away unless it is NUL. */
    if (lead != 0) {
      *symbol = lead;
      return 1;
    }
  } else if ((lead & 0xE0) == 0xC0) {
    expected = 2;
    code = lead & 0x1F;
    lowest = 0x80;
  } else if ((lead & 0xF0) == 0xE0) {
    expected = 3;
    code = lead & 0x0F;
    lowest = 0x800;
  } else if ((lead & 0xF8) == 0xF0) {
    expected = 4;
    code = lead & 0x07;
    lowest = 0x10000;
  }

  if (expected != 0 && size >= expected) {
    /* Fold in continuation bytes (10xxxxxx), stopping at the first byte that
       is not one. */
    size_t i = 1;
    while (i < expected && (input[i] & 0xC0) == 0x80) {
      code = (code << 6) | (input[i] & 0x3F);
      ++i;
    }
    /* The upper bound can only fail for 4-byte sequences; shorter ones
       cannot encode a value that large. */
    if (i == expected && code >= lowest && code <= 0x10FFFF) {
      *symbol = code;
      return expected;
    }
  }

  /* Not a valid sequence: report the raw byte above the Unicode range. */
  *symbol = 0x110000 | lead;
  return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment