Skip to content

Instantly share code, notes, and snippets.

@mikeash
Created November 8, 2015 07:09
Show Gist options
  • Save mikeash/1991612528d706f7cc4a to your computer and use it in GitHub Desktop.
Save mikeash/1991612528d706f7cc4a to your computer and use it in GitHub Desktop.
/*
 * Result of decoding one UTF-8 sequence.
 */
struct DecodeResult {
    uint32_t codepoint;    // decoded code point, or UINT32_MAX if the sequence is malformed
    uint32_t decodedBytes; // number of input bytes consumed; always at least 1
};

/*
 * Decode the single UTF-8 sequence that starts at utf8[0].
 *
 * utf8 must point at readable bytes that end with a byte which is not a
 * continuation byte (a NUL terminator qualifies). Trailing bytes are read
 * one at a time and only while the sequence is still well formed, so the
 * decoder never reads past such a terminator.
 *
 * Returns the code point and the length of the sequence. On malformed input
 * (stray continuation byte, lead byte 0xF8-0xFF, or a sequence cut short by
 * a non-continuation byte) codepoint is UINT32_MAX and decodedBytes is the
 * number of bytes to skip, so the caller resumes at the offending byte.
 *
 * Overlong encodings, surrogates and values above U+10FFFF are not rejected.
 */
static inline struct DecodeResult DecodeOneUTF8(const uint8_t *utf8) {
    // Sequence length indexed by the high nibble of the lead byte.
    // Entries 0x8-0xb (10xx: continuation bytes) are left at 0 = invalid lead.
    static const uint8_t byteCountTable[16] = {
        // 0xxx
        [0x0] = 1,
        [0x1] = 1,
        [0x2] = 1,
        [0x3] = 1,
        [0x4] = 1,
        [0x5] = 1,
        [0x6] = 1,
        [0x7] = 1,
        // 110x
        [0xc] = 2,
        [0xd] = 2,
        // 1110
        [0xe] = 3,
        // 1111
        [0xf] = 4
    };
    // Payload bits carried by the lead byte, indexed by sequence length.
    // A 4-byte lead (11110xxx) carries three bits, hence 0x07.
    static const uint8_t leadPayloadMask[5] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };

    const uint8_t lead = utf8[0];
    const uint8_t byteCount = byteCountTable[lead >> 4];

    // Stray continuation byte, or 11111xxx which is never a valid lead.
    // Consume one byte so callers always make forward progress.
    if (byteCount == 0 || lead >= 0xf8) {
        return (struct DecodeResult){ UINT32_MAX, 1 };
    }

    uint32_t codepoint = lead & leadPayloadMask[byteCount];
    for (uint32_t i = 1; i < byteCount; i++) {
        const uint8_t next = utf8[i];
        // Anything other than 10xxxxxx (including the NUL terminator) ends
        // the sequence early; stop before consuming it.
        if ((next & 0xc0) != 0x80) {
            return (struct DecodeResult){ UINT32_MAX, i };
        }
        codepoint = (codepoint << 6) | (next & 0x3f);
    }
    return (struct DecodeResult){ codepoint, byteCount };
}
/*
 * Decode a NUL-terminated UTF-8 string into an array of code points.
 *
 * utf8        - input bytes, terminated by a 0 byte.
 * destination - receives one uint32_t per decoded sequence. No bounds are
 *               checked here; the caller must supply a large enough buffer.
 *
 * Returns the number of entries written to destination. The terminator is
 * not written.
 */
size_t DecodeUTF8(const uint8_t *utf8, uint32_t *destination) {
    size_t length = 0;
    while (*utf8 != '\0') {
        struct DecodeResult result = DecodeOneUTF8(utf8);
        destination[length++] = result.codepoint;
        // If the decoder reports zero bytes consumed (e.g. a lead byte it
        // could not classify), step over one byte anyway. Otherwise the loop
        // would spin on the same byte forever and overrun destination.
        utf8 += (result.decodedBytes != 0) ? result.decodedBytes : 1;
    }
    return length;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment