Created
May 25, 2024 03:32
-
-
Save dkosmari/2bcb33a47cf2b137bd9549c9d89dfba2 to your computer and use it in GitHub Desktop.
C++ code to convert UTF-16 encoding to UTF-8 encoding.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Converts a UTF-16 encoded string to a UTF-8 encoded string.
///
/// Conversion stops early (returning whatever was converted so far) when:
///  - a U+0000 code point is encountered (treated as a terminator), or
///  - the input is malformed: a lone low surrogate, a high surrogate that is
///    not followed by a low surrogate, or a high surrogate at end of input.
///
/// @param input  UTF-16 code units to convert.
/// @return       The UTF-8 encoding of the well-formed prefix of `input`.
std::string
to_utf8(const std::u16string& input)
{
    // TODO: try using std::c16rtomb()
    std::string output;

    for (auto i = input.begin(); i != input.end(); ++i) {
        const char32_t unit = *i;
        char32_t codepoint = 0;

        if (unit < 0xd800 || unit > 0xdfff) {
            // Any code unit outside the surrogate range (including
            // U+E000..U+FFFF) is a BMP code point on its own.
            codepoint = unit;
        } else {
            const char32_t high_surrogate = unit;
            if ((high_surrogate & 0xfc00) != 0xd800)
                break; // error: invalid high surrogate
            if (++i == input.end())
                break; // error: reached the end too soon.
            const char32_t low_surrogate = *i;
            if ((low_surrogate & 0xfc00) != 0xdc00)
                break; // error: invalid low surrogate
            // Surrogate pairs encode code points starting at U+10000, so the
            // 20 payload bits must be offset by 0x10000.
            codepoint = 0x10000
                + (((high_surrogate & 0x3ff) << 10)
                   | (low_surrogate & 0x3ff));
        }

        if (!codepoint)
            break; // null terminator

        // now encode as UTF-8
        if (codepoint <= 0x7f) {
            output.push_back(static_cast<char>(codepoint));
        } else if (codepoint <= 0x7ff) {
            output.push_back(static_cast<char>(0xc0 | (codepoint >> 6)));
            output.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
        } else if (codepoint <= 0xffff) {
            output.push_back(static_cast<char>(0xe0 | (codepoint >> 12)));
            output.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
            output.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
        } else {
            output.push_back(static_cast<char>(0xf0 | (codepoint >> 18)));
            output.push_back(static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
            output.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
            output.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
        }
    }

    return output;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment