Skip to content

Instantly share code, notes, and snippets.

@edobashira
Created April 1, 2011 06:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edobashira/897811 to your computer and use it in GitHub Desktop.
Save edobashira/897811 to your computer and use it in GitHub Desktop.
In C++, convert UTF-8 to UTF-16 and vice versa
// Decodes the UTF-8 byte string `utf8` into UTF-16 code units.
//
// Every wchar_t of the result holds exactly one UTF-16 code unit, however wide
// wchar_t is on the platform: code points above U+FFFF (4-byte UTF-8
// sequences) are written as a high/low surrogate pair.
//
// Malformed input never causes a read past the end of `utf8`. Each of the
// following yields a single U+FFFD REPLACEMENT CHARACTER:
//   - a stray continuation byte (0x80..0xBF) or an invalid lead byte
//     (0xF8..0xFF) where a lead byte is expected;
//   - a sequence cut short by a non-continuation byte or by the end of input
//     (the interrupting byte is not consumed; it is decoded on its own);
//   - a 4-byte sequence whose value lies outside U+10000..U+10FFFF.
// Overlong 2- and 3-byte forms and 3-byte encoded surrogates are decoded
// as-is rather than rejected.
wstring UTF8ToUTF16(const string& utf8) {
  const wchar_t kReplacement = 0xFFFD;
  const size_t size = utf8.size();

  wstring utf16;
  // A sequence never produces more code units than it has bytes, so the byte
  // count is a safe upper bound for the output length.
  utf16.reserve(size);

  size_t i = 0;
  while (i < size) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

    // 0xxxxxxx: plain ASCII, one byte maps to one code unit.
    if ((lead & 0x80) == 0x00) {
      utf16 += static_cast<wchar_t>(lead);
      continue;
    }

    // Classify the lead byte: how many continuation bytes follow and which
    // payload bits the lead byte itself contributes.
    unsigned long code_point = 0;
    int continuation_count = 0;
    if ((lead & 0xe0) == 0xc0) {         // 110xxxxx
      code_point = lead & 0x1f;
      continuation_count = 1;
    } else if ((lead & 0xf0) == 0xe0) {  // 1110xxxx
      code_point = lead & 0x0f;
      continuation_count = 2;
    } else if ((lead & 0xf8) == 0xf0) {  // 11110xxx
      code_point = lead & 0x07;
      continuation_count = 3;
    } else {
      // Stray continuation byte or a byte that can never start a sequence.
      utf16 += kReplacement;
      continue;
    }

    // Fold in the continuation bytes (10xxxxxx), stopping at the first byte
    // that is missing or is not a continuation byte.
    bool complete = true;
    for (int k = 0; k < continuation_count; ++k) {
      if (i >= size) {
        complete = false;
        break;
      }
      const unsigned char next = static_cast<unsigned char>(utf8[i]);
      if ((next & 0xc0) != 0x80) {
        complete = false;
        break;
      }
      code_point = (code_point << 6) | (next & 0x3f);
      ++i;
    }
    if (!complete) {
      utf16 += kReplacement;
      continue;
    }

    if (continuation_count < 3) {
      // 2- and 3-byte sequences decode to at most U+FFFF: one code unit.
      utf16 += static_cast<wchar_t>(code_point);
    } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
      // Supplementary plane: split the 20-bit offset into a surrogate pair.
      const unsigned long offset = code_point - 0x10000;
      utf16 += static_cast<wchar_t>(0xD800 + (offset >> 10));
      utf16 += static_cast<wchar_t>(0xDC00 + (offset & 0x3FF));
    } else {
      // Overlong 4-byte form or a value beyond the Unicode range.
      utf16 += kReplacement;
    }
  }
  return utf16;
}
// Splits the UTF-8 byte string `utf8` into pieces, one per encoded character,
// and appends each piece to `*out` in order. `out` must not be null; elements
// already in `*out` are kept.
//
// The lead byte determines how many bytes a character is expected to occupy
// (1 to 4). A piece never extends past the end of `utf8` and never absorbs a
// byte that is not a continuation byte (10xxxxxx), so a truncated or
// interrupted sequence is emitted as a shorter piece. A byte that cannot start
// a sequence (stray continuation byte or 0xF8..0xFF) is emitted as a one-byte
// piece.
void BreakUTF8(const string& utf8, vector<string>* out) {
  const size_t size = utf8.size();
  size_t start = 0;
  while (start < size) {
    const unsigned char lead = static_cast<unsigned char>(utf8[start]);

    // Expected sequence length according to the lead byte; invalid lead bytes
    // fall through with a length of one.
    size_t expected = 1;
    if ((lead & 0xe0) == 0xc0) {
      expected = 2;
    } else if ((lead & 0xf0) == 0xe0) {
      expected = 3;
    } else if ((lead & 0xf8) == 0xf0) {
      expected = 4;
    }

    // Extend the piece only over continuation bytes that are actually there.
    size_t length = 1;
    while (length < expected && start + length < size &&
           (static_cast<unsigned char>(utf8[start + length]) & 0xc0) == 0x80) {
      ++length;
    }

    out->push_back(utf8.substr(start, length));
    start += length;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment