Created
April 1, 2011 06:08
-
-
Save edobashira/897811 to your computer and use it in GitHub Desktop.
In C++, convert UTF-8 to UTF-16 and vice versa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Decodes a UTF-8 byte string into a sequence of UTF-16 code units.
//
// Parameters:
//   utf8 - input bytes, expected to be UTF-8 encoded.
//
// Returns:
//   A wstring holding one UTF-16 code unit per element. Code points above
//   U+FFFF are emitted as a surrogate pair (two elements), regardless of how
//   wide wchar_t is on the target platform.
//
// Malformed input never causes an out-of-range read. Each of the following is
// replaced by a single U+FFFD (REPLACEMENT CHARACTER) and decoding resumes at
// the next unconsumed byte:
//   - a byte that cannot start a sequence (0x80..0xBF, 0xF8..0xFF),
//   - a sequence cut short by the end of input or by a non-continuation byte,
//   - a 4-byte sequence that decodes to a value above U+10FFFF.
// Overlong encodings and UTF-8-encoded surrogates are NOT rejected; they are
// decoded to their numeric value as-is.
std::wstring UTF8ToUTF16(const std::string& utf8) {
  typedef std::string::size_type size_type;
  const wchar_t kReplacement = static_cast<wchar_t>(0xFFFD);

  std::wstring utf16;
  // Every UTF-8 sequence yields at most as many UTF-16 units as it has bytes,
  // so the byte count is a safe upper bound.
  utf16.reserve(utf8.size());

  const size_type size = utf8.size();
  size_type i = 0;
  while (i < size) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i]);

    // Fast path: 7-bit ASCII maps to itself.
    if (lead < 0x80) {
      utf16 += static_cast<wchar_t>(lead);
      ++i;
      continue;
    }

    // Classify the lead byte: how many continuation bytes follow and which
    // payload bits the lead byte itself contributes.
    size_type extra = 0;
    unsigned long codepoint = 0;
    if ((lead & 0xe0) == 0xc0) {
      extra = 1;
      codepoint = lead & 0x1f;
    } else if ((lead & 0xf0) == 0xe0) {
      extra = 2;
      codepoint = lead & 0x0f;
    } else if ((lead & 0xf8) == 0xf0) {
      extra = 3;
      codepoint = lead & 0x07;
    } else {
      // Stray continuation byte or an invalid lead byte.
      utf16 += kReplacement;
      ++i;
      continue;
    }

    // Consume continuation bytes (10xxxxxx), stopping early at the end of the
    // input or at the first byte that is not a continuation byte. The
    // offending byte is left in place so it gets decoded on its own.
    size_type consumed = 0;
    while (consumed < extra && i + 1 + consumed < size) {
      const unsigned char next =
          static_cast<unsigned char>(utf8[i + 1 + consumed]);
      if ((next & 0xc0) != 0x80) {
        break;
      }
      codepoint = (codepoint << 6) | (next & 0x3f);
      ++consumed;
    }
    i += 1 + consumed;

    if (consumed != extra || codepoint > 0x10FFFFUL) {
      utf16 += kReplacement;
      continue;
    }

    if (codepoint >= 0x10000UL) {
      // Supplementary plane: split into a high/low surrogate pair.
      const unsigned long offset = codepoint - 0x10000UL;
      utf16 += static_cast<wchar_t>(0xD800UL | (offset >> 10));
      utf16 += static_cast<wchar_t>(0xDC00UL | (offset & 0x3ffUL));
    } else {
      utf16 += static_cast<wchar_t>(codepoint);
    }
  }
  return utf16;
}
// Splits a UTF-8 byte string into one string per encoded character.
//
// Parameters:
//   utf8 - input bytes, expected to be UTF-8 encoded.
//   out  - destination vector; must not be null. One element is appended per
//          character, in input order. Existing elements are left in place.
//
// The length of each character is taken from its lead byte (1 to 4 bytes).
// Malformed input never causes an out-of-range read:
//   - a byte that cannot start a sequence (0x80..0xBF, 0xF8..0xFF) is emitted
//     as a one-byte element,
//   - a sequence cut short by the end of input or by a non-continuation byte
//     is emitted with only the bytes actually present; the byte that broke
//     the sequence starts the next element.
void BreakUTF8(const std::string& utf8, std::vector<std::string>* out) {
  typedef std::string::size_type size_type;

  const size_type size = utf8.size();
  size_type i = 0;
  while (i < size) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i]);

    // Sequence length announced by the lead byte. ASCII and invalid lead
    // bytes both fall through as single-byte elements.
    size_type expected = 1;
    if ((lead & 0xe0) == 0xc0) {
      expected = 2;
    } else if ((lead & 0xf0) == 0xe0) {
      expected = 3;
    } else if ((lead & 0xf8) == 0xf0) {
      expected = 4;
    }

    // Extend over continuation bytes (10xxxxxx) only, and never past the end
    // of the input.
    size_type length = 1;
    while (length < expected && i + length < size &&
           (static_cast<unsigned char>(utf8[i + length]) & 0xc0) == 0x80) {
      ++length;
    }

    out->push_back(utf8.substr(i, length));
    i += length;
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment