Skip to content

Instantly share code, notes, and snippets.

@alibitek
Last active March 13, 2017 13:47
Show Gist options
  • Save alibitek/44e0895490f663afeccc5b062c12ce81 to your computer and use it in GitHub Desktop.
Save alibitek/44e0895490f663afeccc5b062c12ce81 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <string>
#include <cassert>
// Source: https://stackoverflow.com/questions/395832/how-to-get-code-point-number-for-a-given-character-in-a-utf-8-string
/**
 * Decodes the first UTF-8 encoded character at `utf8` into a single UCS-2
 * code unit.
 *
 * Handles 1-, 2- and 3-byte sequences (code points up to U+FFFF). Overlong
 * encodings and surrogate code points are not rejected.
 *
 * @param utf8 Pointer to the lead byte of a UTF-8 sequence. The buffer must
 *             be NUL-terminated or contain the complete sequence.
 * @return The decoded code point, or `(wchar_t)-1` when the lead byte starts a
 *         4-byte sequence (not representable in UCS-2), is not a valid lead
 *         byte, or is not followed by the required continuation bytes.
 */
wchar_t utf8_char_to_ucs2(const unsigned char *utf8)
{
    const wchar_t invalid = static_cast<wchar_t>(-1);
    // Every byte after the lead byte must look like 10xxxxxx.
    const auto is_continuation = [](unsigned char byte) { return (byte & 0xC0) == 0x80; };

    const unsigned char lead = utf8[0];

    if ((lead & 0x80) == 0) // 0xxxxxxx
        return static_cast<wchar_t>(lead);

    if ((lead & 0xE0) == 0xC0) { // 110xxxxx 10xxxxxx
        if (!is_continuation(utf8[1]))
            return invalid;
        return static_cast<wchar_t>(((lead & 0x1F) << 6) | (utf8[1] & 0x3F));
    }

    if ((lead & 0xF0) == 0xE0) { // 1110xxxx 10xxxxxx 10xxxxxx
        // The || short-circuits: when utf8[1] is the NUL terminator of a
        // truncated string, utf8[2] (past the end) is never read.
        if (!is_continuation(utf8[1]) || !is_continuation(utf8[2]))
            return invalid;
        return static_cast<wchar_t>(((lead & 0x0F) << 12) | ((utf8[1] & 0x3F) << 6) | (utf8[2] & 0x3F));
    }

    return invalid; // uh-oh, UCS-2 can't handle code points this high (or the lead byte is malformed)
}
/**
 * Smoke test: decodes the UTF-8 encoding of U+3188, prints the resulting
 * code point in decimal and hexadecimal, and asserts it equals 12680.
 */
int main()
{
    // Nº 12680 https://codepoints.net/U+3188
    const auto result = utf8_char_to_ucs2(reinterpret_cast<const unsigned char*>(u8"ㆈ"));

    // Stream an explicit integer rather than the wchar_t itself: inserting a
    // wchar_t into a narrow stream only works through implicit integer
    // promotion and is a deleted overload from C++20 onward.
    const auto code_point = static_cast<unsigned long>(result);

    // std::endl is kept on purpose so the line is flushed before a failing
    // assert aborts the process.
    std::cout << code_point << " " << std::hex << code_point << std::endl;

    assert(result == 12680);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment