-
-
Save blockspacer/e5f32cb0c94fbaa6d8207e642ceb42bd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "utf.hpp" | |
#include <iostream> | |
#include <string> | |
int main() | |
{ | |
// A UTF-8 literal | |
std::string utf8 = u8"z\u00df\u6c34\U0001d10b"; // or u8"zß水𝄋" | |
// or "\x7a\xc3\x9f\xe6\xb0\xb4\xf0\x9d\x84\x8b"; | |
// Create a UTF-16 version in a 2-byte/16-bit char string | |
std::u16string utf16 = UTF::widen(utf8); | |
// Convert the UTF-16 version back to UTF-8 for fun | |
std::string new_utf8 = UTF::narrow(utf16); | |
// Print out the origional UTF-8 string as hex | |
std::cout << "Origional UTF8 string has " << std::dec << utf8.size() << " code units:" << std::endl; | |
for (unsigned char c : utf8) | |
std::cout << std::hex << std::showbase << (unsigned int)c << ' '; | |
std::cout << std::endl << std::endl; | |
// Print out the newly converted UTF-8 version | |
std::cout << "UTF8 conversion produced " << std::dec << new_utf8.size() << " code units:" << std::endl; | |
for (unsigned char c : new_utf8) | |
std::cout << std::hex << std::showbase << (unsigned int)c << ' '; | |
std::cout << std::endl << std::endl; | |
// Print out the UTF-16 version | |
std::cout << "UTF16 conversion produced " << std::dec << utf16.size() << " code units:" << std::endl; | |
for (char16_t c : utf16) | |
std::cout << std::hex << std::showbase << (unsigned long)c << ' '; | |
std::cout << std::endl << std::endl; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Basic C++11 library to convert between UTF-8 and UTF-16 using the STL. | |
* | |
* | |
* By Matt Sturgeon 2016, may be used either as public domain or under the MIT Licence. | |
* | |
* TODO: | |
* - Consider adding some wrappers around cout, cerr and ifstream to use UTF-16 if on Windows | |
* - Consider adding support for converting to wstring and u32string (widestring will either be | |
* 16bit or 32 bit. On Windows it is 16bit.) | |
* | |
*/ | |
#ifndef CONVERT_UTF_H | |
#define CONVERT_UTF_H | |
#include <string> | |
#include <locale> | |
#include <codecvt> | |
// Helpers for converting between UTF-8 (std::string) and UTF-16 (std::u16string)
// built on std::wstring_convert.
namespace UTF {

    // C++ wide string converter object
    // Convert UTF-8 <--> UTF-16
    //
    // Declared `static`, so every translation unit including this header gets
    // its own instance. The object is shared by all calls within that unit and
    // is not synchronised; callers using several threads must serialise access.
    // It is constructed without error strings, so a failed conversion throws
    // std::range_error.
    static std::wstring_convert <std::codecvt_utf8_utf16<char16_t>, char16_t> convert {};

    // Convert a UTF-8 string to a UTF-16 u16string (2 byte wide char).
    // The whole string is converted, including any embedded '\0' bytes.
    // Throws std::range_error if `str` is not valid UTF-8.
    static std::u16string widen (const std::string &str) {
        // Pass the string itself rather than str.data(): the pointer overload
        // stops at the first NUL and would silently truncate the input.
        return convert.from_bytes(str);
    }

    // Convert a NUL-terminated UTF-8 C string to UTF-16.
    // `str` must not be null.
    static std::u16string widen (const char *str) {
        return widen(std::string(str));
    }

    // Convert a 2 byte wide char UTF-16 string into a 1 byte char UTF-8 string.
    // The whole string is converted, including any embedded u'\0' code units.
    // Throws std::range_error if `str` is not valid UTF-16.
    static std::string narrow (const std::u16string &str) {
        // Same reasoning as in widen(): avoid the NUL-terminated pointer overload.
        return convert.to_bytes(str);
    }

    // Convert a NUL-terminated UTF-16 C string to UTF-8.
    // `str` must not be null.
    static std::string narrow (const char16_t *str) {
        return narrow(std::u16string(str));
    }

} // UTF
#endif // CONVERT_UTF_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I recommend using the ICU library instead, because <codecvt> and std::wstring_convert have been deprecated since C++17.