Skip to content

Instantly share code, notes, and snippets.

@blockspacer
Forked from MattSturgeon/example.cpp
Created March 18, 2021 15:51
Show Gist options
  • Save blockspacer/e5f32cb0c94fbaa6d8207e642ceb42bd to your computer and use it in GitHub Desktop.
Save blockspacer/e5f32cb0c94fbaa6d8207e642ceb42bd to your computer and use it in GitHub Desktop.
#include "utf.hpp"
#include <iostream>
#include <string>
int main()
{
    // Sample text "zß水𝄋" written as a UTF-8 literal.
    // Its encoded bytes are: 7a c3 9f e6 b0 b4 f0 9d 84 8b
    const std::string utf8 = u8"z\u00df\u6c34\U0001d10b";

    // Round trip: UTF-8 -> UTF-16 (16-bit code units) -> UTF-8 again.
    const std::u16string utf16 = UTF::widen(utf8);
    const std::string new_utf8 = UTF::narrow(utf16);

    // Dump the original UTF-8 bytes in hexadecimal.
    // std::dec is needed for the count because std::hex is sticky on the stream.
    std::cout << "Origional UTF8 string has " << std::dec << utf8.size() << " code units:" << std::endl;
    std::cout << std::hex << std::showbase;
    for (unsigned char byte : utf8) {
        // Widen to an integer type so the value is printed as a number, not a character.
        std::cout << static_cast<unsigned int>(byte) << ' ';
    }
    std::cout << std::endl << std::endl;

    // Dump the bytes produced by converting back from UTF-16.
    std::cout << "UTF8 conversion produced " << std::dec << new_utf8.size() << " code units:" << std::endl;
    std::cout << std::hex << std::showbase;
    for (unsigned char byte : new_utf8) {
        std::cout << static_cast<unsigned int>(byte) << ' ';
    }
    std::cout << std::endl << std::endl;

    // Dump the UTF-16 code units.
    std::cout << "UTF16 conversion produced " << std::dec << utf16.size() << " code units:" << std::endl;
    std::cout << std::hex << std::showbase;
    for (char16_t unit : utf16) {
        std::cout << static_cast<unsigned long>(unit) << ' ';
    }
    std::cout << std::endl << std::endl;

    return 0;
}
/*
* Basic C++11 library to convert between UTF-8 and UTF-16 using the STL.
*
*
* By Matt Sturgeon 2016, may be used either as public domain or under the MIT Licence.
*
* TODO:
* - Consider adding some wrappers around cout, cerr and ifstream to use UTF-16 if on Windows
* - Consider adding support for converting to wstring and u32string (a wide string is either
* 16-bit or 32-bit depending on the platform; on Windows it is 16-bit).
*
*/
#ifndef CONVERT_UTF_H
#define CONVERT_UTF_H
#include <string>
#include <locale>
#include <codecvt>
namespace UTF {

// Converter object shared by every function in this namespace.
// Converts between UTF-8 (held in char) and UTF-16 (held in char16_t).
//
// It is default-constructed, i.e. without fallback error strings, so a
// conversion that fails (malformed input) throws std::range_error.
//
// Because it is declared `static` in a header, each translation unit that
// includes this file gets its own instance.
// NOTE(review): std::wstring_convert keeps conversion state, so calling these
// helpers from several threads at once presumably needs external locking - confirm.
static std::wstring_convert <std::codecvt_utf8_utf16<char16_t>, char16_t> convert {};

// Convert a UTF-8 string to a UTF-16 u16string (2 byte wide char).
//
// str:     UTF-8 encoded bytes; may contain embedded NUL characters.
// returns: the same text as UTF-16 code units.
// throws:  std::range_error if str is not valid UTF-8.
static std::u16string widen (const std::string &str) {
    // Pass the string object rather than str.data(): the const char* overload
    // stops at the first NUL and would silently truncate the input.
    return convert.from_bytes(str);
}

// Convert a NUL-terminated UTF-8 C string to UTF-16.
// str must be a valid, non-null pointer (it is used to construct a std::string).
static std::u16string widen (const char *str) {
    return widen(std::string(str));
}

// Convert a 2 byte wide char UTF-16 string into a 1 byte char UTF-8 string.
//
// str:     UTF-16 code units; may contain embedded NUL characters.
// returns: the same text encoded as UTF-8.
// throws:  std::range_error if str is not valid UTF-16.
static std::string narrow (const std::u16string &str) {
    // As in widen(): hand over the whole string so the full length is
    // converted, not just the part before the first NUL code unit.
    return convert.to_bytes(str);
}

// Convert a NUL-terminated UTF-16 C string to UTF-8.
// str must be a valid, non-null pointer (it is used to construct a std::u16string).
static std::string narrow (const char16_t *str) {
    return narrow(std::u16string(str));
}

} // namespace UTF
#endif // CONVERT_UTF_H
@blockspacer
Copy link
Author

I recommend using the ICU library instead, because the <codecvt> facilities used here are deprecated as of C++17.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment