Skip to content

Instantly share code, notes, and snippets.

@MattSturgeon
Created July 29, 2016 13:59
Show Gist options
  • Save MattSturgeon/a0817aa41e2c675d4239c9c1732156c8 to your computer and use it in GitHub Desktop.
Save MattSturgeon/a0817aa41e2c675d4239c9c1732156c8 to your computer and use it in GitHub Desktop.
#include "utf.hpp"
#include <iostream>
#include <string>
// Print `label`, the number of code units in `text`, and then every code
// unit of `text` in hexadecimal (with a 0x prefix), separated by spaces.
//
// `Unit` is the type each element is read as before it is printed. Passing
// an unsigned type (e.g. unsigned char for std::string) keeps bytes >= 0x80
// from being printed as sign-extended values.
template <typename Unit, typename Text>
static void print_code_units(const char *label, const Text &text)
{
    std::cout << label << std::dec << text.size() << " code units:" << '\n';
    for (Unit unit : text)
        std::cout << std::hex << std::showbase << static_cast<unsigned long>(unit) << ' ';
    std::cout << '\n' << '\n';
}

// Demo: round-trip a UTF-8 string through UTF-16 and dump all three strings
// as hex code units so the conversions can be checked by eye.
int main()
{
    // UTF-8 encoding of "zß水𝄋" (U+007A U+00DF U+6C34 U+1D10B), written as
    // explicit bytes. This is byte-for-byte what u8"z\u00df\u6c34\U0001d10b"
    // produces, but it also compiles as C++20 and later, where u8 literals
    // are char8_t arrays and can no longer initialise a std::string.
    const std::string utf8 = "\x7a\xc3\x9f\xe6\xb0\xb4\xf0\x9d\x84\x8b";

    // Create a UTF-16 version in a 2-byte/16-bit char string
    const std::u16string utf16 = UTF::widen(utf8);

    // Convert the UTF-16 version back to UTF-8 for fun
    const std::string new_utf8 = UTF::narrow(utf16);

    // Print out the original UTF-8 string as hex
    print_code_units<unsigned char>("Original UTF8 string has ", utf8);

    // Print out the newly converted UTF-8 version
    print_code_units<unsigned char>("UTF8 conversion produced ", new_utf8);

    // Print out the UTF-16 version
    print_code_units<char16_t>("UTF16 conversion produced ", utf16);

    return 0;
}
/*
* Basic C++11 library to convert between UTF-8 and UTF-16 using the STL.
*
*
* By Matt Sturgeon 2016, may be used either as public domain or under the MIT Licence.
*
* TODO:
* - Consider adding some wrappers around cout, cerr and ifstream to use UTF-16 if on Windows
* - Consider adding support for converting to wstring and u32string (wchar_t, and therefore
* wstring, is either 16-bit or 32-bit depending on the platform; on Windows it is 16-bit.)
*
*/
#ifndef CONVERT_UTF_H
#define CONVERT_UTF_H
#include <string>
#include <locale>
#include <codecvt>
namespace UTF {

// C++ wide string converter object
// Convert UTF-8 <--> UTF-16
//
// Declared `static` at namespace scope, so every translation unit that
// includes this header gets its own instance. All functions below go through
// this single object.
// NOTE(review): std::wstring_convert is presumably not safe to call from
// several threads at once -- confirm before using these helpers concurrently.
static std::wstring_convert <std::codecvt_utf8_utf16<char16_t>, char16_t> convert {};

// Convert a UTF-8 string to a UTF-16 u16string (2 byte wide char).
//
// The whole of `str` is converted, including any embedded '\0' characters.
// Throws std::range_error if `str` cannot be converted (the converter was
// built without a fallback error string).
static std::u16string widen (const std::string &str) {
    // Pass the string object, not str.data(): the pointer overload stops at
    // the first '\0' and would silently truncate the input.
    return convert.from_bytes(str);
}

// Convert a NUL-terminated UTF-8 C string to UTF-16.
//
// A null pointer yields an empty string. Throws std::range_error if the
// bytes cannot be converted.
static std::u16string widen (const char *str) {
    if (str == nullptr)
        return std::u16string();
    return convert.from_bytes(str);
}

// Convert a 2 byte wide char UTF-16 string into a 1 byte char UTF-8 string.
//
// The whole of `str` is converted, including any embedded u'\0' code units.
// Throws std::range_error if `str` cannot be converted.
static std::string narrow (const std::u16string &str) {
    // Pass the string object, not str.data(), so embedded u'\0' code units
    // do not cut the conversion short.
    return convert.to_bytes(str);
}

// Convert a NUL-terminated UTF-16 C string to UTF-8.
//
// A null pointer yields an empty string. Throws std::range_error if the
// code units cannot be converted.
static std::string narrow (const char16_t *str) {
    if (str == nullptr)
        return std::string();
    return convert.to_bytes(str);
}

} // namespace UTF
#endif // CONVERT_UTF_H
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment