Skip to content

Instantly share code, notes, and snippets.

@yamamushi
Last active September 11, 2021 04:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yamamushi/9357101 to your computer and use it in GitHub Desktop.
Save yamamushi/9357101 to your computer and use it in GitHub Desktop.
Int to UTF-8 C++
//
// IntToUTF8String.cpp
// Created by Jonathan Rumion on 3/4/13.
// Copyright (c) 2014.
//
// Consider this BSD licensed for all intents and purposes
// You are free to modify and use this as you wish without
// attribution (though it would be appreciated, it is not necessary).
//
#include <string>
std::string IntToUTF8String(int convertMe){
// We only care about plane 1 right now,
// but know that we have other options (0x10FFFF)
// Technically UTF-8 is "limited" to 4 bytes, so it's not
// Like it matters much anyways these days
if(convertMe == 0)
return " ";
if( (convertMe <= 0x7F) && (convertMe > 0x00) ){
std::string out(".");
std::bitset<8> x(convertMe);
unsigned long l = x.to_ulong();
unsigned char c = static_cast<unsigned char>(l);
out[0] = c;
return out;
} else if ( (convertMe >= 0x80) && (convertMe <= 0x07FF) ) {
std::string out("..");
int firstShift = (convertMe >> 0x06) ^ 0xC0;
int secondShift = ((convertMe ^ 0xFFC0) | 0x80) & ~0x40;
std::bitset<8> first(firstShift);
std::bitset<8> last(secondShift);
unsigned long l = first.to_ulong();
unsigned char c = static_cast<unsigned char>(l);
out[0] = c;
unsigned long ltwo = last.to_ulong();
unsigned char ctwo = static_cast<unsigned char>(ltwo);
out[1] = ctwo;
return out;
} else if( (convertMe >= 0x0800) && (convertMe <= 0xFFFF) ){
std::string out("...");
int firstShift = ((convertMe ^ 0xFC0FFF) >> 0x0C) | 0xE0;
int secondShift = (((convertMe ^ 0xFFF03F) >> 0x06) | 0x80) & ~0x40;
int thirdShift = ((convertMe ^ 0xFFFC0) | 0x80) & ~0x40;
std::bitset<8> first(firstShift);
std::bitset<8> second(secondShift);
std::bitset<8> third(thirdShift);
unsigned long lone = first.to_ulong();
unsigned char cone = static_cast<unsigned char>(lone);
out[0] = cone;
unsigned long ltwo = second.to_ulong();
unsigned char ctwo = static_cast<unsigned char>(ltwo);
out[1] = ctwo;
unsigned long lthree = third.to_ulong();
unsigned char cthree = static_cast<unsigned char>(lthree);
out[2] = cthree;
return out;
} else{
return " ";
}
}
Copy link

ghost commented Mar 7, 2014

great piece

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment