Skip to content

Instantly share code, notes, and snippets.

@programus
Created April 1, 2012 12:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save programus/2275148 to your computer and use it in GitHub Desktop.
Save programus/2275148 to your computer and use it in GitHub Desktop.
Functions to convert a string (UTF-8 in the C++ version) to \uXXXX Unicode escape sequences
//
// convert utf-8 string to \uXXXX format.
//
// fromStr : NUL-terminated UTF-8 input (a null pointer is treated as empty).
// toStr   : destination buffer; always NUL-terminated when maxSize > 0.
// maxSize : capacity of toStr in bytes, including the terminating NUL.
//
// Plain ASCII characters are copied as-is; every other UTF-16 code unit is
// written as \uXXXX (upper-case hex). Characters outside the BMP therefore
// become two escapes (a surrogate pair).
//
// return actual length of the escaped string (without the NUL). If the
// returned value is >= maxSize the output was truncated; truncation never
// splits a \uXXXX escape. Returns 0 if the converter cannot be created or
// memory cannot be allocated.
//
namespace {
// iconv() declares its input buffer as char** (POSIX/glibc) or const char**
// (some libiconv builds). This wrapper converts to whichever the header wants.
struct IconvInput {
    const char** ptr;
    explicit IconvInput(const char** p) : ptr(p) {}
    operator const char**() const { return ptr; }
    operator char**() const { return const_cast<char**>(ptr); }
};
}

size_t escapeUnicode(const char* fromStr, char* const toStr, const size_t& maxSize)
{
    const bool hasOutput = (toStr != 0 && maxSize > 0);
    // leave the caller with a valid empty string on every early exit.
    if (hasOutput) {
        toStr[0] = '\0';
    }
    if (fromStr == 0) {
        return 0;
    }

    // initialize iconv
    iconv_t cd = iconv_open("UTF-16BE", "UTF-8");
    if (cd == (iconv_t)-1) {
        return 0;
    }
#if defined(ICONV_SET_TRANSLITERATE) && defined(ICONV_SET_DISCARD_ILSEQ)
    // GNU libiconv only: transliterate and silently drop invalid sequences.
    int arg = 1;
    iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg);
    iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg);
#endif

    // get the size to be converted.
    size_t inLeft = strlen(fromStr);
    // one UTF-8 byte produces at most one UTF-16 code unit (2 bytes),
    // so twice the input length is always enough.
    const size_t utf16Cap = (inLeft << 1) + 2;
    char* utf16 = (char*)calloc(utf16Cap, sizeof(char));
    if (utf16 == 0) {
        iconv_close(cd);
        return 0;
    }

    // iconv() moves the pointers it is given, so work on copies.
    const char* in = fromStr;
    char* out = utf16;
    size_t outLeft = utf16Cap;
    // best effort: on error whatever was converted so far is still escaped.
    (void)iconv(cd, IconvInput(&in), &inLeft, &out, &outLeft);
    iconv_close(cd);
    const size_t utf16Len = utf16Cap - outLeft;

    static const char HEX[] = "0123456789ABCDEF";
    const size_t UNI_ESC_SIZE = 6; // \uXXXX
    size_t n = 0;        // length of the complete escaped string
    size_t written = 0;  // characters actually stored in toStr
    bool room = hasOutput;
    for (size_t i = 0; i + 1 < utf16Len; i += 2) {
        const unsigned char hi = (unsigned char)utf16[i];
        const unsigned char lo = (unsigned char)utf16[i + 1];
        char piece[6];
        size_t pieceLen;
        if (hi == 0 && lo < 0x80) {
            // ascii
            piece[0] = (char)lo;
            pieceLen = 1;
        } else {
            // unicode escape format (also used for U+0080..U+00FF, which
            // are not ASCII even though their high byte is zero)
            piece[0] = '\\';
            piece[1] = 'u';
            piece[2] = HEX[hi >> 4];
            piece[3] = HEX[hi & 0x0F];
            piece[4] = HEX[lo >> 4];
            piece[5] = HEX[lo & 0x0F];
            pieceLen = UNI_ESC_SIZE;
        }
        n += pieceLen;
        if (room) {
            // keep one byte for the terminating NUL.
            if (written + pieceLen < maxSize) {
                memcpy(toStr + written, piece, pieceLen);
                written += pieceLen;
            } else {
                // stop writing but keep counting the full length.
                room = false;
            }
        }
    }
    if (hasOutput) {
        toStr[written] = '\0'; // terminate the string.
    }
    free(utf16);
    return n;
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Text;
namespace CIS_StdWebServices.Utils
{
    /// <summary>
    /// Helpers for producing ASCII-only text suitable for embedding in JSON.
    /// </summary>
    public class JsonUtilities
    {
        /// <summary>
        /// Replaces every non-ASCII UTF-16 code unit in <paramref name="uniStr"/>
        /// with a <c>\uXXXX</c> escape (upper-case hex); ASCII characters are
        /// copied unchanged.
        /// </summary>
        /// <param name="uniStr">The text to escape.</param>
        /// <returns>
        /// The escaped, ASCII-only string. Characters outside the BMP yield two
        /// escapes (one per surrogate).
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="uniStr"/> is <c>null</c>.
        /// </exception>
        public static string escapeUnicode(string uniStr)
        {
            if (uniStr == null)
            {
                throw new ArgumentNullException("uniStr");
            }

            StringBuilder sb = new StringBuilder(uniStr.Length);
            foreach (char c in uniStr)
            {
                // Compare the whole code unit, not just its high byte:
                // U+0080..U+00FF have a zero high byte but are not ASCII.
                if (c > 0x7F)
                {
                    sb.Append("\\u").Append(((int)c).ToString("X4"));
                }
                else
                {
                    sb.Append(c);
                }
            }
            return sb.ToString();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment