Created
December 24, 2010 12:54
-
-
Save shnya/754224 to your computer and use it in GitHub Desktop.
Surrogate Pair Test (For Linux)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <cstdlib> | |
#include <cstring> | |
#include <cstdio> | |
#include <wchar.h> | |
#include <string> | |
#include <iconv.h> | |
/**
 * Converts a NUL-terminated UTF-8 string into the encoding named by to_code
 * using iconv.
 *
 * @param to_code  iconv name of the target encoding (e.g. "wchar_t").
 * @param input    NUL-terminated UTF-8 text to convert.
 * @param output   Receives a malloc()ed buffer holding the converted bytes
 *                 followed by four zero bytes, so the result is terminated
 *                 for code units of 1, 2 or 4 bytes. Set to NULL on failure.
 *                 The caller releases the buffer with free().
 * @return 1 on success, -1 on failure (iconv/allocation failures are
 *         reported on stderr through perror).
 */
int convert(const char *to_code, const char *input, char **output){
  // Number of zero bytes appended after the converted text.
  const size_t terminator_bytes = 4;

  if(output == NULL){
    return -1;
  }
  *output = NULL;
  if(to_code == NULL || input == NULL){
    return -1;
  }

  iconv_t cd = iconv_open(to_code, "UTF-8");
  if(cd == (iconv_t)-1){
    perror("iconv error");
    return -1;
  }

  // iconv() takes a non-const input pointer, so it works on a private copy.
  size_t inlen = strlen(input);
  char *inbuf = strdup(input);
  // Budget 4 output bytes per input byte plus 4 spare bytes (e.g. for a
  // byte-order mark); the terminator is reserved separately so it always fits.
  size_t avail = inlen * 4 + 4;
  char *outbuf = (char *)malloc(avail + terminator_bytes);

  bool ok = (inbuf != NULL && outbuf != NULL);
  if(!ok){
    perror("iconv error");
  }

  char *iptr = inbuf;
  char *wptr = outbuf;
  if(ok && iconv(cd, &iptr, &inlen, &wptr, &avail) == (size_t)-1){
    perror("iconv error");
    ok = false;
  }
  // A NULL input asks iconv to emit any pending shift sequence and reset.
  if(ok && iconv(cd, NULL, NULL, &wptr, &avail) == (size_t)-1){
    perror("iconv error");
    ok = false;
  }
  if(ok){
    memset(wptr, 0, terminator_bytes);
  }

  // The descriptor and the input copy are released on every path.
  if(iconv_close(cd) != 0 && ok){
    perror("iconv error");
    ok = false;
  }
  free(inbuf);

  if(!ok){
    free(outbuf);
    return -1;
  }
  *output = outbuf;
  return 1;
}
/**
 * Dumps a memory region to stdout as hexadecimal.
 *
 * Each of the `size` bytes starting at `str` is printed as two lowercase hex
 * digits with no separator; a single newline follows the last byte.
 */
void PrintBinary(const void *str, size_t size){
  const unsigned char *cursor = reinterpret_cast<const unsigned char *>(str);
  const unsigned char *const end = cursor + size;
  while(cursor != end){
    printf("%02x", *cursor);
    ++cursor;
  }
  printf("\n");
}
using namespace std; | |
int main(int argc, char *argv[]) | |
{ | |
// http://unicode.org/cgi-bin/GetUnihanData.pl?codepoint=2000B | |
// This character use Surrogate Pair. | |
const char *utf8 = "\xF0\xA0\x80\x8B"; | |
char *output; | |
convert("wchar_t", utf8, &output); | |
wchar_t *wcstr = new wchar_t[2]; | |
mbstowcs(wcstr, output, 4); | |
free(output); | |
wstring wstr(wcstr); | |
delete[] wcstr; | |
printf("%lu\n", wstr.size()); | |
//PrintBinary(output, 32); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I compiled this source with gcc 4.4.5 on 64-bit Ubuntu Linux.
It correctly outputs "1".