Created
December 24, 2010 13:49
-
-
Save shnya/754247 to your computer and use it in GitHub Desktop.
Surrogate Pair Test (For Windows VC9)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "stdafx.h"
#include "Windows.h"
#include "winnls.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
using namespace std;
// Dumps a memory region to stdout as hexadecimal.
//
// str  - start of the region to dump; read as raw unsigned bytes.
// size - number of bytes to dump.
//
// Each byte is written as two lowercase hex digits with no separator, and a
// single newline is written after the last byte (or alone when size is 0).
void PrintBinary(const void *str, size_t size){
    const unsigned char *cursor = static_cast<const unsigned char *>(str);
    const unsigned char *const stop = cursor + size;
    while(cursor != stop){
        std::printf("%02x", *cursor);
        ++cursor;
    }
    std::printf("\n");
}
// Converts a NUL-terminated UTF-8 string to UTF-16 with MultiByteToWideChar
// and prints the number of wchar_t code units in the result, followed by a
// newline.
//
// utf8 - NUL-terminated UTF-8 input. The terminator itself is not converted.
//
// The printed value is std::wstring::size(), i.e. a count of UTF-16 code
// units, not of code points: a character encoded as a surrogate pair
// contributes 2.
//
// On conversion failure (or a null utf8) prints "failed" and terminates the
// process with exit(-1). An empty string prints 0.
//
// Input longer than INT_MAX bytes is not supported, because the API takes the
// source length as an int.
void PrintSizeWchar(const char *utf8){
    if(utf8 == NULL){
        std::printf("failed\n");
        std::exit(-1);
    }

    const int srcBytes = static_cast<int>(std::strlen(utf8));
    std::wstring wstr;

    // MultiByteToWideChar rejects a zero-length source, so an empty string is
    // handled here as "zero code units" instead of being reported as failure.
    if(srcBytes > 0){
        // First call with a zero-sized destination: the API only reports how
        // many wchar_t units the conversion needs.
        const int needed = ::MultiByteToWideChar(CP_UTF8, 0, utf8, srcBytes, NULL, 0);
        if(needed <= 0){
            std::printf("failed\n");
            std::exit(-1);
        }

        // The destination size is expressed in wchar_t units, not bytes.
        wstr.resize(static_cast<std::wstring::size_type>(needed));
        const int written = ::MultiByteToWideChar(CP_UTF8, 0, utf8, srcBytes, &wstr[0], needed);
        if(written <= 0){
            std::printf("failed\n");
            std::exit(-1);
        }
        wstr.resize(static_cast<std::wstring::size_type>(written));
    }

    // size_t and unsigned long differ in width on 64-bit Windows, so convert
    // explicitly to the type that "%lu" expects.
    std::printf("%lu\n", static_cast<unsigned long>(wstr.size()));
    //PrintBinary(wstr.c_str(), wstr.size() * sizeof(wchar_t));
}
int _tmain(int argc, _TCHAR *argv[]) | |
{ | |
// http://unicode.org/cgi-bin/GetUnihanData.pl?codepoint=3042 | |
// This character don't use Surrogate Pair. | |
PrintSizeWchar("\xE3\x81\x82"); | |
// http://unicode.org/cgi-bin/GetUnihanData.pl?codepoint=2000B | |
// This character use Surrogate Pair. | |
PrintSizeWchar("\xF0\xA0\x80\x8B"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I compiled this source in the following environment: Visual Studio 2008 (VC9), Windows 7 Ultimate.
It incorrectly outputs the following.
---- OUTPUT -----
1
2