Created
February 25, 2014 09:54
-
-
Save ynakajima/9206063 to your computer and use it in GitHub Desktop.
C言語で、UTF-8 の文字列から Unicode のコードポイントを取得するやりかた ref: http://qiita.com/ynakajima/items/0510115ba19ab66e6670
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Convert the UTF-8 string to UCS-4 (an array of gunichar code points). | |
// -1 is passed for the length and NULL for the items_written out-parameter. | |
// NOTE(review): the "_fast" variant presumably skips input validation - confirm against the GLib docs. | |
gunichar* codepoints = g_utf8_to_ucs4_fast(utf8_text, -1, NULL); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ brew install glib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ gcc -Wall -I/usr/local/include/glib-2.0 -I/usr/local/lib/glib-2.0/include -o utf8_to_codepoint utf8_to_codepoint.c -lglib-2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./utf8_to_codepoint "UTF-8の文字列を変換" | |
[U+0055] [U+0054] [U+0046] [U+002D] [U+0038] [U+306E] [U+6587] [U+5B57] [U+5217] [U+3092] [U+5909] [U+63DB] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./utf8_to_codepoint "𠀋𡈽𡌛𡑮" | |
[U+2000B] [U+2123D] [U+2131B] [U+2146E] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <glib.h> | |
// Prototype of the conversion function used in this example: it takes the | |
// source string `str`, a length `len`, and an out-pointer `items_written`, | |
// and returns a pointer to gunichar values. | |
gunichar* g_utf8_to_ucs4_fast(const gchar *str, | |
glong len, | |
glong *items_written); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <glib.h> | |
int main (int argc, char* argv[]) { | |
// 引き数が渡されてなかったら終了 | |
if (argc < 2) { | |
fprintf(stderr, "usage: %s text\n", argv[0]); | |
return 1; | |
} | |
// 引き数として渡された文字列を代入 | |
char* utf8_text = argv[1]; | |
// テキストの文字数を取得 | |
glong length = g_utf8_strlen(utf8_text, -1); | |
// UCS4に変換 | |
gunichar* codepoints = g_utf8_to_ucs4_fast(utf8_text, -1, NULL); | |
// 1文字づつ code point を出力 | |
for (int i = 0; i < length; i++) { | |
printf("[U+%04X] ", codepoints[i]); | |
} | |
printf("\n"); | |
// 終了処理 | |
g_free(codepoints); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment