Created
December 23, 2010 07:51
-
-
Save tkusano/752713 to your computer and use it in GitHub Desktop.
Unicode Normalization test using Unicode::Normalize
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl
#
# Unicode normalization demo: prints every sample string in each of the
# NFD, NFC, NFKD and NFKC forms along with its byte and character counts.
use strict;
use warnings;
use local::lib;
use Unicode::Normalize;

# "use bytes" followed by "no bytes" loads the bytes module (so that
# bytes::length() is callable by fully-qualified name) without leaving
# byte semantics switched on for the rest of the file.
use bytes;
no bytes;

use utf8;    # the string literals below are written as UTF-8 source text
use Encode qw(encode_utf8);

# Output strings are encoded explicitly with encode_utf8() before printing,
# so STDOUT must pass bytes through untouched.  (The handle name used to be
# misspelled, which made this call a silent no-op.)
binmode( STDOUT, ':raw' ) or die "binmode STDOUT: $!";

# Sample strings that are run through each normalization form by main().
our $str_list = [
"漢字", "パンダ",
"ABC", "トントカイモ",
"パンダ", "#♯1",
"⓵\x{20DD}1", "①⑳",
"﹡", "ゔヴ",
"⒙",
];

main();
exit(0);
# Print a normalization report for every string in $str_list.
#
# For each input string, one line is printed per normalization form
# (NFD, NFC, NFKD, NFKC) showing the normalized text, the number of bytes
# in its UTF-8 encoding and its length in characters.  The reports for
# successive input strings are separated by a blank line.
#
# Takes no arguments and returns nothing.
sub main {
    # Code references instead of a string eval: no runtime compilation, and
    # a failure cannot vanish into an unchecked $@.  An array of pairs keeps
    # the output order fixed.
    my @forms = (
        [ NFD  => \&NFD ],
        [ NFC  => \&NFC ],
        [ NFKD => \&NFKD ],
        [ NFKC => \&NFKC ],
    );

    my $first = 1;
    foreach my $str (@$str_list) {
        print "\n" unless $first;
        foreach my $form (@forms) {
            my ( $name, $normalize ) = @$form;
            my $cstr = $normalize->($str);

            # Measure the octets that are actually written out, rather than
            # Perl's internal representation of the character string.
            my $octets = encode_utf8($cstr);
            printf " %s: '%s' (%d bytes, %d chars)\n", $name, $octets,
                length($octets), length($cstr);
        }
        $first = 0;
    }
    return;
}
## end of script
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment