Skip to content

Instantly share code, notes, and snippets.

@tkusano
Created December 23, 2010 07:51
Show Gist options
  • Save tkusano/752713 to your computer and use it in GitHub Desktop.
Save tkusano/752713 to your computer and use it in GitHub Desktop.
Unicode Normalization test using Unicode::Normalize
#! /usr/bin/perl
use strict;
use local::lib;
use Unicode::Normalize;
use bytes;
no bytes;
use utf8;
use Encode qw(encode_utf8);
# Put STDOUT in raw (byte) mode: main() prints octets that were already
# encoded with encode_utf8(), so no I/O layer should translate them again.
# (The handle name was previously misspelled, so the call applied to a
# non-existent filehandle and had no effect on standard output.)
binmode STDOUT, ':raw';
# Sample strings that main() feeds through each normalization form.
# Declared with `our` so that main(), defined further down, can refer to it
# by name under `use strict`.
# The literals are written as UTF-8 source text (see `use utf8` above); the
# \x{20DD} escape inserts code point U+20DD explicitly.
# NOTE(review): visually identical entries may differ in their underlying
# code points (composed vs. decomposed forms) -- do not retype these
# literals by hand; confirm against the original file when editing.
our $str_list = [
"漢字", "パンダ",
"ABC", "トントカイモ",
"パンダ", "#♯1",
"⓵\x{20DD}1", "①⑳",
"﹡", "ゔヴ",
"⒙",
];
# Script entry point: produce the report, then exit with status 0.
# main() is defined below; Perl compiles the whole file before running
# these statements, so the forward call is fine.
main();
exit(0);
# main()
#
# For every string in the file-level $str_list, print one line per
# normalization form (NFD, NFC, NFKD, NFKC) showing:
#   - the form name,
#   - the normalized string, encoded as UTF-8 octets,
#   - the number of UTF-8 bytes in that encoded output,
#   - the number of characters (code points) in the normalized string.
# Groups belonging to different input strings are separated by a blank line.
#
# Takes no arguments; its return value is not used by the caller.
sub main {
    my $first = 1;
    foreach my $str (@$str_list) {
        # Blank separator line before every group except the first.
        print "\n" unless $first;
        foreach my $n (qw(NFD NFC NFKD NFKC)) {
            # Select the form through the module's normalize($form, $string)
            # dispatcher rather than compiling a string eval each time; an
            # invalid form name now croaks instead of silently yielding undef.
            my $cstr = Unicode::Normalize::normalize($n, $str);

            # Encode once and measure the octets that are actually printed.
            # length() on the encoded byte string is the UTF-8 byte count,
            # independent of how Perl stores $cstr internally.
            my $octets = encode_utf8($cstr);

            printf " %s: '%s' (%d bytes, %d chars)\n", $n, $octets,
                length($octets), length($cstr);
        }
        $first = 0;
    }
}
## end of script
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment