Created
February 9, 2012 08:55
-
-
Save Ovid/1778591 to your computer and use it in GitHub Desktop.
Unicode Collation Algorithm in Perl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict; | |
use warnings; | |
use utf8::all; | |
use Test::More; | |
use Unicode::Collate; | |
# Fixture strings: three spellings of "capital A with ring above".
#   $a1 - U+212B ANGSTROM SIGN (one code point)
#   $a2 - U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE (one code point)
#   $a3 - U+0041 LATIN CAPITAL LETTER A followed by
#         U+030A COMBINING RING ABOVE (two code points)
my ( $a1, $a2, $a3 ) = (
    "\N{U+212B}",
    "\N{U+00C5}",
    "\N{U+0041}\N{U+030A}",
);
# Compares the three fixture strings ($a1, $a2, $a3) with Perl's built-in
# cmp operator. cmp returns 0 when its operands are equal, so each check
# negates the result before handing it to ok().
#
# The subtest label names cmp because cmp is the operator every assertion
# below actually uses.
subtest "Perl's cmp operator" => sub {

    # Every string must compare equal to itself.
    ok !( $a1 cmp $a1 ), "U+212B and U+212B should evaluate the same";
    ok !( $a2 cmp $a2 ), "U+00C5 and U+00C5 should evaluate the same";
    ok !( $a3 cmp $a3 ), "U+0041 U+030A and U+0041 U+030A should evaluate the same";

    # Cross-spelling comparisons are marked TODO: the strings contain
    # different code points, so a plain cmp is not expected to return 0
    # for them. Keeping them as TODO records the expectation without
    # failing the run.
    TODO: {
        local $TODO = "Perl's cmp operator compares code points and ignores canonical equivalence";
        ok !( $a1 cmp $a2 ), "U+212B and U+00C5 should evaluate the same";
        ok !( $a1 cmp $a3 ), "U+212B and U+0041 U+030A should evaluate the same";
        ok !( $a2 cmp $a3 ), "U+00C5 and U+0041 U+030A should evaluate the same";
    }
};
# Runs the same six comparisons as the previous subtest, this time through
# the cmp method of a default-constructed Unicode::Collate object. None of
# these assertions is marked TODO, so all six are required to pass.
subtest "Unicode Collation Algorithm" => sub {
    my $collator = Unicode::Collate->new;

    # Each row: left operand, right operand, test description.
    my @comparisons = (
        [ $a1, $a1, "U+212B and U+212B should evaluate the same" ],
        [ $a2, $a2, "U+00C5 and U+00C5 should evaluate the same" ],
        [ $a3, $a3, "U+0041 U+030A and U+0041 U+030A should evaluate the same" ],
        [ $a1, $a2, "U+212B and U+00C5 should evaluate the same" ],
        [ $a1, $a3, "U+212B and U+0041 U+030A should evaluate the same" ],
        [ $a2, $a3, "U+00C5 and U+0041 U+030A should evaluate the same" ],
    );

    for my $comparison (@comparisons) {
        my ( $left, $right, $description ) = @{$comparison};

        # cmp returns 0 for operands that collate equally; negate for ok().
        ok !( $collator->cmp( $left, $right ) ), $description;
    }
};

# Tell the harness that every planned test has been run.
done_testing;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment