Skip to content

Instantly share code, notes, and snippets.

@Ovid
Created February 9, 2012 08:55
Show Gist options
  • Save Ovid/1778591 to your computer and use it in GitHub Desktop.
Save Ovid/1778591 to your computer and use it in GitHub Desktop.
Unicode Collation Algorithm in Perl
use strict;
use warnings;
use utf8::all;
use Test::More;
use Unicode::Collate;
# Three spellings of the letter "Å" used as fixtures by the subtests below.
# The first two are single code points; the third is a base letter followed
# by a combining mark (two code points).
my ( $a1, $a2, $a3 ) = (
    "\N{U+212B}",              # ANGSTROM SIGN
    "\N{U+00C5}",              # LATIN CAPITAL LETTER A WITH RING ABOVE
    "\N{U+0041}\N{U+030A}",    # LATIN CAPITAL LETTER A + COMBINING RING ABOVE
);
# Baseline: compare the three spellings of "Å" with Perl's built-in cmp
# operator, applied directly to the strings with no normalization step.
# Each string is first compared with itself; those comparisons must yield 0.
# The cross-form comparisons are wrapped in a TODO block, so their failures
# are reported as expected rather than failing the run.
subtest "Perl's cmp operator" => sub {

    # is(..., 0, ...) is used instead of ok(!...) so that a failing
    # comparison shows the actual cmp result in the diagnostics.
    is( ( $a1 cmp $a1 ), 0, "U+212B and U+212B should evaluate the same" );
    is( ( $a2 cmp $a2 ), 0, "U+00C5 and U+00C5 should evaluate the same" );
    is( ( $a3 cmp $a3 ), 0,
        "U+0041 U+030A and U+0041 U+030A should evaluate the same" );

  TODO: {
        # $TODO is localized so that only the tests inside this block are
        # marked as TODO.
        local $TODO = "Perl's cmp operator is broken";

        is( ( $a1 cmp $a2 ), 0, "U+212B and U+00C5 should evaluate the same" );
        is( ( $a1 cmp $a3 ), 0,
            "U+212B and U+0041 U+030A should evaluate the same" );
        is( ( $a2 cmp $a3 ), 0,
            "U+00C5 and U+0041 U+030A should evaluate the same" );
    }
};
# Compare the same three spellings of "Å" through the cmp method of a
# Unicode::Collate object built with default options.  Unlike the built-in
# operator test, every comparison here -- including the cross-form ones --
# is asserted to return 0, with no TODO wrapper.
subtest "Unicode Collation Algorithm" => sub {
    my $collator = Unicode::Collate->new;

    # is(..., 0, ...) is used instead of ok(!...) so that a failing
    # comparison shows the actual result in the diagnostics.

    # Each form compared with itself.
    is( $collator->cmp( $a1, $a1 ), 0,
        "U+212B and U+212B should evaluate the same" );
    is( $collator->cmp( $a2, $a2 ), 0,
        "U+00C5 and U+00C5 should evaluate the same" );
    is( $collator->cmp( $a3, $a3 ), 0,
        "U+0041 U+030A and U+0041 U+030A should evaluate the same" );

    # Each form compared with the other two.
    is( $collator->cmp( $a1, $a2 ), 0,
        "U+212B and U+00C5 should evaluate the same" );
    is( $collator->cmp( $a1, $a3 ), 0,
        "U+212B and U+0041 U+030A should evaluate the same" );
    is( $collator->cmp( $a2, $a3 ), 0,
        "U+00C5 and U+0041 U+030A should evaluate the same" );
};
# No test count was planned up front; signal that the script ran to the end.
done_testing();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment