-
-
Save chansen/c9884c0817463fa34284 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
use strict;
use warnings;

{
    # encoding::heuristic - an experimental ${^ENCODING} hook object.
    #
    # Once installed via import(), the object stored in ${^ENCODING} offers
    # decode() and cat_decode() methods. decode() guesses per string: octets
    # that form well-formed UTF-8 are decoded as UTF-8, anything else is
    # upgraded as native (Latin-1) characters.
    #
    # NOTE(review): perlvar documents ${^ENCODING} as deprecated since
    # Perl 5.22 and fatal to assign a defined value to since Perl 5.28, so
    # import() presumably only works on older perls -- confirm before reuse.
    package encoding::heuristic;

    # Encode object for Perl's lax 'utf8' encoding; cat_decode() delegates
    # to it.
    our $Encoding;

    BEGIN {
        # Resolved at compile time so it exists before any hook call.
        require Encode;
        $Encoding = Encode::find_encoding('utf8');
    }

    # Install the hook: store a blessed scalar reference of this package in
    # ${^ENCODING}. Any arguments are ignored.
    sub import {
        ${^ENCODING} = bless \my $x, __PACKAGE__;
    }

    # decode($self, $string)
    #
    # Converts $string in place: tries utf8::decode() first and falls back
    # to utf8::upgrade() when that reports failure. Yields the caller's own
    # scalar ($_[1]) as an lvalue.
    sub decode : lvalue {
        # Clear the hook for the duration of this call -- presumably so the
        # utf8:: calls below do not re-enter decode().
        local ${^ENCODING};

        utf8::upgrade($_[1])
            unless utf8::decode($_[1]);

        # Deliberately the last expression rather than an explicit "return":
        # the lvalue sub hands back the aliased argument itself.
        $_[1];
    }

    # cat_decode($self, ...)
    #
    # Drops the invocant and forwards the remaining arguments to the 'utf8'
    # Encode object. @_ is passed through untouched so its elements stay
    # aliased to the caller's variables.
    sub cat_decode {
        shift;
        return $Encoding->cat_decode(@_);
    }
}
# Activate the heuristic hook at compile time, so it is already in place
# while the rest of the file is being compiled.
BEGIN {
    encoding::heuristic::import();
}
use Test::More qw[no_plan];

# These assertions are only expected to pass while the encoding::heuristic
# hook is installed in ${^ENCODING}.

# Concatenating a character string with raw octets.
{
    my $str = "\x{263A}"        # unicode string
            . "\xE2\x98\xBA"    # UTF-8 encoded U+263A
            . "\xC4"            # Latin-1 encoded U+00C4
            ;
    cmp_ok($str, "eq", "\x{263A}\x{263A}\x{c4}", "No mojibake when concatenating");
}

# Explicitly upgrading octets that are well-formed UTF-8.
{
    my $str = "\xE2\x98\xBA";
    utf8::upgrade($str);
    cmp_ok($str, "eq", "\x{263A}", "No mojibake when upgrading UTF-8 octets");
}

# Explicitly upgrading an octet that is not well-formed UTF-8.
{
    my $str = "\xC4";
    utf8::upgrade($str);
    cmp_ok($str, "eq", "\x{c4}", "Upgrading native still works");
}

# Encoding octets that are already UTF-8 must not double-encode them.
{
    my $str = "\xE2\x98\xBA";
    utf8::encode($str);
    cmp_ok($str, "eq", "\xE2\x98\xBA", "Encoding UTF-8 octets just work");
}

# Encoding a native (Latin-1) octet yields its UTF-8 form.
{
    my $str = "\xC4";
    utf8::encode($str);
    cmp_ok($str, "eq", "\xC3\x84", "So does native");
}

# A character string compared directly against its UTF-8 octets.
cmp_ok("\x{263A}", "eq", "\xE2\x98\xBA", "Equality of unicode and UTF-8 octets");
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment