Last active
December 14, 2015 11:29
-
-
Save dankogai/5079930 to your computer and use it in GitHub Desktop.
Encode vs. Unicode::UTF8 vs. utf8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use Encode qw(); | |
use Unicode::UTF8 qw(); | |
use Benchmark qw/:all/; | |
# Slurp this script's own source as raw octets. The Japanese text after
# __DATA__ gives the benchmarks below multi-byte UTF-8 input to work on.
my $bytes = do {
    # An explicit ':raw' layer keeps default PerlIO layers (PERL_UNICODE / -C,
    # the open pragma, CRLF translation) from altering the octets on the way in.
    open my $fh, '<:raw', $0 or die "$0:$!";
    local $/;    # undefined record separator: read the whole file at once
    my $str = <$fh>;
    close $fh;
    $str;
};
# Inputs shared by both benchmark groups: an encoding object resolved once
# for the method-call variants, and the character string decoded from $bytes.
my $enc  = Encode::find_encoding('utf8');
my $utf8 = Encode::decode_utf8($bytes);

# Report octet count versus character count of the sample text on STDERR.
warn sprintf( "%d bytes -> %d chars", length($bytes), length($utf8) );
# Decode: octets -> characters.
# A negative COUNT asks Benchmark to run each entry for at least that many
# CPU seconds; cmpthese then prints the comparison chart.
my %decode_subs = (
    '$e->decode' => sub { $enc->decode($bytes) },
    'E::d_utf8'  => sub { Encode::decode_utf8($bytes) },

    # utf8::decode operates on its argument in place, so give it a fresh copy.
    'u8::decode'    => sub { utf8::decode( my $copy = $bytes ) },
    'U::U8::d_utf8' => sub { Unicode::UTF8::decode_utf8($bytes) },
);
cmpthese( timethese( -1, \%decode_subs ) );
# Encode: characters -> octets.
# A negative COUNT asks Benchmark to run each entry for at least that many
# CPU seconds; cmpthese then prints the comparison chart.
cmpthese(
    timethese(
        -1,
        {
            '$e->encode' => sub { $enc->encode($utf8) },
            'E::e_utf8'  => sub { Encode::encode_utf8($utf8) },

            # utf8::encode operates on its argument in place, so give it a
            # fresh copy on every iteration.
            'u8::encode'    => sub { utf8::encode( my $b = $utf8 ) },
            'U::U8::e_utf8' => sub { Unicode::UTF8::encode_utf8($utf8) },
        }
    )
);
__DATA__ | |
もう、三十七歳になります。こないだ、或る先輩が、 | |
よく、まあ、君は、生きて来たなあ、 | |
としみじみ言っていました。 | |
私自身にも、三十七まで生きて来たのが、うそのように思われる事があります。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
                  Rate     E::d_utf8    $e->decode    u8::decode U::U8::d_utf8
E::d_utf8      35544/s            --           -5%          -93%          -96%
$e->decode     37236/s            5%            --          -93%          -96%
u8::decode    513912/s         1346%         1280%            --          -48%
U::U8::d_utf8 983040/s         2666%         2540%           91%            --
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
                   Rate    u8::encode U::U8::e_utf8     E::e_utf8    $e->encode
u8::encode     735179/s            --          -25%          -68%          -71%
U::U8::e_utf8  980239/s           33%            --          -57%          -62%
E::e_utf8     2295672/s          212%          134%            --          -11%
$e->encode    2572440/s          250%          162%           12%            --
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment