Created
February 23, 2010 12:39
-
-
Save kappa/312135 to your computer and use it in GitHub Desktop.
Benchmark ways to encode HTML entities in Perl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl

# Benchmark several ways of escaping the five HTML-special characters
# (& < > " ') in a string.
use Modern::Perl;
use Benchmark qw/:all/;
use HTML::Entities;
# Long sample input: a paragraph containing many quote characters.
my $big_text = "See, they call Los Angeles the \"City Of Angels\"; but I didn't find it to be that, exactly. But I'll allow it as there are s ome nice folks there. 'Course I ain't never been to London, and I ain't never seen France. And I ain't never seen no queen in her damned undies, so the feller says. But I'll tell you what - after seeing Los Angeles, and this here story I'm about to unfold, well, I guess I seen somethin' every bit as stupefyin' as you'd seen in any of them other places. And in English , too. So I can die with a smile on my face, without feelin' like the good Lord gypped me. Now this here story I'm about to unfold took place in the early '90s - just about the time of our conflict with Sad'm and the I-raqis. I only mention it be cause sometimes there's a man...";

# Short sample input: a single name with two double quotes.
my $small_text = "Jeffrey \"The Dude\" Lebowski";

# The string every benchmark candidate encodes. Point this at $big_text to
# measure the long input instead.
my $text = $small_text;
# Benchmark candidate: HTML::Entities::encode_entities used as a function.
# Assigning the result puts the call in non-void context, so the encoded
# string comes back as a value. The result is deliberately discarded; only
# the encoding work is being timed.
sub html_entities {
    my $res = encode_entities($text, q{<>&"'});
    return;
}
# Benchmark candidate: HTML::Entities::encode_entities used in place.
# $text is copied first so the shared input is never modified; the call is
# then made in void context on the copy, which (per the HTML::Entities
# documentation) encodes its argument in place.
sub html_entities_in {
    my $res = $text;
    encode_entities($res, q{<>&"'});
    return;
}
# Benchmark candidate: five sequential s///g passes over a copy of $text,
# one per special character. The result is discarded; only the work is timed.
sub subst {
    my $res = $text;

    # '&' must be handled first, otherwise the ampersands introduced by the
    # later replacements would themselves be escaped again.
    $res =~ s/&/&amp;/g;
    $res =~ s/</&lt;/g;
    $res =~ s/>/&gt;/g;
    $res =~ s/"/&quot;/g;
    $res =~ s/'/&#39;/g;
    return;
}
# Lookup table from each HTML-special character to the entity that replaces
# it. Used by the single-pass substitution candidates below.
my %char2entity = (
    '&'  => '&amp;',
    '<'  => '&lt;',
    '>'  => '&gt;',
    '"'  => '&quot;',
    q{'} => '&#39;',
);
# Benchmark candidate: a single s///g pass over a copy of $text that looks
# each matched character up in %char2entity via $1. The result is discarded.
sub subst_hash {
    my $res = $text;
    $res =~ s{([&<>"'])}{$char2entity{$1}}g;
    return;
}
# Benchmark candidate: same single-pass hash lookup as subst_hash, but reads
# the capture through $+ (the last matched group) instead of $1, to compare
# the cost of the two variables. The result is discarded.
sub subst_plus {
    my $res = $text;
    $res =~ s{([&<>"'])}{$char2entity{$+}}g;
    return;
}
# The same character-to-entity table as %char2entity, indexed by the
# character's ordinal value so it can be read with an array lookup.
my @char2entity;
for my $char (keys %char2entity) {
    $char2entity[ord $char] = $char2entity{$char};
}
# Benchmark candidate: a single s///g pass over a copy of $text that looks
# the replacement up in @char2entity by the matched character's ordinal.
# The result is discarded.
sub subst_array {
    my $res = $text;
    $res =~ s{([&<>"'])}{$char2entity[ord($+)]}g;
    return;
}
# Run every candidate and print the comparison table. A negative count tells
# Benchmark to run each candidate for at least that many CPU seconds (here 5).
# The "-sub" variants call the same routine through an extra anonymous sub,
# which shows how much the additional call layer costs.
cmpthese(-5, {
    'HTML::Entities-sub' => sub { html_entities() },
    'HTML::Entities'     => \&html_entities,
    'HTML::Entities-in'  => \&html_entities_in,
    's///'               => \&subst,
    's///-sub'           => sub { subst() },
    's///hash'           => \&subst_hash,
    's///hash-plus'      => \&subst_plus,
    's///array'          => \&subst_array,
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment