# Created February 23, 2010 12:39
# Benchmark ways to encode HTML entities in Perl
#! /usr/bin/perl
use Modern::Perl;
use Benchmark qw/:all/;
use HTML::Entities;
# Input fixtures for the benchmark: one long passage and one short string,
# both containing double quotes and/or apostrophes that need escaping.
my $big_text = "See, they call Los Angeles the \"City Of Angels\"; but I didn't find it to be that, exactly. But I'll allow it as there are s ome nice folks there. 'Course I ain't never been to London, and I ain't never seen France. And I ain't never seen no queen in her damned undies, so the feller says. But I'll tell you what - after seeing Los Angeles, and this here story I'm about to unfold, well, I guess I seen somethin' every bit as stupefyin' as you'd seen in any of them other places. And in English , too. So I can die with a smile on my face, without feelin' like the good Lord gypped me. Now this here story I'm about to unfold took place in the early '90s - just about the time of our conflict with Sad'm and the I-raqis. I only mention it be cause sometimes there's a man...";
my $small_text = "Jeffrey \"The Dude\" Lebowski";
# $text is the string every benchmarked sub reads. It currently points at the
# short fixture; assign $big_text here instead to time the long input.
my $text = $small_text;
# Encode the five characters < > & " ' in $text with HTML::Entities and
# return the encoded copy.
#
# The scalar assignment is deliberate: per the HTML::Entities documentation,
# encode_entities() only modifies its argument when called in void context.
# Assigning the result keeps the call in scalar context, so the shared $text
# is never altered between benchmark iterations.
sub html_entities {
    my $res = encode_entities($text, q{<>&"'});
}
# Copy $text into a private scalar and run HTML::Entities over that copy.
#
# encode_entities() is the last statement, so it executes in whatever context
# this sub was called in. Per the HTML::Entities documentation, void context
# rewrites $res in place, while scalar/list context leaves $res alone and
# returns an encoded copy. Either way the shared $text is untouched.
sub html_entities_in {
    my $res = $text;
    encode_entities($res, q{<>&"'});
}
# Escape < > & " ' in a copy of $text using one global substitution per
# character.
#
# Note: the sub's value is that of the final s/// (its substitution count),
# not the escaped text; the escaped text lives only in the local $res.
sub subst {
    my $res = $text;

    # '&' must go first, otherwise the ampersands introduced by the
    # replacements below would themselves be escaped again.
    $res =~ s/&/&amp;/g;
    $res =~ s/</&lt;/g;
    $res =~ s/>/&gt;/g;
    $res =~ s/"/&quot;/g;
    $res =~ s/'/&apos;/g;
}
# Lookup table from each character that needs escaping to its entity text.
# Used by the single-pass substitution subs below.
my %char2entity = (
    '&'  => '&amp;',
    '<'  => '&lt;',
    '>'  => '&gt;',
    '"'  => '&quot;',
    q{'} => '&apos;',
);
# Escape < > & " ' in a copy of $text with a single global substitution,
# looking up each captured character ($1) in %char2entity.
#
# Note: the sub's value is the result of the s/// (its substitution count),
# not the escaped text.
sub subst_hash {
    my $res = $text;
    $res =~ s/([&<>"'])/$char2entity{$1}/g;
}
# Same single-pass hash lookup as subst_hash, but reads the captured
# character through $+ (the last successfully matched capture group) instead
# of $1. With only one group in the pattern both name the same text, so this
# variant isolates the cost of the variable used.
#
# Note: the sub's value is the result of the s/// (its substitution count),
# not the escaped text.
sub subst_plus {
    my $res = $text;
    $res =~ s/([&<>"'])/$char2entity{$+}/g;
}
# Array form of %char2entity, indexed by character code (ord), so a lookup
# is an array index rather than a hash fetch. Only the slots for the five
# escaped characters are populated; every other index stays undef.
my @char2entity;
for my $char (keys %char2entity) {
    $char2entity[ord $char] = $char2entity{$char};
}
# Escape < > & " ' in a copy of $text with a single global substitution,
# fetching each replacement from @char2entity by the character code of the
# captured character.
#
# Note: the sub's value is the result of the s/// (its substitution count),
# not the escaped text.
sub subst_array {
    my $res = $text;
    $res =~ s/([&<>"'])/$char2entity[ord($+)]/g;
}
# Run every variant and print Benchmark's comparison chart. A negative count
# tells Benchmark to run each entry for at least that many CPU seconds (here
# 5) rather than for a fixed number of iterations.
#
# The "-sub" entries wrap the same routine in an extra anonymous sub so the
# overhead of one additional call layer can be seen next to the direct
# code-reference entries.
cmpthese(-5, {
    'HTML::Entities-sub' => sub { html_entities() },
    'HTML::Entities'     => \&html_entities,
    'HTML::Entities-in'  => \&html_entities_in,
    's///'               => \&subst,
    's///-sub'           => sub { subst() },
    's///hash'           => \&subst_hash,
    's///hash-plus'      => \&subst_plus,
    's///array'          => \&subst_array,
});
