Skip to content

Instantly share code, notes, and snippets.

/attr.diff Secret

Created March 9, 2017 18:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/cd1a08ca0919e43ebfaa81e760e06676 to your computer and use it in GitHub Desktop.
Save anonymous/cd1a08ca0919e43ebfaa81e760e06676 to your computer and use it in GitHub Desktop.
diff --git a/lib/Mojo/DOM/HTML.pm b/lib/Mojo/DOM/HTML.pm
index f2f70a29b..19e2e2250 100644
--- a/lib/Mojo/DOM/HTML.pm
+++ b/lib/Mojo/DOM/HTML.pm
@@ -1,7 +1,7 @@
package Mojo::DOM::HTML;
use Mojo::Base -base;
-use Mojo::Util qw(html_unescape xml_escape);
+use Mojo::Util qw(html_attr_unescape html_unescape xml_escape);
use Scalar::Util 'weaken';
has tree => sub { ['root'] };
@@ -125,7 +125,7 @@ sub parse {
# Empty tag
++$closing and next if $key eq '/';
- $attrs{$key} = defined $value ? html_unescape $value : $value;
+ $attrs{$key} = defined $value ? html_attr_unescape $value : $value;
}
# "image" is an alias for "img"
diff --git a/lib/Mojo/Util.pm b/lib/Mojo/Util.pm
index 6f30be035..02648e35c 100644
--- a/lib/Mojo/Util.pm
+++ b/lib/Mojo/Util.pm
@@ -51,14 +51,17 @@ my %XML = (
# "Sun, 06 Nov 1994 08:49:37 GMT" and "Sunday, 06-Nov-94 08:49:37 GMT"
my $EXPIRES_RE = qr/(\w+\W+\d+\W+\w+\W+\d+\W+\d+:\d+:\d+\W*\w+)/;
+# HTML entities (a trailing ";" or "=" is captured so _decode can inspect it)
+my $ENTITY_RE = qr/&(?:\#((?:[0-9]{1,7}|x[0-9a-fA-F]{1,6}));|(\w+[;=]?))/;
+
# Encoding cache
my %CACHE;
our @EXPORT_OK = (
qw(b64_decode b64_encode camelize class_to_file class_to_path decamelize),
qw(decode deprecated dumper encode extract_usage getopt hmac_sha1_sum),
- qw(html_unescape md5_bytes md5_sum monkey_patch punycode_decode),
- qw(punycode_encode quote secure_compare sha1_bytes sha1_sum),
+ qw(html_attr_unescape html_unescape md5_bytes md5_sum monkey_patch),
+ qw(punycode_decode punycode_encode quote secure_compare sha1_bytes sha1_sum),
qw(split_cookie_header split_header steady_time tablify term_escape trim),
qw(unindent unquote url_escape url_unescape xml_escape xor_encode)
);
@@ -155,10 +158,15 @@ sub getopt {
Getopt::Long::Configure($save);
}
+sub html_attr_unescape {
+ my $str = shift;
+ $str =~ s/$ENTITY_RE/_decode($1, $2, 1)/geo;
+ return $str;
+}
+
sub html_unescape {
my $str = shift;
- $str
- =~ s/&(?:\#((?:[0-9]{1,7}|x[0-9a-fA-F]{1,6}));|(\w+;?))/_decode($1, $2)/ge;
+ $str =~ s/$ENTITY_RE/_decode($1, $2, 0)/geo;
return $str;
}
@@ -368,16 +376,18 @@ sub _adapt {
}
sub _decode {
- my ($point, $name) = @_;
+ my ($point, $name, $attr) = @_;
# Code point
return chr($point !~ /^x/ ? $point : hex $point) unless defined $name;
# Named character reference
- my $rest = '';
+ my $rest = my $last = '';
while (length $name) {
- return $ENTITIES{$name} . reverse $rest if exists $ENTITIES{$name};
- $rest .= chop $name;
+ return $ENTITIES{$name} . reverse $rest
+ if exists $ENTITIES{$name}
+ && (!$attr || $name =~ /;$/ || $last !~ /[A-Za-z0-9=]/);
+ $rest .= $last = chop $name;
}
return '&' . reverse $rest;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment