-
-
Save anonymous/c361fe4abce69334a90d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/Mojo/DOM/HTML.pm b/lib/Mojo/DOM/HTML.pm
index 43c09fa..17e6be0 100644
--- a/lib/Mojo/DOM/HTML.pm
+++ b/lib/Mojo/DOM/HTML.pm
@@ -63,8 +63,24 @@ map { $END{$_} = ['p'] } (
qw(h3 h4 h5 h6 header hr main menu nav ol p pre section table ul)
);
-# HTML table elements with optional end tags
-my %TABLE = map { $_ => 1 } qw(colgroup tbody td tfoot th thead tr);
+# HTML elements with optional end tags and special scoping rules
+my @RULES = (
+ [
+ [qw(colgroup tbody tfoot thead)],
+ [qw(colgroup tbody td tfoot th thead tr)],
+ ['table']
+ ],
+ [[qw(dd dt)], [qw(dd dt)], ['dl']],
+ [['li'], ['li'], [qw(ul ol)]],
+ [[qw(td th)], [qw(td th)], ['table']],
+ [['tr'], ['tr'], ['table']]
+);
+my %CLOSE;
+for my $rule (@RULES) {
+ my $allowed = {map { $_ => 1 } @{$rule->[1]}};
+ my $scope = {map { $_ => 1 } @{$rule->[2]}};
+ map { $CLOSE{$_} = [$allowed, $scope] } @{$rule->[0]};
+}
# HTML elements without end tags
my %EMPTY = map { $_ => 1 } (
@@ -165,17 +181,6 @@ sub parse {
sub render { _render($_[0]->tree, $_[0]->xml) }
-sub _close {
- my ($current, $allowed, $scope) = @_;
-
- # Close allowed parent elements in scope
- my $parent = $$current;
- while ($parent->[0] ne 'root' && !$scope->{$parent->[1]}) {
- _end($parent->[1], 0, $current) if $allowed->{$parent->[1]};
- $parent = $parent->[3];
- }
-}
-
sub _end {
my ($end, $xml, $current) = @_;
@@ -269,25 +274,15 @@ sub _start {
if (!$xml && $$current->[0] ne 'root') {
if (my $end = $END{$start}) { _end($_, 0, $current) for @$end }
-# "dd" and "dt"
-elsif ($start eq 'dd' || $start eq 'dt') {
-_close($current, {dd => 1, dt => 1}, {dl => 1}) for qw(dd dt);
-}
-
-# "li"
-elsif ($start eq 'li') { _close($current, {li => 1}, {ul => 1, ol => 1}) }
-
-# "colgroup", "thead", "tbody" and "tfoot"
-elsif ($start eq 'colgroup' || $start =~ /^t(?:head|body|foot)$/) {
-_close($current, \%TABLE, {table => 1});
-}
-
-# "tr"
-elsif ($start eq 'tr') { _close($current, {tr => 1}, {table => 1}) }
+# Close allowed parent elements in scope
+elsif (my $close = $CLOSE{$start}) {
+my ($allowed, $scope) = @$close;
-# "th" and "td"
-elsif ($start eq 'th' || $start eq 'td') {
-_close($current, {th => 1, td => 1}, {table => 1});
+my $parent = $$current;
+while ($parent->[0] ne 'root' && !$scope->{$parent->[1]}) {
+_end($parent->[1], 0, $current) if $allowed->{$parent->[1]};
+$parent = $parent->[3];
+}
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment