Skip to content

Instantly share code, notes, and snippets.

/structure.diff Secret

Created October 14, 2014 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/c361fe4abce69334a90d to your computer and use it in GitHub Desktop.
Save anonymous/c361fe4abce69334a90d to your computer and use it in GitHub Desktop.
diff --git a/lib/Mojo/DOM/HTML.pm b/lib/Mojo/DOM/HTML.pm
index 43c09fa..17e6be0 100644
--- a/lib/Mojo/DOM/HTML.pm
+++ b/lib/Mojo/DOM/HTML.pm
@@ -63,8 +63,24 @@ map { $END{$_} = ['p'] } (
qw(h3 h4 h5 h6 header hr main menu nav ol p pre section table ul)
);
-# HTML table elements with optional end tags
-my %TABLE = map { $_ => 1 } qw(colgroup tbody td tfoot th thead tr);
+# HTML elements with optional end tags and special scoping rules, each rule is
+# [start tags, open elements they may close, elements that end the search]
+my @RULES = (
+  [
+    [qw(colgroup tbody tfoot thead)],
+    [qw(colgroup tbody td tfoot th thead tr)],
+    ['table']
+  ],
+  [[qw(dd dt)], [qw(dd dt)], ['dl']],
+  [['li'], ['li'], [qw(ul ol)]],
+  [[qw(td th)], [qw(td th)], ['table']],
+  [['tr'], ['tr'], ['table']]
+);
+my %CLOSE;    # start tag => [\%allowed, \%scope], shared by all tags of a rule
+for my $rule (@RULES) {
+  my ($allowed, $scope) = map { +{map { $_ => 1 } @$_} } @{$rule}[1, 2];
+  $CLOSE{$_} = [$allowed, $scope] for @{$rule->[0]};
+}
# HTML elements without end tags
my %EMPTY = map { $_ => 1 } (
@@ -165,17 +181,6 @@ sub parse {
sub render { _render($_[0]->tree, $_[0]->xml) }
-sub _close {
- my ($current, $allowed, $scope) = @_;
-
- # Close allowed parent elements in scope
- my $parent = $$current;
- while ($parent->[0] ne 'root' && !$scope->{$parent->[1]}) {
- _end($parent->[1], 0, $current) if $allowed->{$parent->[1]};
- $parent = $parent->[3];
- }
-}
-
sub _end {
my ($end, $xml, $current) = @_;
@@ -269,25 +274,15 @@ sub _start {
if (!$xml && $$current->[0] ne 'root') {
if (my $end = $END{$start}) { _end($_, 0, $current) for @$end }
- # "dd" and "dt"
- elsif ($start eq 'dd' || $start eq 'dt') {
- _close($current, {dd => 1, dt => 1}, {dl => 1}) for qw(dd dt);
- }
-
- # "li"
- elsif ($start eq 'li') { _close($current, {li => 1}, {ul => 1, ol => 1}) }
-
- # "colgroup", "thead", "tbody" and "tfoot"
- elsif ($start eq 'colgroup' || $start =~ /^t(?:head|body|foot)$/) {
- _close($current, \%TABLE, {table => 1});
- }
-
- # "tr"
- elsif ($start eq 'tr') { _close($current, {tr => 1}, {table => 1}) }
+ # Close allowed parent elements in scope
+ elsif (my $close = $CLOSE{$start}) {
+ my ($allowed, $scope) = @$close;
- # "th" and "td"
- elsif ($start eq 'th' || $start eq 'td') {
- _close($current, {th => 1, td => 1}, {table => 1});
+ my $parent = $$current;
+ while ($parent->[0] ne 'root' && !$scope->{$parent->[1]}) {
+ _end($parent->[1], 0, $current) if $allowed->{$parent->[1]};
+ $parent = $parent->[3];
+ }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment