Created
October 5, 2012 18:57
-
-
Save jberger/3841692 to your computer and use it in GitHub Desktop.
HTML4 comment causes havoc in Mojo::DOM tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Reproduction script: parse a small HTML document whose comment is closed
# with whitespace between "--" and ">" ("-- >") using Mojo::DOM, then dump
# the resulting parse tree. The dump captured after __END__ shows the comment
# being parsed as a "!--" tag whose attributes are the comment's words, with
# the following <p> element nested underneath it.
use strict;
use warnings;
use 5.010;

use Mojo::DOM;
# NOTE(review): HTML::TreeBuilder is not referenced anywhere below; presumably
# kept for a side-by-side parse with another parser -- confirm before removing.
use HTML::TreeBuilder;
use Data::Printer;

# Single-quoted heredoc so the markup is passed to the parser verbatim,
# with no interpolation. The "-- >" closing sequence is the point of the test.
my $content = <<'EOF';
<html>
<body>
<!-- This is a valid comment -- >
<p>Here's a paragraph</p>
</body>
</html>
EOF

my $dom = Mojo::DOM->new($content);

# Data::Printer's p() is meant to be handed a variable (it takes its argument
# by reference), so keep the tree in a lexical instead of passing the method
# call expression directly.
my $tree = $dom->tree;
p $tree;

__END__
\ [ | |
[0] "root", | |
[1] [ | |
[0] "tag", | |
[1] "html", | |
[2] {}, | |
[3] var (weak), | |
[4] [ | |
[0] "text", | |
[1] " | |
" | |
], | |
[5] [ | |
[0] "tag", | |
[1] "body", | |
[2] {}, | |
[3] var[1] (weak), | |
[4] [ | |
[0] "text", | |
[1] " | |
" | |
], | |
[5] [ | |
[0] "tag", | |
[1] "!--", | |
[2] { # <------ Note: comment text seen as hash keys! | |
-- undef, | |
a undef, | |
comment undef, | |
is undef, | |
this undef, | |
valid undef | |
}, | |
[3] var[1][5] (weak), | |
[4] [ | |
[0] "text", | |
[1] " | |
" | |
], | |
[5] [ | |
[0] "tag", | |
[1] "p", | |
[2] {}, | |
[3] var[1][5][5] (weak), # <----- Note: a child of the comment! | |
[4] [ | |
[0] "text", | |
[1] "Here's a paragraph" | |
] | |
], | |
[6] [ | |
[0] "text", | |
[1] " | |
" | |
] | |
] | |
], | |
[6] [ | |
[0] "text", | |
[1] " | |
" | |
] | |
], | |
[2] [ | |
[0] "text", | |
[1] " | |
" | |
] | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For comparison, here is the same script with the comment closed by "-->"
# (no whitespace before ">"), i.e. the HTML5 comment syntax.
#!/usr/bin/env perl
# Parses the document with Mojo::DOM and dumps the resulting parse tree. The
# dump captured after __END__ shows the comment as a "comment" node holding
# its text as a string, with the <p> element as its sibling under <body>.
use strict;
use warnings;
use 5.010;

use Mojo::DOM;
# NOTE(review): HTML::TreeBuilder is not referenced anywhere below; presumably
# kept for a side-by-side parse with another parser -- confirm before removing.
use HTML::TreeBuilder;
use Data::Printer;

# Single-quoted heredoc so the markup is passed to the parser verbatim,
# with no interpolation.
my $content = <<'EOF';
<html>
<body>
<!-- This is a valid comment -->
<p>Here's a paragraph</p>
</body>
</html>
EOF

my $dom = Mojo::DOM->new($content);

# Data::Printer's p() is meant to be handed a variable (it takes its argument
# by reference), so keep the tree in a lexical instead of passing the method
# call expression directly.
my $tree = $dom->tree;
p $tree;

__END__
\ [ | |
[0] "root", | |
[1] [ | |
[0] "tag", | |
[1] "html", | |
[2] {}, | |
[3] var (weak), | |
[4] [ | |
[0] "text", | |
[1] " | |
" | |
], | |
[5] [ | |
[0] "tag", | |
[1] "body", | |
[2] {}, | |
[3] var[1] (weak), | |
[4] [ | |
[0] "text", | |
[1] " | |
" | |
], | |
[5] [ | |
[0] "comment", | |
[1] " This is a valid comment " # <----- Note: comment text is a string | |
], | |
[6] [ | |
[0] "text", | |
[1] " | |
" | |
], | |
[7] [ | |
[0] "tag", | |
[1] "p", | |
[2] {}, | |
[3] var[1][5] (weak), # <---- Note: no longer a child of the comment | |
[4] [ | |
[0] "text", | |
[1] "Here's a paragraph" | |
] | |
], | |
[8] [ | |
[0] "text", | |
[1] " | |
" | |
] | |
], | |
[6] [ | |
[0] "text", | |
[1] " | |
" | |
] | |
], | |
[2] [ | |
[0] "text", | |
[1] " | |
" | |
] | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Fixed: see mojolicious/mojo#389