Skip to content

Instantly share code, notes, and snippets.

@jberger
Created October 5, 2012 18:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jberger/3841692 to your computer and use it in GitHub Desktop.
Save jberger/3841692 to your computer and use it in GitHub Desktop.
HTML4 comment causes havoc in Mojo::DOM tree
#!/usr/bin/env perl
# Reproduction case: parse a small HTML document whose comment is closed
# with "-- >" (whitespace before the final ">") and dump the tree that
# Mojo::DOM builds for it.
use strict;
use warnings;
use 5.010;

use Mojo::DOM;
use HTML::TreeBuilder;    # loaded, but not used anywhere in this script
use Data::Printer;

# Single-quoted heredoc: the markup is taken literally, no interpolation.
my $html = <<'END_HTML';
<html>
<body>
<!-- This is a valid comment -- >
<p>Here's a paragraph</p>
</body>
</html>
END_HTML

my $parsed = Mojo::DOM->new($html);

# Dump the parser's internal tree structure for inspection.
my $tree = $parsed->tree;
p $tree;
__END__
\ [
[0] "root",
[1] [
[0] "tag",
[1] "html",
[2] {},
[3] var (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "body",
[2] {},
[3] var[1] (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "!--",
[2] { # <------ Note: comment text seen as hash keys!
-- undef,
a undef,
comment undef,
is undef,
this undef,
valid undef
},
[3] var[1][5] (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "p",
[2] {},
[3] var[1][5][5] (weak), # <----- Note: a child of the comment!
[4] [
[0] "text",
[1] "Here's a paragraph"
]
],
[6] [
[0] "text",
[1] "
"
]
]
],
[6] [
[0] "text",
[1] "
"
]
],
[2] [
[0] "text",
[1] "
"
]
]
# For comparison, here is the same file using HTML5 comment syntax
#!/usr/bin/env perl
# Comparison case: parse a small HTML document whose comment is closed
# with "-->" (no whitespace before the final ">") and dump the tree that
# Mojo::DOM builds for it.
use strict;
use warnings;
use 5.010;

use Mojo::DOM;
use HTML::TreeBuilder;    # loaded, but not used anywhere in this script
use Data::Printer;

# Single-quoted heredoc: the markup is taken literally, no interpolation.
my $html = <<'END_HTML';
<html>
<body>
<!-- This is a valid comment -->
<p>Here's a paragraph</p>
</body>
</html>
END_HTML

my $parsed = Mojo::DOM->new($html);

# Dump the parser's internal tree structure for inspection.
my $tree = $parsed->tree;
p $tree;
__END__
\ [
[0] "root",
[1] [
[0] "tag",
[1] "html",
[2] {},
[3] var (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "body",
[2] {},
[3] var[1] (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "comment",
[1] " This is a valid comment " # <----- Note: comment text is a string
],
[6] [
[0] "text",
[1] "
"
],
[7] [
[0] "tag",
[1] "p",
[2] {},
[3] var[1][5] (weak), # <---- Note: no longer a child of the comment
[4] [
[0] "text",
[1] "Here's a paragraph"
]
],
[8] [
[0] "text",
[1] "
"
]
],
[6] [
[0] "text",
[1] "
"
]
],
[2] [
[0] "text",
[1] "
"
]
]
@jberger
Copy link
Author

jberger commented Oct 5, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment