Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

HTML4 comment causes havoc in Mojo::DOM tree

View html4.pl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use Mojo::DOM;
use HTML::TreeBuilder;
 
# Minimal reproduction input: the comment below is closed with "-- >"
# (whitespace between "--" and ">"), the HTML4-style close this gist is about.
# The heredoc is single-quoted, so nothing inside it is interpolated.
my $content = <<'EOF';
<html>
<body>
<!-- This is a valid comment -- >
<p>Here's a paragraph</p>
</body>
</html>
EOF
 
# Parse the document and dump the tree structure returned by $dom->tree.
# In the output recorded after __END__, the comment appears as a tag named
# "!--" whose attribute hash holds the comment's words as keys, and the <p>
# element is nested inside that tag.
my $dom = Mojo::DOM->new( $content );
use Data::Printer;
p $dom->tree;
 
__END__
 
 
\ [
[0] "root",
[1] [
[0] "tag",
[1] "html",
[2] {},
[3] var (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "body",
[2] {},
[3] var[1] (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "!--",
[2] { # <------ Note: comment text seen as hash keys!
-- undef,
a undef,
comment undef,
is undef,
this undef,
valid undef
},
[3] var[1][5] (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "p",
[2] {},
[3] var[1][5][5] (weak), # <----- Note: a child of the comment!
[4] [
[0] "text",
[1] "Here's a paragraph"
]
],
[6] [
[0] "text",
[1] "
"
]
]
],
[6] [
[0] "text",
[1] "
"
]
],
[2] [
[0] "text",
[1] "
"
]
]
View html4.pl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
# By way of comparison, here is the same script using the HTML5 comment syntax
 
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use Mojo::DOM;
use HTML::TreeBuilder;
 
# Comparison input: the comment below is closed with "-->" and no whitespace
# between "--" and ">".
# The heredoc is single-quoted, so nothing inside it is interpolated.
my $content = <<'EOF';
<html>
<body>
<!-- This is a valid comment -->
<p>Here's a paragraph</p>
</body>
</html>
EOF
 
# Parse the document and dump the tree structure returned by $dom->tree.
# In the output recorded after __END__, the comment appears as a "comment"
# node holding its text as a single string, and the <p> element is listed
# directly under <body>.
my $dom = Mojo::DOM->new( $content );
use Data::Printer;
p $dom->tree;
 
__END__
 
\ [
[0] "root",
[1] [
[0] "tag",
[1] "html",
[2] {},
[3] var (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "tag",
[1] "body",
[2] {},
[3] var[1] (weak),
[4] [
[0] "text",
[1] "
"
],
[5] [
[0] "comment",
[1] " This is a valid comment " # <----- Note: comment text is a string
],
[6] [
[0] "text",
[1] "
"
],
[7] [
[0] "tag",
[1] "p",
[2] {},
[3] var[1][5] (weak), # <---- Note: no longer a child of the comment
[4] [
[0] "text",
[1] "Here's a paragraph"
]
],
[8] [
[0] "text",
[1] "
"
]
],
[6] [
[0] "text",
[1] "
"
]
],
[2] [
[0] "text",
[1] "
"
]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.