Skip to content

Instantly share code, notes, and snippets.

@cxw42
Created April 3, 2018 17:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cxw42/1a702d2f2f048ab5048e9cf22f5a76ce to your computer and use it in GitHub Desktop.
Save cxw42/1a702d2f2f048ab5048e9cf22f5a76ce to your computer and use it in GitHub Desktop.
Testing XML::Twig on Cygwin 64-bit
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use XML::Twig;
use Test::More tests => 16;
is( XML::Twig->new( keep_encoding => 1)->parse( q{<d a='"foo'/>})->sprint, q{<d a="&quot;foo"/>}, "quote in att with keep_encoding");
# test CDATA sections in HTML escaping https://rt.cpan.org/Ticket/Display.html?id=86773
my $html = <<'EOF';
<div id="body">body</div>
<script>
//<![CDATA[
if ( this.value && ( !request.term || matcher.test(text) ) && 1 > 0 && 0 < 1 )
//]]>
</script>
EOF
# module => XML::Twig->new options
my %html_conv= ( 'HTML::TreeBuilder' => {},
'HTML::Tidy' => { use_tidy => 1 },
);
foreach my $module ( sort keys %html_conv)
{ SKIP:
{ eval "use $module";
skip "$module not available", 3 if $@ ;
my $parser= XML::Twig->new( %{$html_conv{$module}});
my $xml = $parser->safe_parse_html($html);
print $@ if $@;
my @cdata = $xml->get_xpath('//#CDATA');
ok(@cdata == 1, "1 CDATA section found (using $module)");
#diag "\n", Dumper($xml),"\n";
#diag "\n", Dumper($xml->sprint),"\n";
#diag "\n", $xml->sprint, "\n";
#diag "Index: /", index($xml->sprint, '//]]>'), '/';
############ Re-creating the string makes it work.
############ Otherwise, index() returns the 64-bit negative
############ of the correct value.
my $x = ' ' . $xml->sprint;
$x = substr($x, 1);
#diag "Length is ", length $x;
#diag "Index: /", index($x, '//]]>'), '/';
#ok(((0+index($xml->sprint, '//]]>')) >= 0), "end of cdata ok in doc (using $module)");
ok(((index $x, '//]]>') >= 0), "end of cdata ok in doc (using $module)");
diag "... but it matches RE" if ($xml->sprint =~ qr{//]]>});
my @elts = $xml->get_xpath('//script');
foreach my $el (@elts)
{ #diag $el->sprint;
################### Same deal here.
my $x = ' ' . $el->sprint;
$x = substr($x, 1);
ok(((index $x, '//]]>') >= 0), "end of cdata ok in script element (using $module)");
diag "... but it matches RE" if ($el->sprint =~ qr{//]]>});
}
}
}
# test & in HTML (RT #86633)
my $html_with_amp='<h1>Marco&amp;company</h1>';
my $expected_body= '<body><h1>Marco&amp;company</h1></body>';
SKIP:
{ eval "use HTML::Tidy";
skip "HTML::Tidy not available", 1 if $@ ;
my $parsert = XML::Twig->new();
my $html_tidy = $parsert->safe_parse_html( { use_tidy => 1 }, "<h1>Marco&amp;company</h1>");
diag $@ if $@;
is( $html_tidy->first_elt( 'body')->sprint, $expected_body, "&amp; in text, converting html with use_tidy");
}
SKIP:
{ eval "use HTML::TreeBuilder";
skip "HTML::TreeBuilder not available", 1 if $@ ;
my $parserh= XML::Twig->new();
my $html = $parserh->safe_parse_html("<h1>Marco&amp;company</h1>");
diag $@ if $@;
is( $html->first_elt( 'body')->sprint , $expected_body, "&amp; in text, converting html with treebuilder");
}
is( XML::Twig::_unescape_cdata( '&lt;tag att="foo&amp;bar&amp;baz"&gt;&gt;&gt;&lt;/tag&gt;'), '<tag att="foo&bar&baz">>></tag>', '_unescape_cdata');
SKIP:
{ skip "safe_print_to_file method does not work on Windows", 6 if $^O =~ m{win}i;
# testing safe_print_to_file
my $tmp= "safe_print_to_file.xml";
my $doc= "<doc>foo</doc>";
unlink( $tmp); # no check, it could not be there
my $t1= XML::Twig->nparse( $doc)->safe_print_to_file( $tmp);
ok( -f $tmp, "safe_print_to_file created document");
my $t2= XML::Twig->nparse( $tmp);
is( $t2->sprint, $t1->sprint, "generated document identical to original document");
unlink( $tmp);
my $e1= XML::Twig->parse( '<d><a>foo</a><b>bar</b></d>')->first_elt( 'b')->safe_print_to_file( $tmp);
ok( -f $tmp, "safe_print_to_file on elt created document");
$t2= XML::Twig->nparse( $tmp);
is( $t2->sprint, '<b>bar</b>', "generated sub-document identical to original sub-document");
unlink( $tmp);
# failure modes
eval { XML::Twig->nparse( $tmp); };
like( $@, qr/Couldn't open $tmp:/, 'parse a non-existent file');
my $non_existent="safe_non_existent_I_hope_01/tmp";
while( -f $non_existent) { $non_existent++; } # most likely unnecessary ;--)
eval { $t1->safe_print_to_file( $non_existent); };
like( $@, qr/(does not exist|is not a directory)/, 'safe_print_to_file in non-existent dir');
}
exit;
@cxw42
Copy link
Author

cxw42 commented Apr 3, 2018

This is with XML-Twig-3.52 on cygwin x86_64, with perlbrew's perl-5.26.1. I had to edit t/test_3_45.t to get it to pass - index was returning the negative of the value (!). Looks like improper sign extension.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment