Skip to content

Instantly share code, notes, and snippets.

@Zegnat
Last active May 31, 2020 08:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Zegnat/a94489e9b7d5501193e724e336bc6052 to your computer and use it in GitHub Desktop.
Save Zegnat/a94489e9b7d5501193e724e336bc6052 to your computer and use it in GitHub Desktop.
--TEST--
Check if the HTML parser is skipping text nodes.
--SKIPIF--
<?php
// The test needs DOMDocument, which is provided by the dom extension.
if (!extension_loaded('dom')) {
    echo 'skip dom extension not available';
}
?>
--FILE--
<?php
// The newline and tab between <ul> and <li> should be kept as a text node,
// making it the first child of the <ul> element.
$markup = "<ul>\n\t<li>test</li>\n</ul>";

$document = new DOMDocument();
$document->loadHTML($markup);

$list = $document->getElementsByTagName('ul')[0];
$firstChildIsText = $list->firstChild->nodeType === XML_TEXT_NODE;

var_dump($firstChildIsText);
?>
--EXPECT--
bool(true)
<?php
// Probe script: parses a tiny HTML fragment with DOMDocument::loadHTML() once
// without options, once with an explicit 0, and once per libxml option
// constant, and prints whether the whitespace before the first <li> survived
// as a text node (the first child of <ul>).
//
// All the Libxml parameters taken from https://www.php.net/manual/en/libxml.constants.php
//
// Only the NAMES are listed here. Writing the bare constants into the array
// literal would evaluate every one of them before any availability check
// could run, which raises a notice or warning on PHP 5/7 and throws an Error
// on PHP 8 for builds where a constant does not exist.
$constantNames = [
    'LIBXML_BIGLINES', // Only available as of PHP 7.0.0 with Libxml >= 2.9.0
    'LIBXML_COMPACT', // Only available in Libxml >= 2.6.21
    'LIBXML_DTDATTR',
    'LIBXML_DTDLOAD',
    'LIBXML_DTDVALID',
    'LIBXML_HTML_NOIMPLIED', // Only available in Libxml >= 2.7.7 (as of PHP >= 5.4.0)
    'LIBXML_HTML_NODEFDTD', // Only available in Libxml >= 2.7.8 (as of PHP >= 5.4.0)
    'LIBXML_NOBLANKS',
    'LIBXML_NOCDATA',
    'LIBXML_NOEMPTYTAG', // This option is currently just available in the DOMDocument::save and DOMDocument::saveXML functions.
    'LIBXML_NOENT',
    'LIBXML_NOERROR',
    'LIBXML_NONET',
    'LIBXML_NOWARNING',
    'LIBXML_NOXMLDECL', // Only available in Libxml >= 2.6.21
    'LIBXML_NSCLEAN',
    'LIBXML_PARSEHUGE', // Only available in Libxml >= 2.7.0 (as of PHP >= 5.3.2 and PHP >= 5.2.12)
    'LIBXML_PEDANTIC', // Available as of PHP >= 5.4.0
    'LIBXML_XINCLUDE',
    'LIBXML_ERR_ERROR', // Error code
    'LIBXML_ERR_FATAL', // Error code
    'LIBXML_ERR_NONE', // Error code
    'LIBXML_ERR_WARNING', // Error code
    'LIBXML_VERSION', // Integer version
    'LIBXML_DOTTED_VERSION', // String version
    'LIBXML_SCHEMA_CREATE',
];

// Keep only the constants this build actually defines. Asking defined()
// replaces the hand-written PHP/libxml version comparisons, which had to be
// kept in sync with the manual and were incomplete for LIBXML_PARSEHUGE.
$constants = [];
foreach ($constantNames as $name) {
    if (defined($name)) {
        $constants[$name] = constant($name);
    }
}

// Fragment under test: the "\n\t" between <ul> and <li> is the whitespace
// whose preservation is being checked.
$html = "<ul>\n\t<li>test</li>\n</ul>";

// Returns true when the first child of the first <ul> is a text node.
// item(0) is used instead of [0] so the check does not depend on array
// access support for DOMNodeList, which older PHP 5 releases lack.
$startsWithTextNode = function (DOMDocument $doc) {
    return $doc->getElementsByTagName('ul')->item(0)->firstChild->nodeType === XML_TEXT_NODE;
};

// Baseline: no options passed to loadHTML. This fails in some very specific PHP builds?!
$doc = new DOMDocument();
$doc->loadHTML($html);
$testresult = $startsWithTextNode($doc);
echo 'No options ' . ($testresult ? 'true' : 'false') . "\n";

// Explicit zero: in case there is a different default than zero?!
$doc = new DOMDocument();
$doc->loadHTML($html, 0);
$testresult = $startsWithTextNode($doc);
echo 'Explicit zero ' . ($testresult ? 'true ' : 'false') . ' (' . 0 . ")\n";

// One run per option constant, passing that constant alone as the options
// argument. Some names can resolve to the same integer (the recorded runs
// show e.g. LIBXML_DTDLOAD and LIBXML_HTML_NODEFDTD both as 4); such rows
// pass the same value to loadHTML().
foreach ($constants as $name => $value) {
    // Ignore constants that are error types or libxml versions, also skip constants that are only for save methods.
    if (substr($name, 0, 11) === 'LIBXML_ERR_' || substr($name, -8) === '_VERSION' || $name === 'LIBXML_NOEMPTYTAG') {
        continue;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($html, $value);
    $testresult = $startsWithTextNode($doc);
    echo str_pad($name, 22, ' ', STR_PAD_RIGHT) . ($testresult ? 'true ' : 'false') . " (" . $value . ")\n";
}
PHP 5.6.40 (cli) (built: Jan 23 2019 00:04:26)
Copyright (c) 1997-2016 The PHP Group
Zend Engine v2.6.0, Copyright (c) 1998-2016 Zend Technologies
with Zend OPcache v7.0.6-dev, Copyright (c) 1999-2016, by Zend Technologies
with Xdebug v2.5.5, Copyright (c) 2002-2017, by Derick Rethans
Provided by: cd2team/docker-php:5.6
No options false
Explicit zero false (0)
LIBXML_COMPACT true (65536)
LIBXML_DTDATTR true (8)
LIBXML_DTDLOAD true (4)
LIBXML_DTDVALID true (16)
LIBXML_HTML_NOIMPLIED true (8192)
LIBXML_HTML_NODEFDTD true (4)
LIBXML_NOBLANKS false (256)
LIBXML_NOCDATA true (16384)
LIBXML_NOENT true (2)
LIBXML_NOERROR true (32)
LIBXML_NONET true (2048)
LIBXML_NOWARNING true (64)
LIBXML_NOXMLDECL true (2)
LIBXML_NSCLEAN true (8192)
LIBXML_PARSEHUGE true (524288)
LIBXML_PEDANTIC true (128)
LIBXML_XINCLUDE true (1024)
LIBXML_SCHEMA_CREATE true (1)
PHP 7.3.4 (cli) (built: Apr 6 2019 02:24:14) ( NTS )
Copyright (c) 1997-2018 The PHP Group
Zend Engine v3.3.4, Copyright (c) 1998-2018 Zend Technologies
with Zend OPcache v7.3.4, Copyright (c) 1999-2018, by Zend Technologies
with Xdebug v2.7.1, Copyright (c) 2002-2019, by Derick Rethans
Provided by: cd2team/docker-php:7.3
No options false
Explicit zero false (0)
LIBXML_BIGLINES true (4194304)
LIBXML_COMPACT true (65536)
LIBXML_DTDATTR true (8)
LIBXML_DTDLOAD true (4)
LIBXML_DTDVALID true (16)
LIBXML_HTML_NOIMPLIED true (8192)
LIBXML_HTML_NODEFDTD true (4)
LIBXML_NOBLANKS false (256)
LIBXML_NOCDATA true (16384)
LIBXML_NOENT true (2)
LIBXML_NOERROR true (32)
LIBXML_NONET true (2048)
LIBXML_NOWARNING true (64)
LIBXML_NOXMLDECL true (2)
LIBXML_NSCLEAN true (8192)
LIBXML_PARSEHUGE true (524288)
LIBXML_PEDANTIC true (128)
LIBXML_XINCLUDE true (1024)
LIBXML_SCHEMA_CREATE true (1)
PHP 7.4.6 (cli) (built: May 29 2020 01:44:57) ( NTS )
Copyright (c) The PHP Group
Zend Engine v3.4.0, Copyright (c) Zend Technologies
with Zend OPcache v7.4.6, Copyright (c), by Zend Technologies
Provided by: Homebrew (php-7.4.6_1.catalina.bottle.tar.gz)
No options true
Explicit zero true (0)
LIBXML_BIGLINES true (4194304)
LIBXML_COMPACT true (65536)
LIBXML_DTDATTR true (8)
LIBXML_DTDLOAD true (4)
LIBXML_DTDVALID true (16)
LIBXML_HTML_NOIMPLIED true (8192)
LIBXML_HTML_NODEFDTD true (4)
LIBXML_NOBLANKS false (256)
LIBXML_NOCDATA true (16384)
LIBXML_NOENT true (2)
LIBXML_NOERROR true (32)
LIBXML_NONET true (2048)
LIBXML_NOWARNING true (64)
LIBXML_NOXMLDECL true (2)
LIBXML_NSCLEAN true (8192)
LIBXML_PARSEHUGE true (524288)
LIBXML_PEDANTIC true (128)
LIBXML_XINCLUDE true (1024)
LIBXML_SCHEMA_CREATE true (1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment