public
Last active

function for people whining that preg_match_all is less to type and concluding from it that regex must be better for parsing html than a dom parser

  • Download Gist
xpath_match_all
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
<?php
/**
 * Runs an XPath query against an HTML string and returns the results.
 *
 * The parsed document is kept in static variables between calls, so several
 * queries can be run against the same HTML by passing the HTML only once.
 *
 * The return value mimics the layout of preg_match_all() output:
 *   - index 0 holds the serialized markup of every matched node,
 *   - index 1 holds the concatenated serialized markup of each matched
 *     node's children.
 *
 * @param string $query The XPath query to run on $html
 * @param string $html  The HTML to parse. Uses the previously parsed string if omitted
 * @return array array(string[] $outer, string[] $inner); both lists are empty
 *               when the query does not evaluate to a node list
 * @throws LogicException If $html is omitted before any HTML has been parsed
 */
function xpath_match_all($query, $html = '')
{
    static $dom;
    static $xpath;

    if ($html !== '') {
        if (!$dom) {
            $dom = new DOMDocument;
        }

        // Buffer parser diagnostics instead of emitting PHP warnings, and
        // remember the caller's setting so it can be restored afterwards.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        $xpath = new DOMXPath($dom);

        // Only discard the buffered errors when buffering was enabled here;
        // a caller that enabled it beforehand owns the buffer contents.
        if (!$previous) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
    }

    if (!$xpath) {
        throw new LogicException(
            'xpath_match_all() needs an HTML string on its first call'
        );
    }

    $matches = array(array(), array());

    // evaluate() may yield a scalar (e.g. count(...)) or false for an invalid
    // expression; there are no nodes to serialize in either case.
    $result = $xpath->evaluate($query);
    if (!($result instanceof DOMNodeList)) {
        return $matches;
    }

    // saveHTML() only accepts a node argument as of PHP 5.3.6.
    $save = version_compare(PHP_VERSION, '5.3.6', '<') ? 'saveXML' : 'saveHTML';

    foreach ($result as $resultNode) {
        $matches[0][] = $dom->$save($resultNode);

        $innerHtml = '';
        if ($resultNode->childNodes) {
            foreach ($resultNode->childNodes as $childNode) {
                $innerHtml .= $dom->$save($childNode);
            }
        }
        $matches[1][] = $innerHtml;
    }

    return $matches;
}

Store the current state of $user_errors in libxml_use_internal_errors() https://gist.github.com/4546595/revisions

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.