Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Load XML into SimpleXMLElement without Namespaces.
<?php
/**
* Loads XML and kills namespaces in the process.
* It allows easy usage of namespaced XML code.
*
* - NameSpaced tags are renamed from <ns:tag to <ns_tag.
* - NameSpaced tags are renamed from </ns:tag> to </ns_tag>.
* - NameSpaced attributes are renamed from ns:tag=... to ns_tag=....
*
* @license http://sam.zoy.org/wtfpl/
*
* @param string $xml
* @param string $sxclass
* @param bool $nsattr
* @param int $flags
* @return SimpleXMLElement
*/
function simplexml_load_string_nons($xml, $sxclass = 'SimpleXMLElement', $nsattr = false, $flags = null){
// Validate arguments first
if(!is_string($sxclass) or empty($sxclass) or !class_exists($sxclass)){
trigger_error('$sxclass must be a SimpleXMLElement or a derived class.', E_USER_WARNING);
return false;
}
if(!is_string($xml) or empty($xml)){
trigger_error('$xml must be a non-empty string.', E_USER_WARNING);
return false;
}
// Load XML if URL is provided as XML
if(preg_match('~^https?://[^\s]+$~i', $xml) || file_exists($xml)){
$xml = file_get_contents($xml);
}
// Let's drop namespace definitions
if(stripos($xml, 'xmlns=') !== false){
$xml = preg_replace('~[\s]+xmlns=[\'"].+?[\'"]~i', null, $xml);
}
// I know this looks kind of funny but it changes namespaced attributes
if(preg_match_all('~xmlns:([a-z0-9]+)=~i', $xml, $matches)){
foreach(($namespaces = array_unique($matches[1])) as $namespace){
$escaped_namespace = preg_quote($namespace, '~');
$xml = preg_replace('~[\s]xmlns:'.$escaped_namespace.'=[\'].+?[\']~i', null, $xml);
$xml = preg_replace('~[\s]xmlns:'.$escaped_namespace.'=["].+?["]~i', null, $xml);
$xml = preg_replace('~([\'"\s])'.$escaped_namespace.':~i', '$1'.$namespace.'_', $xml);
}
}
// Let's change <namespace:tag to <namespace_tag ns="namespace"
$regexfrom = sprintf('~<([a-z0-9]+):%s~is', !empty($nsattr) ? '([a-z0-9]+)' : null);
$regexto = strlen($nsattr) ? '<$1_$2 '.$nsattr.'="$1"' : '<$1_';
$xml = preg_replace($regexfrom, $regexto, $xml);
// Let's change </namespace:tag> to </namespace_tag>
$xml = preg_replace('~</([a-z0-9]+):~is', '</$1_', $xml);
// Default flags I use
if(empty($flags)) $flags = LIBXML_COMPACT | LIBXML_NOBLANKS | LIBXML_NOCDATA;
// Now load and return (namespaceless)
return $xml = simplexml_load_string($xml, $sxclass, $flags);
}
?>
@YuzhiboCN

This comment has been minimized.

Copy link

@YuzhiboCN YuzhiboCN commented Apr 19, 2019

if(preg_match_all('xmlns:?([^=]*)=i', $xml, $matches)){
foreach(($namespaces = array_unique($matches[1])) as $namespace){
$escaped_namespace = preg_quote($namespace, '
');
$xml = preg_replace('
[\s]xmlns:?'.$escaped_namespace.'=['].+?[']i', null, $xml);
$xml = preg_replace('
[\s]xmlns:?'.$escaped_namespace.'=["].+?["]i', null, $xml);
$xml = preg_replace('
(['"\s])'.$escaped_namespace.':~i', '$1'.$namespace.'_', $xml);
}
}

DELETE LINE 34-37

~xmlns:([a-z0-9]+)=~i -> ~xmlns:?([^=]*)=~i
<([a-z0-9]+):%sis -> <([^:]+):%sis


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment