Skip to content

Instantly share code, notes, and snippets.

@mckelvey
Created September 8, 2017 19:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mckelvey/3820ff4a1052325d032f85d24c2363b1 to your computer and use it in GitHub Desktop.
Save mckelvey/3820ff4a1052325d032f85d24c2363b1 to your computer and use it in GitHub Desktop.
/* replaces lines 18985-19057 of HTMLPurifier.standalone.php v.4.9.3 */
/**
* @param DOMNode $node
*/
protected function getTagName($node)
{
if (property_exists($node, 'tagName')) {
return $node->tagName;
} else if (property_exists($node, 'nodeName')) {
return $node->nodeName;
} else if (property_exists($node, 'localName')) {
return $node->localName;
}
return null;
}
/**
* @param DOMNode $node
*/
protected function getData($node)
{
if (property_exists($node, 'data')) {
return $node->data;
} else if (property_exists($node, 'nodeValue')) {
return $node->nodeValue;
} else if (property_exists($node, 'textContent')) {
return $node->textContent;
}
return null;
}
/**
* @param DOMNode $node DOMNode to be tokenized.
* @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens.
* @param bool $collect Says whether or start and close are collected, set to
* false at first recursion because it's the implicit DIV
* tag you're dealing with.
* @return bool if the token needs an endtoken
* @todo data and tagName properties don't seem to exist in DOMNode?
*/
protected function createStartNode($node, &$tokens, $collect, $config)
{
// intercept non element nodes. WE MUST catch all of them,
// but we're not getting the character reference nodes because
// those should have been preprocessed
if ($node->nodeType === XML_TEXT_NODE) {
$data = $this->getData($node); // Handle variable data property
if ($data !== null) {
$tokens[] = $this->factory->createText($data);
}
return false;
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
// undo libxml's special treatment of <script> and <style> tags
$last = end($tokens);
$data = $node->data;
// (note $node->tagname is already normalized)
if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
$new_data = trim($data);
if (substr($new_data, 0, 4) === '<!--') {
$data = substr($new_data, 4);
if (substr($data, -3) === '-->') {
$data = substr($data, 0, -3);
} else {
// Highly suspicious! Not sure what to do...
}
}
}
$tokens[] = $this->factory->createText($this->parseText($data, $config));
return false;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
// this is code is only invoked for comments in script/style in versions
// of libxml pre-2.6.28 (regular comments, of course, are still
// handled regularly)
$tokens[] = $this->factory->createComment($node->data);
return false;
} elseif ($node->nodeType !== XML_ELEMENT_NODE) {
// not-well tested: there may be other nodes we have to grab
return false;
}
$attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
$tag_name = $this->getTagName($node); // Handle variable tagName property
if (empty($tag_name)) {
return (bool) $node->childNodes->length;
}
// We still have to make sure that the element actually IS empty
if (!$node->childNodes->length) {
if ($collect) {
$tokens[] = $this->factory->createEmpty($tag_name, $attr);
}
return false;
} else {
if ($collect) {
$tokens[] = $this->factory->createStart($tag_name, $attr);
}
return true;
}
}
/**
* @param DOMNode $node
* @param HTMLPurifier_Token[] $tokens
*/
protected function createEndNode($node, &$tokens)
{
$tag_name = $this->getTagName($node); // Handle variable tagName property
$tokens[] = $this->factory->createEnd($tag_name);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment