Skip to content

Instantly share code, notes, and snippets.

@Zegnat

Zegnat/bla.php Secret

Created March 24, 2018 21:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Zegnat/bd1c53df51568d3b5a9c0a34c2c2b643 to your computer and use it in GitHub Desktop.
Save Zegnat/bd1c53df51568d3b5a9c0a34c2c2b643 to your computer and use it in GitHub Desktop.
<?php
/**
 * Plain text parsing, part 1 of 2: public entry point.
 *
 * Together with elementToString() this implements the plain text parsing
 * algorithm referenced below. The element is first flattened to a raw string
 * by elementToString(); that string is then tidied by deleting:
 *  - runs of spaces directly before a line feed,
 *  - runs of spaces directly after a line feed,
 *  - leading and trailing whitespace (tab, LF, FF, CR, space),
 *  - surplus spaces inside a run of spaces, so that one space remains.
 *
 * @see https://wiki.zegnat.net/media/textparsing.html
 *
 * @param DOMElement $element Element whose text is to be extracted.
 * @return string The tidied text, exactly as returned by preg_replace().
 **/
public function textContent(DOMElement $element)
{
    $flattened = $this->elementToString($element);
    $whitespaceToDrop = '/( +(?=\n)|(?<=\n) +|^[\t\n\f\r ]+|[\t\n\f\r ]+$| +(?= ))/';

    return preg_replace($whitespaceToDrop, '', $flattened);
}
/**
 * Plain text parsing, part 2 of 2: recursively flatten an element to a string.
 *
 * Child nodes are visited in document order:
 *  - text nodes are appended with every tab, LF and CR turned into a space;
 *  - SCRIPT and STYLE elements are skipped entirely;
 *  - IMG elements contribute their trimmed `alt` text wrapped in single
 *    spaces or, when no `alt` attribute exists, their trimmed `src` passed
 *    through $this->resolveUrl() (defined outside this snippet), also
 *    wrapped in single spaces; an IMG with neither attribute adds nothing;
 *  - BR elements contribute a line feed;
 *  - P elements contribute a line feed followed by their flattened content;
 *  - every other element contributes its flattened content;
 *  - nodes that are neither text nor element are ignored.
 *
 * Tag names are compared after upper-casing.
 *
 * @param DOMElement $input Element whose children are flattened.
 * @return string The raw, not yet whitespace-tidied text.
 */
private function elementToString(DOMElement $input)
{
    $text = '';

    foreach ($input->childNodes as $node) {
        if ($node->nodeType === XML_TEXT_NODE) {
            $text .= str_replace(array("\t", "\n", "\r"), ' ', $node->textContent);
            continue;
        }

        if ($node->nodeType !== XML_ELEMENT_NODE) {
            // Neither text nor element: contributes nothing.
            continue;
        }

        $name = strtoupper($node->tagName);

        if ($name === 'SCRIPT' || $name === 'STYLE') {
            continue;
        }

        if ($name === 'BR') {
            $text .= "\n";
            continue;
        }

        if ($name === 'IMG') {
            // An existing alt attribute wins over src, even when it is empty.
            if ($node->hasAttribute('alt')) {
                $alt = trim($node->getAttribute('alt'), "\t\n\f\r ");
                $text .= ' ' . $alt . ' ';
            } elseif ($node->hasAttribute('src')) {
                $src = trim($node->getAttribute('src'), "\t\n\f\r ");
                $text .= ' ' . $this->resolveUrl($src) . ' ';
            }
            continue;
        }

        // Paragraphs start on a new line; all other elements are inlined.
        if ($name === 'P') {
            $text .= "\n";
        }
        $text .= $this->elementToString($node);
    }

    return $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment