Skip to content

Instantly share code, notes, and snippets.

@u01jmg3
Last active November 14, 2016 10:54
Show Gist options
  • Save u01jmg3/a220f20a99f942f4a692b95fa928f02a to your computer and use it in GitHub Desktop.
Save u01jmg3/a220f20a99f942f4a692b95fa928f02a to your computer and use it in GitHub Desktop.
Sanitize WYSIWYG input
<?php
/**
 * Sanitizes HTML produced by a WYSIWYG editor.
 *
 * - Strips every tag except `<p>`, `<ol>`, `<ul>` and `<li>` and removes all attributes
 * - Removes empty paragraphs, lists and list items, including elements that only
 *   become empty once their empty children have been removed
 * - Reconciles spacing (`&nbsp;` becomes a space, whitespace runs collapse to one space)
 * - Replaces curly quotes, dashes and ellipses with straight ASCII equivalents
 *
 * @param string $html Editor markup; treated as UTF-8
 * @return string Sanitized markup, or an empty string when nothing remains
 */
public static function sanitizeWysiwygInput($html)
{
    $html = (string) $html;

    // Strip unwanted tags *before* touching whitespace: once only block-level tags
    // remain it is safe to drop whitespace next to them. Doing it the other way
    // round glues words together around inline tags
    // (`Hello <b>world</b> again` would become `Hello worldagain`).
    $html = strip_tags($html, '<p><ol><ul><li>');
    // Replace `&nbsp;` with a space character throughout (it prevents run-on sentences breaking out of layout)
    $html = str_replace('&nbsp;', ' ', $html);
    // Remove whitespace directly after any tag and directly before a closing tag
    $html = preg_replace('/(?<=>)\s+|\s+(?=<\/)/', '', $html);
    // Replace multiple spaces with a single space
    $html = preg_replace('!\s+!', ' ', $html);

    // Convert curly quotes, dashes and ellipses to their straight equivalents
    $replacements = [
        "\xE2\x80\x98" => "'", // ‘
        "\xE2\x80\x99" => "'", // ’
        "\xE2\x80\x9A" => "'", // ‚
        "\xE2\x80\x9B" => "'", // ‛
        "\xE2\x80\x9C" => '"', // “
        "\xE2\x80\x9D" => '"', // ”
        "\xE2\x80\x9E" => '"', // „
        "\xE2\x80\x9F" => '"', // ‟
        "\xE2\x80\x93" => '-', // en dash
        "\xE2\x80\x94" => '--', // em dash
        "\xE2\x80\xa6" => '...', // ellipsis
    ];
    $html = strtr((string) $html, $replacements);

    // Nothing left to parse; `loadHTML()` rejects an empty string
    if (trim($html) === '') {
        return '';
    }

    // Ignore malformed HTML while parsing, then put libxml's error handling back
    // the way the caller had it so this method has no lasting global side effect
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // Without a charset hint libxml decodes the input as ISO-8859-1 and mangles
    // multi-byte UTF-8 characters; the `meta` element lands in `<head>` and is
    // never part of the returned body content
    $loaded = $dom->loadHTML('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($loaded === false) {
        return '';
    }

    $xpath = new DOMXPath($dom);
    // We are only interested in the `body` content
    $body = $xpath->query('//body')->item(0);

    if ($body === null) {
        return '';
    }

    // Remove empty tags. Repeat until a pass finds none, because removing an empty
    // child (e.g. `<li>`) can leave its parent (e.g. `<ul>`) empty as well. The
    // query is scoped to descendants of `body` so `body` itself is never removed.
    do {
        $emptyNodes = $xpath->query('.//*[not(node())]', $body);

        foreach ($emptyNodes as $node) {
            $node->parentNode->removeChild($node);
        }
    } while ($emptyNodes->length > 0);

    // Remove all attributes (iterating backwards because the attribute map is live)
    foreach ($xpath->query('.//*', $body) as $node) {
        for ($i = $node->attributes->length - 1; $i >= 0; $i--) {
            $node->removeAttributeNode($node->attributes->item($i));
        }
    }

    // Serialize each child of `body` to build the returned fragment
    $output = '';

    foreach ($body->childNodes as $child) {
        $output .= $dom->saveHTML($child);
    }

    return $output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment