Skip to content

Instantly share code, notes, and snippets.

@Aaron3
Created September 4, 2013 14:36
Show Gist options
  • Save Aaron3/6437837 to your computer and use it in GitHub Desktop.
Save Aaron3/6437837 to your computer and use it in GitHub Desktop.
Clean up everything dumb authors can screw up on a website. (Keeping the rules readable. Efficiency be damned.)
<?php
// Register cleanup_annoying_writers() as a callback on the WordPress
// 'content_save_pre' filter hook, so the filter passes content through it.
add_filter('content_save_pre', 'cleanup_annoying_writers');
/**
 * Normalise author-supplied post markup before it is stored.
 *
 * The rules are applied in a fixed order and are intentionally simple string
 * and regex substitutions:
 *  - strip script, style, div, p, span and center tags (their inner text stays);
 *  - strip inline style attributes, then re-add a counter-reset style for
 *    every start="N" attribute;
 *  - convert i/b to em/strong;
 *  - replace non-breaking spaces (UTF-8 character and entity) with spaces;
 *  - move or drop whitespace sitting just before a closing a/em tag;
 *  - drop the space before . ! ? and ,
 *  - remove the "more" marker, decode " &amp; ", collapse runs of spaces;
 *  - remove a handful of empty elements, turn br tags into newlines;
 *  - straighten curly quotes, normalise line endings, collapse blank lines.
 *
 * NOTE(review): the input is run through stripslashes() and is not re-slashed
 * on return. WordPress is understood to pass slashed data through
 * content_save_pre and to unslash it again afterwards; if that holds, literal
 * backslashes in content are lost. Confirm against the caller before changing.
 *
 * @param string $content Raw (possibly slashed) post content.
 * @return string Cleaned, trimmed content.
 */
function cleanup_annoying_writers($content)
{
    // The original body started with
    //     remove_filter('content_save_pre', array($this, 'cleanup_annoying_writers'));
    // $this does not exist in a plain function (a fatal Error on PHP >= 7.1),
    // and that callable never matched the string callback that was registered,
    // so the call could never remove anything. Nothing below re-triggers the
    // filter, so the call is simply dropped.
    $content = trim(stripslashes((string) $content));

    // No JS: remove the tags themselves.
    if (strpos($content, '</script>') !== false) {
        $content = cleanup_annoying_writers_replace('/<\/script>/', "\n\n", $content);
        $content = cleanup_annoying_writers_replace('/<script[^>]*>/', "\n\n", $content);
    }

    // No CSS blocks.
    if (strpos($content, '</style>') !== false) {
        $content = cleanup_annoying_writers_replace('/<\/style>/', "\n\n", $content);
        $content = cleanup_annoying_writers_replace('/<style[^>]*>/', "\n\n", $content);
    }

    // No divs: each tag (plus surrounding whitespace) becomes a blank line.
    if (strpos($content, '</div>') !== false) {
        $content = cleanup_annoying_writers_replace('/[\s]*<\/div>[\s]*/', "\n\n", $content);
        $content = cleanup_annoying_writers_replace('/[\s]*<div[^>]*>[\s]*/', "\n\n", $content);
    }

    // No paragraph tags. The opening-tag pattern requires ">" or whitespace
    // right after "<p" so that <pre>, <param>, <picture> ... are left alone.
    if (strpos($content, '</p>') !== false) {
        $content = cleanup_annoying_writers_replace('/[\s]*<\/p>[\s]*/', "\n\n", $content);
        $content = cleanup_annoying_writers_replace('/[\s]*<p(?:\s[^>]*)?>[\s]*/', "\n\n", $content);
    }
    $content = trim($content);

    // No spans.
    if (strpos($content, '</span>') !== false) {
        $content = cleanup_annoying_writers_replace('/<\/span>/', "", $content);
        $content = cleanup_annoying_writers_replace('/<span[^>]*>/', "", $content);
    }

    // No inline CSS (double- or single-quoted attribute values).
    if (strpos($content, 'style=') !== false) {
        $content = cleanup_annoying_writers_replace('/style="[^"]+"/', "", $content);
        $content = cleanup_annoying_writers_replace("/style='[^']+'/", "", $content);
    }

    // ...except counter-resets: every start="N" gets a matching inline style.
    // This must run after the inline-CSS removal above or it would be undone.
    if (strpos($content, 'start=') !== false) {
        $content = cleanup_annoying_writers_replace('/start="([^"]+)"/', 'start="${1}" style="counter-reset:headers ${1};"', $content);
        $content = cleanup_annoying_writers_replace("/start='([^']+)'/", 'start="${1}" style="counter-reset:headers ${1};"', $content);
    }

    // No center tags allowed.
    if (strpos($content, '<center>') !== false) {
        $content = str_replace(array('</center>', '<center>'), '', $content);
    }

    // em, not i.
    if (strpos($content, '<i>') !== false) {
        $content = str_replace(array('<i>', '</i>'), array('<em>', '</em>'), $content);
    }

    // strong, not b.
    if (strpos($content, '<b>') !== false) {
        $content = str_replace(array('<b>', '</b>'), array('<strong>', '</strong>'), $content);
    }

    // Non-breaking spaces, both the UTF-8 encoded character (U+00A0, written
    // as an explicit byte escape so it is visible) and the HTML entity.
    $content = str_replace(array("\xC2\xA0", '&nbsp;'), ' ', $content);

    // Whitespace just before a closing a/em tag: move it outside the tag when
    // a letter, digit or space follows, otherwise delete it.
    if (strpos($content, ' </a>') !== false) {
        $content = cleanup_annoying_writers_replace('/[\s]+<\/a>(?=[a-zA-Z0-9 ])/', '</a> ', $content);
        $content = cleanup_annoying_writers_replace('/[\s]+<\/a>/', '</a>', $content);
    }
    if (strpos($content, ' </em>') !== false) {
        $content = cleanup_annoying_writers_replace('/[\s]+<\/em>(?=[a-zA-Z0-9 ])/', '</em> ', $content);
        $content = cleanup_annoying_writers_replace('/[\s]+<\/em>/', '</em>', $content);
        $content = trim($content);
    }

    // No space before sentence punctuation.
    $content = cleanup_annoying_writers_replace('/ ([\.\!\?\,])/', '${1}', $content);
    $content = trim($content);

    // No "more" markers.
    $content = str_replace('<!--more-->', '', $content);

    // No entity ampersands in running text (only when surrounded by spaces).
    $content = str_replace(' &amp; ', ' & ', $content);

    // No runs of multiple spaces.
    $content = cleanup_annoying_writers_replace('/ {2,}/', ' ', $content);

    // NOTE(review): this pattern is kept exactly as it was. Because of the
    // leading "!" it only matches the literal text "!> <", so in practice it
    // does nothing. The apparent intent (strip spaces between tags) would glue
    // adjacent inline elements together, so it is deliberately not "fixed".
    if (strpos($content, '> <') !== false) {
        $content = cleanup_annoying_writers_replace('/!> +<(?!a)/', '><', $content);
    }

    // Remove common empty elements (needles are tried in the listed order).
    if (strpos($content, '></') !== false) {
        $content = str_replace(
            array(
                '<blockquote></blockquote>',
                '<p></p>',
                '<div></div>',
                '<span></span>',
                '<em></em>',
                '<strong></strong>',
            ),
            '',
            $content
        );
    }

    // Line-break tags (and any whitespace after them) become a single newline.
    if (strpos($content, '<br') !== false) {
        $content = cleanup_annoying_writers_replace('/<br[\s]*\/?>[\s]*/', "\n", $content);
    }

    // No fancy quotes.
    $content = str_replace(array('“', '”', '’', '‘'), array('"', '"', "'", "'"), $content);

    // Normalise line endings: Windows (CRLF) first, then any remaining bare CR.
    $content = str_replace(array("\r\n", "\r"), "\n", $content);

    // Collapse three or more consecutive newlines into one blank line.
    if (strpos($content, "\n\n\n") !== false) {
        $content = cleanup_annoying_writers_replace('!\n\n\n+!', "\n\n", $content);
    }

    // Trim it all one last time.
    return trim($content);
}

/**
 * preg_replace() that never discards the subject.
 *
 * preg_replace() returns null when the regex engine fails; passing that on
 * would make the caller save empty content. On failure the subject is
 * returned unchanged instead.
 *
 * @param string $pattern     PCRE pattern, including delimiters.
 * @param string $replacement Replacement string.
 * @param string $subject     Text to operate on.
 * @return string The replaced text, or $subject untouched if PCRE failed.
 */
function cleanup_annoying_writers_replace($pattern, $replacement, $subject)
{
    $result = preg_replace($pattern, $replacement, $subject);

    return $result === null ? $subject : $result;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment