Last active
September 3, 2018 23:46
-
-
Save jdc-cunningham/4eb94c9f7ecd6673a3d4e1d0f6afded6 to your computer and use it in GitHub Desktop.
Dumb Remove Scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// PS: this is not a reliable sanitizer; use HTML Purifier instead.
// It does run, but it is not guaranteed to fix the problem.
// Nested functions did not work as expected here.
// It is probably time to learn OOP: note the $GLOBALS usage below. The final output could not be
// retrieved from the function, even after trying to pass it in as a parameter or use a regular global.
// Another note: to avoid case-sensitivity issues, use str_ireplace so you can match script, SCRIPT, ScRiPt
// without worrying about the exact string index position, or just don't do this at all.
// Working copies of the incoming text: $str holds it as received, while
// $str_lc_copy is the lower-cased twin that all matching is performed on.
$str = $content;
$str_lc_copy = strtolower($str);

// Bookkeeping handed to removeScript(): how many times "script" occurs, how
// many passes have been made so far, and where the next search starts.
$script_count = substr_count($str_lc_copy, 'script');
$run_count = 0;
$last_offset = 0;
$test_str = '';

// removeScript() leaves its result in this global slot.
$GLOBALS['exp_str'] = '';

// Characters regarded as ordinary text when found directly before "script".
$alphanums = str_split('abcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+[]/:;",.? ' . "'");

// Whitelisted tags and the angle-bracket-free placeholders that stand in for
// them while the text is being scrubbed (tag => placeholder).
$html_replace = [
    '<ul>'      => 'a___',
    '</ul>'     => 'b____',
    '<li>'      => 'c___',
    '</li>'     => 'd____',
    '<b>'       => 'e__',
    '</b>'      => 'f___',
    '<u>'       => 'g__',
    '</u>'      => 'h___',
    '<i>'       => 'j__',
    '</i>'      => 'k___',
    '<strong>'  => 'l_______',
    '</strong>' => 'm________'
];

// Swap every whitelisted tag for its placeholder. With array arguments
// str_replace applies the pairs one after another, in map order.
$str_lc_copy = str_replace(
    array_keys($html_replace),
    array_values($html_replace),
    $str_lc_copy
);
/**
 * Blank out occurrences of the literal word "script" that look like markup.
 *
 * The search is case-sensitive, so the caller is expected to pass text that
 * has already been lower-cased. Each occurrence is inspected once, left to
 * right, and replaced by a single space when any of these holds:
 *   - the character directly before it is "/" or is not listed in $alphanums;
 *   - a "<" appears anywhere earlier in the string;
 *   - a ">" appears anywhere after the occurrence.
 * An occurrence at the very start of the string has no preceding character,
 * so only the "<" / ">" context checks apply to it.
 *
 * The result is stored in $GLOBALS['exp_str'] (existing callers read it from
 * there) and is also returned.
 *
 * @param string   $inp_str      Text to scan.
 * @param int      $script_count Upper bound on the number of occurrences to inspect.
 * @param int      $run_count    Occurrences already inspected (normally 0).
 * @param string[] $alphanums    Single characters treated as ordinary text.
 * @param int      $last_offset  Byte offset at which the search starts (normally 0).
 * @param string   $str          Unused; kept so existing call sites keep working.
 *
 * @return string The scrubbed text.
 */
function removeScript($inp_str, $script_count, $run_count, $alphanums, $last_offset, $str) {
    $needle = 'script';
    $needle_len = strlen($needle);
    $inp_str = (string) $inp_str;
    // A false/negative offset would make strpos misbehave; start no earlier than 0.
    $offset = max(0, (int) $last_offset);

    while ($run_count < $script_count && $offset <= strlen($inp_str)) {
        $pos = strpos($inp_str, $needle, $offset);
        if ($pos === false) {
            // Nothing left to inspect.
            break;
        }
        $run_count += 1;

        // Index -1 would read the LAST character of the string, so position 0
        // is handled explicitly as "no preceding character".
        $prior = $pos > 0 ? $inp_str[$pos - 1] : '';
        // Compare against '' rather than relying on truthiness: '0' is a
        // perfectly valid preceding character. "/" is listed in $alphanums by
        // the caller but still counts as a delimiter (closing tags).
        $prior_is_delimiter = $prior !== ''
            && ($prior === '/' || !in_array($prior, $alphanums, true));

        $first_lt = strpos($inp_str, '<');
        $has_lt_before = $first_lt !== false && $first_lt < $pos;
        $has_gt_after = strpos($inp_str, '>', $pos + $needle_len) !== false;

        if ($prior_is_delimiter || $has_lt_before || $has_gt_after) {
            $inp_str = substr_replace($inp_str, ' ', $pos, $needle_len);
            // Continue right after the space that now sits at $pos.
            $offset = $pos + 1;
        } else {
            // Skip past the occurrence that was kept; otherwise the same match
            // would be found again and later occurrences would never be seen.
            $offset = $pos + $needle_len;
        }
    }

    $GLOBALS['exp_str'] = $inp_str;

    return $inp_str;
}
// Scrub the lower-cased working copy; removeScript() leaves the cleaned text
// in $GLOBALS['exp_str'].
removeScript(
    $str_lc_copy,
    $script_count,
    $run_count,
    $alphanums,
    $last_offset,
    $str
);
/**
 * Turn placeholders back into the HTML tags they stand for.
 *
 * @param string $inp_str      Text that may contain placeholders.
 * @param array  $html_replace Map of HTML tag => placeholder.
 *
 * @return string Text with every placeholder replaced by its tag.
 */
function removeHtmlPlaceholders($inp_str, $html_replace) {
    $placeholders = array_values($html_replace);
    $tags = array_keys($html_replace);

    // With array arguments str_replace handles the pairs one after another in
    // map order, each pass working on the result of the previous one.
    return str_replace($placeholders, $tags, $inp_str);
}
// Fetch the scrubbed text that removeScript() stored in the global slot and
// put the whitelisted tags back in place of their placeholders.
$str = removeHtmlPlaceholders($GLOBALS['exp_str'], $html_replace);

// Collapse every run of whitespace into a single space, then hand the result
// back through $content.
$str = preg_replace('!\s+!', ' ', $str);
$content = $str;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment