Created
October 6, 2011 22:02
-
-
Save ChrisMcKee/1268823 to your computer and use it in GitHub Desktop.
Data Cleansing Class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Helpers for cleansing externally supplied data.
 *
 * basicClean() is a blacklist-style filter that strips a number of well-known
 * Cross Site Scripting vectors from a string; removeMagicQuotes() undoes the
 * slashes added by the legacy magic_quotes_gpc setting.
 */
class RDM_DataSecurity
{
    /**
     * Strip a number of common Cross Site Scripting vectors from a string.
     *
     * Use it wherever external input is received. If the class
     * lx_externalinput_clean with a method basic() is available, the work is
     * delegated to it and its result is returned unchanged.
     *
     * When one of the regular expressions fails (for example because the
     * input is not valid UTF-8 and the pattern uses the /u modifier) the
     * result is an empty string, i.e. the filter fails closed.
     *
     * @param string $string Raw, untrusted input.
     *
     * @return string The filtered string.
     */
    public static function basicClean($string)
    {
        if (self::magicQuotesActive()) {
            $string = stripslashes($string);
        }

        // If the newer externalinput class exists, use it instead.
        if (method_exists('lx_externalinput_clean', 'basic')) {
            return lx_externalinput_clean::basic($string);
        }

        // Escape the HTML meta characters so that html_entity_decode() below
        // restores them instead of interpreting what the caller supplied.
        // NOTE(review): because every "&" is escaped here, entity-encoded
        // payloads such as "&#106;avascript:" are not decoded by the step
        // below and therefore are not seen by the protocol filters. Confirm
        // whether only pre-existing &amp; / &lt; / &gt; entities were meant
        // to be protected.
        $string = str_replace(array("&", "<", ">"), array("&amp;", "&lt;", "&gt;"), $string);

        // Fix "&entity\n;": drop control characters/whitespace placed between
        // an entity name and its terminating semicolon.
        $string = self::replacePattern('#(&\#*\w+)[\x00-\x20]+;#u', '$1;', $string);
        // Normalise numeric entities so that they end in exactly one ";".
        $string = self::replacePattern('#(&\#x*)([0-9A-F]+);*#iu', '$1$2;', $string);
        $string = html_entity_decode($string, ENT_COMPAT, "UTF-8");

        // Remove any attribute starting with "on" or "xmlns" (together with
        // everything that follows it inside the same tag).
        $string = self::replacePattern('#(<[^>]+[\x00-\x20\"\'\/])(on|xmlns)[^>]*>#iUu', '$1>', $string);

        // Neutralise dangerous protocols / CSS bindings in attribute values.
        // Every pattern is case-insensitive: URI schemes and CSS property
        // names are matched case-insensitively by browsers, so "DATA:" must
        // be caught exactly like "data:".
        $protocolFilters = array(
            '#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu' => '$1=$2nojavascript...',
            '#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu' => '$1=$2novbscript...',
            '#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*-moz-binding[\x00-\x20]*:#iUu' => '$1=$2nomozbinding...',
            '#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*data[\x00-\x20]*:#iUu' => '$1=$2nodata...',
        );
        foreach ($protocolFilters as $pattern => $replacement) {
            $string = self::replacePattern($pattern, $replacement, $string);
        }

        // Remove any style attribute; IE allows too many dangerous things in
        // them, e.g. <span style="width: expression(alert('Ping!'));"></span>,
        // and in general style declarations are unwanted in user content.
        $string = self::replacePattern('#(<[^>]+[\x00-\x20\"\'\/])style[^>]*>#iUu', '$1>', $string);

        // Remove namespaced elements (they are not needed).
        $string = self::replacePattern('#</*\w+:\w[^>]*>#i', '', $string);

        // Remove really unwanted tags. Repeat until nothing changes so that a
        // tag which only appears after another one was removed (for example
        // "<scr<script>ipt>") is stripped as well.
        do {
            $previous = $string;
            $string = self::replacePattern('#</*(applet|meta|xml|blink|link|style|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|title|base)[^>]*>#i', '', $string);
        } while ($previous !== $string);

        return $string;
    }

    /**
     * Recursively strip the slashes added by magic_quotes_gpc from the keys
     * and values of an array.
     *
     * When magic quotes are not active (always the case on PHP >= 5.4) the
     * data is returned untouched.
     *
     * @param array $data Request data, possibly nested.
     *
     * @return array The data without magic-quote slashes.
     */
    public static function removeMagicQuotes($data)
    {
        if (!self::magicQuotesActive()) {
            return $data;
        }

        $newdata = array();
        foreach ($data as $name => $value) {
            $name = stripslashes($name);
            if (is_array($value)) {
                $newdata[$name] = self::removeMagicQuotes($value);
            } else {
                $newdata[$name] = stripslashes($value);
            }
        }

        return $newdata;
    }

    /**
     * Tell whether the legacy magic_quotes_gpc setting is switched on.
     *
     * get_magic_quotes_gpc() always returns false since PHP 5.4, is
     * deprecated as of PHP 7.4 and was removed in PHP 8.0, so it is only
     * called on versions where it can actually report true.
     *
     * @return bool
     */
    private static function magicQuotesActive()
    {
        return version_compare(PHP_VERSION, '5.4.0', '<')
            && function_exists('get_magic_quotes_gpc')
            && get_magic_quotes_gpc();
    }

    /**
     * preg_replace() wrapper that turns a PCRE failure into an empty string.
     *
     * preg_replace() returns null on error (e.g. malformed UTF-8 with the /u
     * modifier). Returning '' keeps the filter failing closed without passing
     * null on to further string functions.
     *
     * @param string $pattern     Regular expression.
     * @param string $replacement Replacement string.
     * @param string $subject     String to search in.
     *
     * @return string
     */
    private static function replacePattern($pattern, $replacement, $subject)
    {
        $result = preg_replace($pattern, $replacement, $subject);

        return $result === null ? '' : $result;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment