Skip to content

Instantly share code, notes, and snippets.

@ChrisMcKee
Created October 6, 2011 22:02
Show Gist options
  • Save ChrisMcKee/1268823 to your computer and use it in GitHub Desktop.
Save ChrisMcKee/1268823 to your computer and use it in GitHub Desktop.
Data Cleansing Class
<?php
class RDM_DataSecurity {
// this basic clean should clean html code from
// lot of possible malicious code for Cross Site Scripting
// use it whereever you get external input
static function basicClean($string) {
if (get_magic_quotes_gpc()) {
$string = stripslashes($string);
}
//if the newer externalinput class exists, use this
if (method_exists('lx_externalinput_clean', 'basic')) {
return lx_externalinput_clean::basic($string);
}
$string = str_replace(array("&amp;", "&lt;", "&gt;"), array("&amp;amp;", "&amp;lt;", "&amp;gt;"), $string);
// fix &entitiy\n;
$string = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u', "$1;", $string);
$string = preg_replace('#(&\#x*)([0-9A-F]+);*#iu', "$1$2;", $string);
$string = html_entity_decode($string, ENT_COMPAT, "UTF-8");
// remove any attribute starting with "on" or xmlns
$string = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])(on|xmlns)[^>]*>#iUu', "$1>", $string);
// remove javascript: and vbscript: protocol
$string = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2nojavascript...', $string);
$string = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2novbscript...', $string);
$string = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*-moz-binding[\x00-\x20]*:#Uu', '$1=$2nomozbinding...', $string);
$string = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/]*data[\x00-\x20]*:#Uu', '$1=$2nodata...', $string);
//remove any style attributes, IE allows too much stupid things in them, eg.
//<span style="width: expression(alert('Ping!'));"></span>
// and in general you really don't want style declarations in your UGC
$string = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])style[^>]*>#iUu', "$1>", $string);
//remove namespaced elements (we do not need them...)
$string = preg_replace('#</*\w+:\w[^>]*>#i', "", $string);
//remove really unwanted tags
do {
$oldstring = $string;
$string = preg_replace('#</*(applet|meta|xml|blink|link|style|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|title|base)[^>]*>#i', "", $string);
} while ($oldstring != $string);
return $string;
}
static function removeMagicQuotes($data) {
if (get_magic_quotes_gpc()) {
$newdata = array();
foreach ($data as $name => $value) {
$name = stripslashes($name);
if (is_array($value)) {
$newdata[$name] = self::removeMagicQuotes($value);
} else {
$newdata[$name] = stripslashes($value);
}
}
return $newdata;
}
return $data;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment