Skip to content

Instantly share code, notes, and snippets.

@Sommerregen
Created January 19, 2018 19:16
Show Gist options
  • Save Sommerregen/113175eb40c292c7f2ca6ea5720dfff5 to your computer and use it in GitHub Desktop.
Save Sommerregen/113175eb40c292c7f2ca6ea5720dfff5 to your computer and use it in GitHub Desktop.
Filter HTML user input, allowing only specific tags and attributes
<?php
// Demo: run a sample of hostile user-supplied HTML through htmlfilter() and dump the result.
// XSS - You shall not pass ! ;)

// Sample input mixing harmless markup with three injection attempts:
// an on-click handler, a javascript: link and a <script> element.
$html = 'TEST <div>Hello PHP/SQL developers, <img src="https://foxorm.com/img/foxorm.png" height="24" width="24" on-click="alert(\'javascript injection try\');"><a href="https://foxorm.com">FoxORM</a> is <b>awesome</b> !
<a href="javascript:alert(\'another javascript injection try\');"> !!! </a>
<script>window.location = http://xss-injection.hack; </script></div> IFY';

// Whitelist: tag name => attribute names kept on that tag.
// The special '*' key lists attributes accepted on every whitelisted tag.
$securisedHtml = htmlfilter(
    $html,
    [
        '*'   => ['style'],   // style attribute accepted on all tags
        'div' => ['data-*'],  // meant as "every data- attribute on <div>"; wildcard handling is up to htmlfilter()
        'p'   => [],
        'b'   => [],
        'i'   => [],
        'a'   => ['href'],
        'img' => ['src', 'height', 'width'],
    ]
);

// Entity-encode the filtered markup so the dump shows the tags instead of rendering them.
var_dump(htmlentities($securisedHtml));
//echo $securisedHtml;
/**
 * Escape angle brackets in buffered character data before it is emitted as text.
 *
 * Buffered data can contain '<' / '>' when it is the remainder of a tag that was
 * never closed (they are legal inside quoted attribute values), so it must never
 * reach the output raw.
 *
 * @param string $text buffered character data
 * @return string the text with '<' and '>' replaced by entities
 */
function htmlfilterEscapeText($text){
    return str_replace(['<', '>'], ['&lt;', '&gt;'], $text);
}

/**
 * Tell whether an attribute name matches one of the whitelist patterns.
 *
 * A pattern matches when it equals the name, or when it ends with '*' and the
 * name starts with everything before that '*' (e.g. 'data-*' matches 'data-id').
 *
 * @param string $name     attribute name
 * @param array  $patterns whitelist entries
 * @return bool
 */
function htmlfilterAttributeAllowed($name, $patterns){
    $name = (string)$name;
    foreach((array)$patterns as $pattern){
        $pattern = (string)$pattern;
        if($pattern === $name){
            return true;
        }
        if(substr($pattern, -1) === '*' && strncmp($name, $pattern, strlen($pattern) - 1) === 0){
            return true;
        }
    }
    return false;
}

/**
 * Rebuild one tag from the raw text found between '<' and '>'.
 *
 * @param string $tagSource                   tag content without the surrounding angle brackets
 * @param array  $allowedAttributesByTag      tag name => allowed attribute patterns ('*' key already removed)
 * @param array  $allowedAttributesForAllTags attribute patterns allowed on every tag
 * @return string the sanitised tag, or '' when the tag is not whitelisted
 */
function htmlfilterRenderTag($tagSource, $allowedAttributesByTag, $allowedAttributesForAllTags){
    $tagSource = (string)$tagSource;

    // Closing tag: emitted only when the tag name is whitelisted.
    if(isset($tagSource[0]) && $tagSource[0] === '/'){
        $tagName = (string)substr($tagSource, 1);
        return isset($allowedAttributesByTag[$tagName]) ? "</$tagName>" : '';
    }

    // The tag name runs up to the first whitespace character; a trailing '/'
    // ("<br/>") marks a self-closing tag and is not part of the name.
    $nameLength = strcspn($tagSource, " \t\n\r\x0B");
    $tagName = rtrim((string)substr($tagSource, 0, $nameLength), '/');
    if(!isset($allowedAttributesByTag[$tagName])){
        return '';
    }
    $attrSource = (string)substr($tagSource, $nameLength);
    $selfClosing = substr($tagSource, -1) === '/';

    if(strpos($tagSource, '"') !== false || strpos($tagSource, "'") !== false){
        // Quoted values present: use the quote-aware parser.
        $parsed = parseAttributes($attrSource);
    }
    else{
        // No quotes anywhere: attributes are whitespace-separated "name" or "name=value" tokens.
        $parsed = [];
        foreach(preg_split('/[ \t\n\r\x0B]+/', $attrSource, -1, PREG_SPLIT_NO_EMPTY) as $token){
            $equalsAt = strpos($token, '=');
            if($equalsAt === false){
                $parsed[] = $token;
            }
            elseif($equalsAt > 0){
                $parsed[substr($token, 0, $equalsAt)] = (string)substr($token, $equalsAt + 1);
            }
        }
    }

    // Normalise to lower-case name => value. Integer keys carry value-less
    // attributes (the name is stored as the value). Anything that is not a
    // plain attribute name is dropped so it can never be echoed into the tag.
    $named = [];
    foreach($parsed as $k=>$v){
        if(is_int($k)){
            $name = trim((string)$v);
            $value = '';
        }
        else{
            $name = $k;
            $value = (string)$v;
        }
        $name = strtolower($name);
        if(!preg_match('/^[a-z_:][-a-z0-9_:.]*\z/', $name)){
            continue;
        }
        $named[$name] = $value;
    }

    $attrs = [];
    foreach(filterAttributes($tagName, $named) as $k=>$v){
        if(!htmlfilterAttributeAllowed($k, $allowedAttributesByTag[$tagName])
            && !htmlfilterAttributeAllowed($k, $allowedAttributesForAllTags)){
            continue;
        }
        // The value is always re-quoted with double quotes, so a literal '"'
        // (possible in a single-quoted source value) must be encoded.
        $attrs[] = $k.'="'.str_replace(['"', '<', '>'], ['&quot;', '&lt;', '&gt;'], (string)$v).'"';
    }

    $html = '<'.$tagName;
    if(!empty($attrs)){
        $html .= ' '.implode(' ', $attrs);
    }
    if($selfClosing){
        $html .= '/';
    }
    return $html.'>';
}

/**
 * Filter user-supplied HTML, keeping only whitelisted tags and attributes.
 *
 * HTML comments are removed, non-whitelisted tags are dropped (their text
 * content is kept), and the attributes of whitelisted tags are passed through
 * filterAttributes() and then checked against the whitelist. Tag names are
 * matched case-sensitively against the whitelist keys.
 *
 * NOTE(review): this is a hand-written scanner, not a full HTML parser; treat it
 * as best-effort and prefer a maintained sanitiser for high-risk content.
 *
 * @param string $htmlUserInput          untrusted HTML
 * @param array  $allowedAttributesByTag tag name => list of allowed attribute names; a name
 *                                       ending in '*' is a prefix wildcard ('data-*'), and the
 *                                       special key '*' lists attributes allowed on every tag
 * @return string filtered HTML
 */
function htmlfilter($htmlUserInput, $allowedAttributesByTag=[]){
    $allowedAttributesForAllTags = [];
    if(isset($allowedAttributesByTag['*'])){
        $allowedAttributesForAllTags = $allowedAttributesByTag['*'];
        unset($allowedAttributesByTag['*']);
    }

    // preg_replace() returns null on a PCRE failure; the casts turn that into an
    // empty string so the filter fails closed.
    $htmlUserInput = (string)preg_replace('/<!--(.*)-->/Uis', '', (string)$htmlUserInput); //remove comments
    $htmlUserInput = (string)preg_replace("/^<!\[CDATA\[(.*)\]\]>$/s", '', $htmlUserInput); //remove CDATA

    $htmlLength = strlen($htmlUserInput);
    $securisedHtml = '';
    // PARSING: plain text, PARSING_OPENER: inside a tag, ATTR_VALUE: inside a quoted value of a tag
    $state = 'PARSING';
    $charContainer = '';
    $quoteType = '';

    for($i=0;$i<$htmlLength;$i++){
        $currentChar = $htmlUserInput[$i];
        switch($currentChar){
            case '<':
                if($state=='ATTR_VALUE'){
                    $charContainer .= $currentChar;
                    break;
                }
                // Start of a tag: emit whatever was buffered (text, or the remains of
                // a tag that was never closed) as escaped text.
                $securisedHtml .= htmlfilterEscapeText($charContainer);
                $charContainer = '';
                $state = 'PARSING_OPENER';
                break;
            case '"':
            case "'":
                if($state=='PARSING_OPENER'){
                    $state = 'ATTR_VALUE';
                    $quoteType = $currentChar;
                }
                elseif($state=='ATTR_VALUE'&&$quoteType==$currentChar){
                    $state = 'PARSING_OPENER';
                }
                $charContainer .= $currentChar;
                break;
            case '>':
                if($state=='PARSING_OPENER'){
                    // End of a tag: rebuild it from the buffered source.
                    $state = 'PARSING';
                    $securisedHtml .= htmlfilterRenderTag($charContainer, $allowedAttributesByTag, $allowedAttributesForAllTags);
                    $charContainer = '';
                }
                else{
                    // Inside a quoted value, or a stray '>' in plain text: keep it as data.
                    $charContainer .= $currentChar;
                }
                break;
            default:
                $charContainer .= $currentChar;
                break;
        }
    }

    // Trailing text, or an unterminated tag, is emitted as escaped text.
    $securisedHtml .= htmlfilterEscapeText($charContainer);
    return $securisedHtml;
}
/**
 * Parse the attribute part of a tag into an array.
 *
 * Quoted attributes (name="value" or name='value') are returned as
 * name => value. A value that is empty after trimming is stored as ''.
 * Value-less attributes are appended with an integer key and the attribute
 * name as the value. A quoted string that is not preceded by "name=" is stored
 * under a running integer index. Unquoted values are not recognised.
 *
 * NOTE(review): the truthiness checks below treat the single character '0' as
 * empty, so an attribute name that starts with (or is) '0' is not parsed
 * faithfully.
 *
 * @param string $attrText tag source following the tag name
 * @return array parsed attributes
 */
function parseAttributes($attrText){
    $attrArray = [];
    $total = strlen($attrText);
    $keyDump = '';
    $valueDump = '';
    // ATTR_NONE: between attributes, ATTR_KEY: reading a name, ATTR_VALUE: reading a value
    $currentState = 'ATTR_NONE';
    $quoteType = '';
    $keyDumpI = 0;
    for($i=0;$i<$total;$i++){
        $currentChar = $attrText[$i];
        // Any non-blank character seen between attributes starts a new name.
        if($currentState=='ATTR_NONE'&&trim($currentChar))
            $currentState = 'ATTR_KEY';
        switch ($currentChar){
            case '=':
                if ($currentState == 'ATTR_VALUE')
                    $valueDump .= $currentChar;
                else {
                    // End of the name; the value (and its opening quote) follows.
                    $currentState = 'ATTR_VALUE';
                    $quoteType = '';
                }
                break;
            case '"':
            case "'":
                if ($currentState == 'ATTR_VALUE') {
                    if ($quoteType == '')
                        $quoteType = $currentChar; // opening quote
                    elseif ($quoteType == $currentChar) {
                        // Closing quote: store the attribute and start over.
                        $keyDump = trim((string)$keyDump);
                        $tValueDump = trim($valueDump);
                        $attrArray[$keyDump] = ($tValueDump !== '') ? $valueDump : '';
                        $keyDump = $valueDump = $quoteType = '';
                        $currentState = 'ATTR_NONE';
                    }
                    else
                        $valueDump .= $currentChar; // the other quote type is plain data
                }
                else{
                    // Quoted string without a name: key it by a running index.
                    $keyDump = $keyDumpI++;
                    $valueDump = '';
                    $currentState = 'ATTR_VALUE';
                    $quoteType = $currentChar;
                }
                break;
            case "\t":
            case "\x0B":
            case "\n":
            case "\r":
            case ' ':
                if($currentState=='ATTR_KEY'){
                    // Whitespace after a name: it was a value-less attribute.
                    $currentState = 'ATTR_NONE';
                    if($keyDump)
                        $attrArray[] = trim($keyDump);
                    $keyDump = $valueDump = $quoteType = '';
                }
                elseif($currentState=='ATTR_VALUE')
                    $valueDump .= $currentChar;
                break;
            default:
                if ($currentState == 'ATTR_KEY')
                    $keyDump .= $currentChar;
                else
                    $valueDump .= $currentChar;
                break;
        }
    }
    // A name still pending at the end of the input is a value-less attribute.
    if(trim((string)$keyDump))
        $attrArray[] = trim((string)$keyDump);
    return $attrArray;
}
/**
 * Remove attributes that can execute script or hijack form behaviour.
 *
 * Dropped, with names compared case-insensitively:
 *  - every attribute whose name starts with "on" (event handlers),
 *  - form, formaction, autofocus and dirname,
 *  - href, poster and xlink:href whose value is a javascript: URL,
 *  - rel="import" on <link> and srcdoc on <iframe>.
 *
 * The javascript: check is best-effort: it decodes HTML entities and ignores
 * whitespace/control characters and letter case before testing the scheme.
 *
 * @param string $tag        tag name the attributes belong to
 * @param array  $attrsInput attribute name => value
 * @return array the remaining attributes, with their original keys and values
 */
function filterAttributes($tag,$attrsInput){
    //see http://heideri.ch/jso/
    $attrs = [];
    $tag = strtolower((string)$tag);
    foreach($attrsInput as $k=>$v){
        // Compare on a lower-cased string so integer keys and mixed case cannot slip through.
        $name = strtolower(trim((string)$k));
        if(strncmp($name, 'on', 2) === 0){
            continue;
        }
        if(in_array($name, ['form', 'formaction', 'autofocus', 'dirname'], true)){
            continue;
        }
        if(in_array($name, ['href', 'poster', 'xlink:href'], true)){
            $url = html_entity_decode((string)$v, ENT_QUOTES | ENT_HTML5, 'UTF-8');
            $url = preg_replace('/[\x00-\x20]+/', '', $url);
            // A null result means the normalisation itself failed: drop the attribute.
            if($url === null || strncmp(strtolower($url), 'javascript:', 11) === 0){
                continue;
            }
        }
        switch($tag.'['.$name.']'){
            case 'link[rel]':
                if(strtolower(trim((string)$v))=='import'){
                    continue 2;
                }
                break;
            case 'iframe[srcdoc]':
                continue 2;
        }
        $attrs[$k] = $v;
    }
    return $attrs;
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment