public
Created

Stop dangerous HTML by using different criteria for editing and for display. This is a very basic example of how easy it is to encode text so you don't have to remove the dangerous stuff!

  • Download Gist
xss.encode.php
PHP
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
<?php
// @see http://stackoverflow.com/questions/8419038/what-is-the-correct-way-to-detect-whether-string-inputs-contain-html-or-not
 
 
// Where the attack vectors come from: the ha.ckers.org XSS cheat sheet.
//   $ wget http://ha.ckers.org/xssAttacks.xml
// A remote URL such as 'http://ha.ckers.org/xssAttacks.xml' can be assigned
// instead, provided this PHP install supports HTTP requests.
$file = 'xssAttacks.xml';

// Output mode switch. When true, the loop below prints every attack vector
// raw and unescaped; serve that as plain text so no HTML/XML gets interpreted:
//header('Content-Type: text/plain');
$text_mode = false;
 
/*
* Core Functions
*/
/**
 * Entity-encode a string for output as HTML text or inside a quoted attribute.
 *
 * Encodes &, <, >, " and ' (ENT_QUOTES). ENT_SUBSTITUTE (PHP 5.4+) replaces
 * invalid UTF-8 byte sequences with U+FFFD; without it htmlspecialchars()
 * returns an empty string for the whole input as soon as one byte is invalid,
 * silently discarding the value.
 *
 * @param string $input Raw, untrusted text.
 * @return string The entity-encoded text.
 */
function h($input)
{
    return htmlspecialchars($input, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
 
/**
 * Report whether a string contains an angle bracket ("<" or ">").
 *
 * Both characters are single-byte ASCII and never occur inside a UTF-8
 * multi-byte sequence, so a byte-wise scan is exact. strpbrk() checks for
 * either character in one call and does not need the optional mbstring
 * extension.
 *
 * @param string $input Text to inspect.
 * @return bool True when at least one "<" or ">" is present.
 */
function gtlt($input)
{
    return strpbrk($input, '<>') !== false;
}
 
/*
 * Load the cheat sheet and reduce it to a plain list of attack objects.
 *
 * libxml errors are collected internally instead of being emitted as PHP
 * warnings, so a failed load has to be detected and reported here; otherwise
 * the script would carry on with $xml === false and fail on the property
 * access and the foreach that follow.
 */
libxml_use_internal_errors(true);
$xml = simplexml_load_file($file);

if ($xml === false) {
    // Gather whatever libxml recorded so the failure is visible to the user.
    $problems = array();
    foreach (libxml_get_errors() as $error) {
        $problems[] = trim($error->message) . ' (line ' . $error->line . ')';
    }
    libxml_clear_errors();
    $reason = $problems ? implode('; ', $problems) : 'unknown error';

    if (!empty($text_mode)) {
        print '!! Could not load ' . $file . ': ' . $reason . "\n\n";
    } else {
        print '<p><b>Could not load ' . htmlspecialchars($file, ENT_QUOTES, 'UTF-8') . '</b>: '
            . htmlspecialchars($reason, ENT_QUOTES, 'UTF-8') . "</p>\n\n";
    }

    // Nothing to iterate; the rest of the page can still be rendered.
    $xml = array();
} else {
    // The JSON round trip turns the SimpleXMLElement tree into stdClass/arrays.
    $xml = json_decode(json_encode($xml));
    $xml = isset($xml->attack) ? $xml->attack : array();

    // A document with exactly one <attack> decodes to a single object rather
    // than a list; wrap it so the loop always iterates over attacks.
    if (!is_array($xml)) {
        $xml = array($xml);
    }
}
 
/*
 * Render every attack vector from the cheat sheet, either raw (text mode) or
 * as an HTML page that shows each vector in body and attribute contexts.
 */
foreach ($xml as $key => $attack) {
    //print_r($attack);

    // Elements that were empty or carried attributes come back from the JSON
    // round trip as stdClass objects rather than strings, and concatenating
    // an object is a fatal error. Anything that is not a scalar is therefore
    // treated as an empty string.
    $name    = (isset($attack->name) && is_scalar($attack->name)) ? (string) $attack->name : '';
    $browser = (isset($attack->browser) && is_scalar($attack->browser)) ? (string) $attack->browser : '';
    $code    = (isset($attack->code) && is_scalar($attack->code)) ? (string) $attack->code : '';

    if ($text_mode) {
        // Text mode deliberately prints the vectors raw; it is only safe when
        // the response is served as text/plain.
        print "## " . $name . ' (' . strip_tags($browser) . ")\n\n";

        /*
         * The four possible attack forms when displaying user input for
         * editing or content.
         */

        // The "value" attribute
        // Why anyone would not enclose attributes with quotes!?
        print '<input value=' . $code . ">\n\n"; // no quotes
        print '<input value="' . $code . "\">\n\n"; // Double-quotes
        print "<input value='" . $code . "'>\n\n"; // Single-quotes

        // Raw HTML
        print '<div>' . $code . "</div>\n\n\n";

        /*
         * There is also the possibility that some projects might get user
         * input into another attribute of an HTML tag. I've yet to see this
         * since there is no reason for it, but it could happen and isn't
         * discussed here:
         *
         * <tag [attribute]="$attack->code">...</tag>
         * <a href="$attack->code">...</a>
         * <b onClick="$attack->code">...</b>
         * <ul rel="$attack->code">...</ul>
         * <div class="$attack->code">...</div>
         * etc...
         */
        continue;
    }

    // The name is plain text taken from the file, so it is escaped before it
    // goes into the heading. The browser field keeps the original treatment
    // of having its markup removed with strip_tags().
    print "<h4>" . $key . ': ' . h($name) . ' (' . strip_tags($browser) . ")</h4>\n\n";

    /*
     * Step 1: HTML
     *
     * HTML parsers should only run dangerous code when contained within one
     * (or both) of [<>]. Assuming there are no broken tags on the page before
     * this input is displayed - we can safely show it.
     */

    // It should be safe to use this if our calculations are correct
    if (gtlt($code)) {
        print "<p></p>\n";
        print '<p><b>Unsafe</b>: ' . h($code) . "</p>\n";
    } else {
        print '<p>' . $code . "</p>\n";
    }

    /*
     * Step 2: Attributes
     *
     * Attributes are different because the HTML parser is already running at
     * this point and is ready to parse anything it comes across up to the
     * closing ">". We must do more than just HTML encode stuff at this point.
     */

    $v = $code;

    // The "value" attribute
    // NOTE: htmlspecialchars() does not encode whitespace or "=", so the
    // unquoted form below can still be broken out of; it is shown for
    // comparison with the quoted forms.
    print '<input value=' . h($v) . ">\n\n"; // no quotes
    print '<input value="' . h($v) . "\">\n\n"; // Double-quotes
    print "<input value='" . h($v) . "'>\n\n"; // Single-quotes

    print "\n\n";

    // Stop after the entry whose key exceeds 200.
    if ($key > 200) {
        break;
    }
}
 
// Entity encoding does nothing against a hostile URL scheme: "javascript:"
// contains no character that htmlspecialchars() would touch.
print "<h3>Invalid Link Protocal</h3>\n\n";

// See also "vbscript: ..." or "expression(..)" for IE!!!
$v = "javascript:alert('hi')";
$encoded = h($v);

print '<a href="' . $v . "\">Bad Link</a>\n\n";
print "<input value={$encoded}>\n\n";     // no quotes
print "<input value=\"{$encoded}\">\n\n"; // Double-quotes
print "<input value='{$encoded}'>\n\n";   // Single-quotes


print "<h1>Finished</h1>\n\n";

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.