Skip to content

Instantly share code, notes, and snippets.

@ElliotChong
Last active December 29, 2015 14:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ElliotChong/7686596 to your computer and use it in GitHub Desktop.
Save ElliotChong/7686596 to your computer and use it in GitHub Desktop.
HTML sanitization script that ignores brackets within <script> tags. Tested in V8; the forEach calls will need to be swapped out to work in legacy JS runtimes.
✓	 Bar

✓	 A &lt;p&gt;Baz!&lt;/p&gt; B

✓	 &lt;p&gt;Foo&lt;/p&gt; &lt;script&gt; a < b; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;

✓	 &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt; a < b || c > d; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;

✓	 D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt;$('body').append('<' + 'script> a < b; <' + '/script>');&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;

✓	 D &lt;p&gt;Foo&lt;/p&gt; &lt;script&gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;

✓	 D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript' &gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;

✓	 &lt;script type='text/javascript' &gt; a < b || c > d; &lt;/script&gt;

✓	 &lt;script &gt; a < b || c > d; &lt;/script&gt;
// Input strings passed to sanitize() by the test loop at the bottom of the file.
// Each entry is paired by index with the entry at the same position in `expected`.
var tests = [
// Plain text, no markup
"Bar",
// Markup without any <script> element
"A <p>Baz!</p> B",
// <script> elements whose bodies contain bare angle brackets
"<p>Foo</p> <script> a < b; </script> <span>Blam!</span>",
"<p>Foo</p> <script type='text/javascript'> a < b || c > d; </script> <span>Blam!</span>",
// Script body that builds script tags out of concatenated string fragments
"D <p>Foo</p> <script type='text/javascript'>$('body').append('<' + 'script> a < b; <' + '/script>');</script> <span>Blam!</span> F <b>O</b>",
// Script bodies wrapped in CDATA that contain a literal <script>...</script> string
"D <p>Foo</p> <script><![CDATA[ $('body').append('<script> a < b; </script>'); ]]></script> <span>Blam!</span> F <b>O</b>",
"D <p>Foo</p> <script type='text/javascript' ><![CDATA[ $('body').append('<script> a < b; </script>'); ]]></script> <span>Blam!</span> F <b>O</b>",
// Whole input is a single <script> element, with whitespace before the closing '>' of the opening tag
"<script type='text/javascript' > a < b || c > d; </script>",
"<script > a < b || c > d; </script>"
];
// Expected sanitize() output for each entry of `tests`, matched by index.
// In every entry the angle brackets of the tags themselves (including the
// <script> opening and closing tags) are escaped as &lt; / &gt;, while the
// brackets inside a <script> element's body are left as-is.
var expected = [
"Bar",
"A &lt;p&gt;Baz!&lt;/p&gt; B",
"&lt;p&gt;Foo&lt;/p&gt; &lt;script&gt; a < b; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;",
"&lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt; a < b || c > d; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;",
"D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt;$('body').append('<' + 'script> a < b; <' + '/script>');&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;",
"D &lt;p&gt;Foo&lt;/p&gt; &lt;script&gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;",
"D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript' &gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;",
"&lt;script type='text/javascript' &gt; a < b || c > d; &lt;/script&gt;",
"&lt;script &gt; a < b || c > d; &lt;/script&gt;"
];
// Caching the RegExps to avoid instantiating on each sanitize() call.
// None of the patterns use the `i` or `s` flags, so tag names are matched
// case-sensitively and `.` never crosses a line terminator.
var sanitizeRegExps = {
    // An opening script tag, then at least one '<' or '>', then a closing script tag
    containsScriptWithBracket: /(<\s*script.*>.*(<|>)+.*<\s*\/\s*script\s*>)/,
    // Either an opening or a closing script tag anywhere in the string
    isScript: /(<\s*script.*>)|(<\s*\/\s*script\s*>)/,
    // Captures the span from an opening script tag through a closing script tag.
    // The quantifiers are greedy, so the span runs to the LAST closing tag that
    // can be reached; this is what keeps a "</script>" string literal inside a
    // script body from ending the span early.
    splitOnScript: /(<\s*script.*>.*<\s*\/\s*script\s*>)/,
    // Captures the "<script" prefix at the very start of the string
    splitOnFirstScript: /^(<\s*script)/,
    // Captures the closing script tag at the very end of the string
    splitOnLastScript: /(<\s*\/\s*script\s*>$)/,
    lt: /</g,
    gt: />/g
};

/**
 * Escapes angle brackets in a string as &lt; / &gt;, except for the brackets
 * found inside the body of a <script> element, which are copied through
 * unchanged. The brackets of the script tags themselves are still escaped.
 *
 * Any piece that looks script-related but cannot be split into
 * "opening tag / body / closing tag" is escaped in full.
 *
 * @param {string} p_string The markup to sanitize.
 * @returns {string} The sanitized markup.
 */
function sanitize(p_string)
{
    // Escapes every '<' and '>' in the given text.
    function replaceBrackets(p_text)
    {
        return p_text.replace(sanitizeRegExps.lt, '&lt;').replace(sanitizeRegExps.gt, '&gt;');
    }

    // Sanitizes one "<script ...>body</script>" span, leaving the body untouched.
    // Returns null when the span does not have that exact shape, so the caller
    // can fall back to escaping everything instead of emitting partial output.
    function sanitizeScriptElement(p_element)
    {
        // Expected result: ["", "<script", "<rest of the element>"]
        var openSplits = p_element.split(sanitizeRegExps.splitOnFirstScript);

        // This shouldn't ever happen, but just in case there is some browser inconsistency or something...
        if (openSplits.length !== 3)
        {
            return null;
        }

        var afterTagName = openSplits[2];

        // Index just past the first '>', i.e. the end of the opening tag
        var scriptEndIndex = afterTagName.indexOf(">") + 1;

        // Expected result: ["<script body>", "</script>", ""]
        var closeSplits = afterTagName.slice(scriptEndIndex).split(sanitizeRegExps.splitOnLastScript);

        // No closing tag at the end (e.g. an unterminated trailing "<script>..."):
        // there is no trustworthy body to preserve.
        if (closeSplits.length !== 3)
        {
            return null;
        }

        return replaceBrackets(openSplits[1]) +
            replaceBrackets(afterTagName.slice(0, scriptEndIndex)) +
            closeSplits[0] +
            replaceBrackets(closeSplits[1]);
    }

    // Fast path: no script element containing brackets, so escape everything.
    if (!sanitizeRegExps.containsScriptWithBracket.test(p_string))
    {
        return replaceBrackets(p_string);
    }

    var modified = "";

    // Because splitOnScript has a capturing group, the matched script span is
    // kept in the resulting array alongside the text before and after it.
    p_string.split(sanitizeRegExps.splitOnScript).forEach(
        function (p_split)
        {
            if (!sanitizeRegExps.isScript.test(p_split))
            {
                modified += replaceBrackets(p_split);
                return;
            }

            var sanitizedScript = sanitizeScriptElement(p_split);

            if (sanitizedScript === null)
            {
                console.warn("Unexpected <script> string passed, defaulting to sanitizing it.\n", p_split);
                sanitizedScript = replaceBrackets(p_split);
            }

            modified += sanitizedScript;
        }
    );

    return modified;
}
// Runs sanitize() over every entry of `tests` and logs each result, prefixed
// with ✓ when it equals the same-index entry of `expected` and ✘ otherwise.
tests.forEach(
    function (p_value, p_index)
    {
        var sanitized = sanitize(p_value);

        // Strict comparison so a pass can never be the result of type coercion.
        var mark = (sanitized !== expected[p_index]) ? "✘\t" : "✓\t";

        console.log(mark, sanitized, "\n");
    }
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment