Skip to content

Instantly share code, notes, and snippets.

@dmitry
Forked from timjb/sanitize.js
Created October 28, 2011 21:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dmitry/1323556 to your computer and use it in GitHub Desktop.
Save dmitry/1323556 to your computer and use it in GitHub Desktop.
HTML Sanitizer for JavaScript
// I'm developing this now as a part of substance (https://github.com/michael/substance)
/**
 * Whitelist-based HTML sanitizer.
 *
 * Text outside of tags is copied to the output with every `>` replaced by
 * `&gt;`. A tag is kept only when its lower-cased name is an own, truthy
 * property of `settings`; an attribute is kept only when its name (matched
 * exactly as written in the markup) is an own, truthy property of that tag's
 * entry. Everything else — unknown tags, comments, malformed tags, an
 * unterminated tag at the end of the input — is dropped. Kept start tags are
 * re-emitted as `<name attr="value">` (a self-closing slash is not
 * reproduced) and kept end tags as `</name>`.
 *
 * @param {string} html - Untrusted markup. `null`/`undefined` yields `''`.
 * @param {Object} [settings] - Whitelist of the form
 *   `{ tagName: { attrName: true | function (value) { return boolean; } } }`.
 *   Tag keys must be lower case. A validator function receives the attribute
 *   value after `"` has been replaced by `&quot;`; the value is otherwise
 *   passed exactly as written in the markup (character references are NOT
 *   decoded), so URL validators must account for encoded characters
 *   themselves.
 * @returns {string} The sanitized markup.
 */
var sanitize = function (html, settings) {
  if (html == null) return '';
  settings = settings || {};

  // Borrowed so that attacker-chosen names such as "hasOwnProperty" or
  // "constructor" can neither shadow the method nor match inherited members.
  var hasOwn = Object.prototype.hasOwnProperty;
  var sanitized = '';

  // Returns the whitelist entry for a tag, ignoring inherited properties.
  function rulesFor (tagName) {
    return hasOwn.call(settings, tagName) ? settings[tagName] : undefined;
  }

  // Parses the raw attribute section of a start tag into a name -> value map.
  // Parsing stops silently at the first construct it does not understand;
  // attributes read up to that point are kept.
  function readAttributes (attrs) {
    var attributes = {};
    while (attrs) {
      var name = attrs.match(/^[a-zA-Z]+/);
      if (!name) break;
      name = name[0];
      attrs = attrs.slice(name.length);

      var next = attrs.charAt(0);
      if (next === '=') {
        attrs = attrs.slice(1);
        var quote = attrs.charAt(0);
        if (quote === '"' || quote === "'") {
          var closingPos = attrs.indexOf(quote, 1);
          if (closingPos === -1) break;
          attributes[name] = attrs.slice(1, closingPos);
          attrs = attrs.slice(closingPos + 1);
        } else {
          // Unquoted value: runs up to the next whitespace. An empty value
          // (`b=` at the end or followed by whitespace) is malformed.
          var unquoted = attrs.match(/^[^\s]+/);
          if (!unquoted) break;
          attributes[name] = unquoted[0];
          attrs = attrs.slice(unquoted[0].length);
        }
      } else if (next === '' || /\s/.test(next)) {
        // Boolean attribute (`disabled`): the value defaults to the name.
        attributes[name] = name;
      } else {
        break;
      }

      // Attributes must be separated by whitespace.
      var ws = attrs.match(/^\s+/);
      if (!ws) break;
      attrs = attrs.slice(ws[0].length);
    }
    return attributes;
  }

  var match;
  while (html.length) {
    // Plain text up to the next tag.
    match = html.match(/^[^<]+/);
    if (match) {
      sanitized += match[0].replace(/>/g, '&gt;');
      html = html.slice(match[0].length);
      continue;
    }

    var endPos = html.indexOf('>');
    if (endPos === -1) break; // unterminated tag: discard the rest

    var tag = html.slice(0, endPos + 1);
    html = html.slice(tag.length);

    // End tag. The name is lower-cased so it pairs with the start tag.
    match = tag.match(/^<\/([a-zA-Z][a-zA-Z0-9]*)>$/);
    if (match) {
      var endName = match[1].toLowerCase();
      if (rulesFor(endName)) sanitized += '</' + endName + '>';
      continue;
    }

    // Start tag (optionally self-closing). Anything else is dropped.
    match = tag.match(/^<([a-zA-Z][a-zA-Z0-9]*)(?:\s+(.+))?\s*\/?>$/);
    if (!match) continue;

    var tagName = match[1].toLowerCase();
    var rules = rulesFor(tagName);
    if (!rules) continue;

    var attributes = readAttributes(match[2]);
    sanitized += '<' + tagName;
    for (var key in attributes) {
      if (!hasOwn.call(attributes, key)) continue;
      // Object(rules) lets non-object truthy entries (e.g. `true`) mean
      // "tag allowed, no attributes".
      if (!hasOwn.call(Object(rules), key)) continue;
      var validator = rules[key];
      if (!validator) continue;
      var value = attributes[key].replace(/"/g, '&quot;');
      if (typeof validator === 'function' && !validator(value)) continue;
      sanitized += ' ' + key + '="' + value + '"';
    }
    sanitized += '>';
  }
  return sanitized;
};
// CommonJS export: attach the sanitizer to `exports` when such an object is
// present; in environments without it this statement does nothing.
if (typeof exports === 'object') { exports.sanitize = sanitize; }
// Smoke tests for sanitize(). Expects sanitize.js in the same directory.
// Any failing assertion throws, so the final message is only printed when
// every check has passed.
var sanitize = require('./sanitize').sanitize
  , assert = require('assert');

// No whitelist: all tags are stripped, text is kept.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor'), 'lorem ipsum dolor');

// A stray '>' in text is escaped; an unterminated tag is discarded.
assert.strictEqual(sanitize('<a>lo>rem</a ipsum'), 'lo&gt;rem');

// Whitelisted tag without whitelisted attributes: attributes are dropped.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor', {a:{}}), '<a>lorem ipsum </a>dolor');

// Attribute whitelisted with `true`: kept unconditionally.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor', {a:{b:true}}), '<a b="c">lorem ipsum </a>dolor');

// Attribute whitelisted with a validator: kept only when the validator accepts the value.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor', {a:{b:function (v){return v==='c'}}}), '<a b="c">lorem ipsum </a>dolor');
assert.strictEqual(sanitize('<a b="x"><d>lorem</d> ipsum </a>dolor', {a:{b:function (v){return v==='c'}}}), '<a>lorem ipsum </a>dolor');

console.log("All tests have completed successfully!");
@TwistedSim
Copy link

TwistedSim commented Feb 1, 2022

If by any chance someone is copying this 10-year-old code, please note that the anchor tag sanitization can be bypassed by providing an HTML-encoded character instead of a ":". This allows XSS via the javascript: protocol.

Example:
<a href="javascript%26%2358alert(1)">Click Here</a>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment