Skip to content

Instantly share code, notes, and snippets.

@timjb
Created September 24, 2011 10:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save timjb/1239199 to your computer and use it in GitHub Desktop.
Save timjb/1239199 to your computer and use it in GitHub Desktop.
HTML Sanitizer for JavaScript
// I'm developing this now as a part of substance (https://github.com/michael/substance)
/**
 * Whitelist-based HTML sanitizer: drops every tag and attribute that is not
 * explicitly allowed and escapes stray `>` characters in text.
 *
 * @param {string} html - Markup to sanitize.
 * @param {Object} [settings] - Whitelist keyed by lower-case tag name. Each
 *   truthy entry allows that tag; the entry itself is an object keyed by
 *   attribute name. An attribute is kept when its entry is truthy and, if the
 *   entry is a function, when calling it with the attribute value (with `"`
 *   already escaped to `&quot;`) returns a truthy result. Only own properties
 *   are consulted, so names inherited from Object.prototype (`constructor`,
 *   `toString`, ...) are never treated as whitelisted.
 * @returns {string} The sanitized markup. Allowed tags are emitted with a
 *   lower-case name and double-quoted attributes; a trailing `/` of a
 *   self-closing tag is not reproduced.
 */
var sanitize = function (html, settings) {
  settings = settings || {};
  var hasOwn = Object.prototype.hasOwnProperty;
  var sanitized = '';

  // Consume input: a string argument is copied to the output first, a number
  // just skips that many characters.
  function advance (n) {
    if (typeof n === 'string') {
      sanitized += n;
      n = n.length;
    }
    html = html.slice(n);
  }

  // Own-property lookup so attacker-chosen names cannot hit the prototype chain.
  function allowed (table, name) {
    return table != null && hasOwn.call(table, name) ? table[name] : undefined;
  }

  // Parse the attribute portion of a start tag into a name -> raw value map.
  // Parsing stops silently at the first construct it does not understand.
  function readAttributes (attrs) {
    // No prototype: an attribute called e.g. `hasOwnProperty` is just data.
    var attributes = Object.create(null);
    while (attrs) {
      var key = attrs.match(/^[a-zA-Z]+/);
      if (!key) break;
      key = key[0];
      attrs = attrs.slice(key.length);
      var next = attrs.charAt(0);
      if (next === '=') {
        attrs = attrs.slice(1);
        var quote = attrs.charAt(0);
        if (quote === '"' || quote === "'") {
          var closingPos = attrs.indexOf(quote, 1);
          if (closingPos === -1) break; // unterminated quoted value
          attributes[key] = attrs.slice(1, closingPos);
          attrs = attrs.slice(closingPos + 1);
        } else {
          // Unquoted value: runs up to the next whitespace. A missing value
          // (`b=` followed by nothing or by a space) ends attribute parsing.
          var value = attrs.match(/^[^\s]+/);
          if (!value) break;
          value = value[0];
          attrs = attrs.slice(value.length);
          attributes[key] = value;
        }
      } else if (next === '' || /\s/.test(next)) {
        // Boolean attribute (`disabled`): the value defaults to its own name.
        attributes[key] = key;
      } else {
        break;
      }
      var ws = attrs.match(/^\s+/);
      if (!ws) break;
      attrs = attrs.slice(ws[0].length);
    }
    return attributes;
  }

  var match;
  while (html.length) {
    if (match = html.match(/^[^<]+/)) { // text up to the next tag
      sanitized += match[0].replace(/>/g, '&gt;');
      advance(match[0].length);
      continue;
    }
    var endPos = html.indexOf('>');
    if (endPos === -1) {
      // A `<` that is never closed: discard the rest of the input.
      advance(html.length);
      continue;
    }
    var tag = html.slice(0, endPos + 1);
    if (match = tag.match(/^<\/([a-zA-Z]+)>$/)) { // end tag
      // Lower-case like start tags so `<A>...</A>` stays balanced.
      var endName = match[1].toLowerCase();
      if (allowed(settings, endName)) {
        sanitized += '</' + endName + '>';
      }
      advance(tag.length);
      continue;
    }
    if (match = tag.match(/^<([a-zA-Z]+)(?:\s+(.+))?\s*\/?>$/)) { // start tag
      var tagName = match[1].toLowerCase();
      var rules = allowed(settings, tagName);
      if (rules) {
        var attributes = readAttributes(match[2]);
        sanitized += '<' + tagName;
        // validate and write attributes
        for (var key in attributes) {
          var validator = allowed(rules, key);
          if (!validator) continue;
          var value = attributes[key].replace(/"/g, '&quot;');
          if (typeof validator === 'function' && !validator(value)) continue;
          sanitized += ' ' + key + '="' + value + '"';
        }
        sanitized += '>';
      }
    }
    // Anything else that looks like a tag (comments, doctype, junk) is dropped.
    advance(tag.length);
  }
  return sanitized;
};
// Publish sanitize as a CommonJS export when an `exports` object is in scope;
// when it is not, the guard makes this statement a no-op.
if (typeof exports === 'object') exports.sanitize = sanitize;
// Smoke tests for sanitize(). Each assertion throws on mismatch, so reaching
// the final log line means every case passed.
var sanitize = require('./sanitize').sanitize
  , assert = require('assert');

// strictEqual instead of the legacy, loosely-comparing assert.equal.

// No whitelist: every tag is stripped, text is kept.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor'), 'lorem ipsum dolor');
// A stray '>' in text is escaped; an unterminated tag discards the rest.
assert.strictEqual(sanitize('<a>lo>rem</a ipsum'), 'lo&gt;rem');
// Whitelisted tag without whitelisted attributes.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor', {a:{}}), '<a>lorem ipsum </a>dolor');
// Whitelisted attribute is preserved.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor', {a:{b:true}}), '<a b="c">lorem ipsum </a>dolor');
// Double quotes inside a single-quoted value are escaped on output.
assert.strictEqual(sanitize('<a b=\'c"d\'>x</a>', {a:{b:true}}), '<a b="c&quot;d">x</a>');
// Validator function accepting / rejecting the attribute value.
assert.strictEqual(sanitize('<a b="c"><d>lorem</d> ipsum </a>dolor', {a:{b:function (v){return v==='c'}}}), '<a b="c">lorem ipsum </a>dolor');
assert.strictEqual(sanitize('<a b="x"><d>lorem</d> ipsum </a>dolor', {a:{b:function (v){return v==='c'}}}), '<a>lorem ipsum </a>dolor');
console.log("All tests have completed successfully!");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment