public
Last active — forked from timjb/sanitize.js

HTML Sanitizer for JavaScript

  • Download Gist
sanitize.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
// I'm developing this now as a part of substance (https://github.com/michael/substance)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/**
 * Remove every tag and attribute from an HTML string that is not whitelisted.
 *
 * Text between tags is kept, with `>` escaped as `&gt;`. Tags that are not
 * whitelisted are dropped (their text content is kept). A trailing tag that
 * is never closed by `>` is discarded together with the rest of the input.
 *
 * @param {string} html - Markup to sanitize. `null`/`undefined` yield `''`.
 * @param {Object} [settings] - Whitelist keyed by lower-case tag name. Each
 *   value is an object keyed by attribute name; an attribute is kept when its
 *   entry is truthy and, if the entry is a function, when calling it with the
 *   attribute value returns a truthy result. The validator receives the value
 *   after `"` has been replaced by `&quot;`. Only own properties are
 *   consulted, so names inherited from `Object.prototype` (`constructor`,
 *   `toString`, ...) are never treated as whitelisted.
 * @returns {string} The sanitized markup. Kept tags are written with a
 *   lower-case name and double-quoted attribute values.
 */
var sanitize = function (html, settings) {
  settings = settings || {};
  html = html == null ? '' : String(html);

  var hasOwn = Object.prototype.hasOwnProperty;
  var sanitized = '';

  // Drop the first `n` characters of the remaining input.
  function advance (n) {
    html = html.slice(n);
  }

  // Attribute whitelist for `tagName`, or undefined when the tag is not an
  // own (non-inherited) entry of `settings`.
  function whitelistFor (tagName) {
    return hasOwn.call(settings, tagName) ? settings[tagName] : undefined;
  }

  // Parse the raw attribute text of a start tag into a name -> value map.
  // Parsing stops silently at the first thing it cannot understand.
  function parseAttributes (attrs) {
    var attributes = {};
    while (attrs) {
      var key = attrs.match(/^[a-zA-Z]+/);
      if (!key) break;
      key = key[0];
      attrs = attrs.slice(key.length);

      if (attrs.charAt(0) === '=') {
        attrs = attrs.slice(1);
        var first = attrs.charAt(0);
        if (first === '"' || first === "'") {
          // quoted value: everything up to the matching quote
          var closingPos = attrs.indexOf(first, 1);
          if (closingPos === -1) break;
          attributes[key] = attrs.slice(1, closingPos);
          attrs = attrs.slice(closingPos + 1);
        } else {
          // unquoted value: runs up to the next whitespace; `key=` followed
          // by nothing or by whitespace is malformed and ends parsing
          var value = attrs.match(/^[^\s]+/);
          if (!value) break;
          attributes[key] = value[0];
          attrs = attrs.slice(value[0].length);
        }
      } else if (attrs === '' || /^\s/.test(attrs)) {
        // boolean attribute (`disabled`), also when it is the last one
        attributes[key] = key;
      } else {
        break;
      }

      var ws = attrs.match(/^\s+/);
      if (!ws) break;
      attrs = attrs.slice(ws[0].length);
    }
    return attributes;
  }

  // Serialize the whitelisted, validated attributes of one start tag.
  function writeAttributes (attributes, allowed) {
    var out = '';
    for (var key in attributes) {
      // `attributes` is filled from untrusted names, so never call its own
      // `hasOwnProperty` (an attribute of that name would shadow it).
      if (!hasOwn.call(attributes, key)) continue;
      if (!hasOwn.call(allowed, key)) continue;
      var validator = allowed[key];
      if (!validator) continue;
      var value = attributes[key].replace(/"/g, '&quot;');
      if (typeof validator === 'function' && !validator(value)) continue;
      out += ' ' + key + '="' + value + '"';
    }
    return out;
  }

  var match, tagName, allowed;
  while (html.length) {
    match = html.match(/^[^<]+/);
    if (match) { // cdata
      sanitized += match[0].replace(/>/g, '&gt;');
      advance(match[0].length);
      continue;
    }

    var endPos = html.indexOf('>');
    if (endPos === -1) {
      // unterminated tag: discard the rest of the input
      advance(html.length);
      continue;
    }

    var tag = html.slice(0, endPos + 1);

    match = tag.match(/^<\/([a-zA-Z][a-zA-Z0-9]*)\s*>$/);
    if (match) { // end tag
      // Lower-case like start tags so `</A>` closes an emitted `<a>`.
      tagName = match[1].toLowerCase();
      if (whitelistFor(tagName)) {
        sanitized += '</' + tagName + '>';
      }
      advance(tag.length);
      continue;
    }

    match = tag.match(/^<([a-zA-Z][a-zA-Z0-9]*)(?:\s+(.+))?\s*\/?>$/);
    if (match) { // start tag
      tagName = match[1].toLowerCase();
      allowed = whitelistFor(tagName);
      if (allowed) {
        sanitized += '<' + tagName +
          writeAttributes(parseAttributes(match[2]), allowed) + '>';
      }
    }

    // Anything else that looks like a tag (comments, doctypes, ...) is dropped.
    advance(tag.length);
  }
  return sanitized;
};
 
// Publish the sanitizer when a CommonJS-style `exports` object is in scope.
if (typeof exports === 'object') exports.sanitize = sanitize;
test-sanitize.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12
// Table-driven checks for sanitize(): each row is [input, settings, expected].
var assert = require('assert');
var sanitize = require('./sanitize').sanitize;

// Attribute validator that only accepts the value 'c'.
var onlyC = function (v) { return v === 'c'; };

var cases = [
  // no whitelist: every tag is stripped, text is kept
  ['<a b="c"><d>lorem</d> ipsum </a>dolor', undefined, 'lorem ipsum dolor'],
  // '>' inside text is escaped; an unterminated tag is discarded
  ['<a>lo>rem</a ipsum', undefined, 'lo&gt;rem'],
  // whitelisted tag without whitelisted attributes
  ['<a b="c"><d>lorem</d> ipsum </a>dolor', {a: {}}, '<a>lorem ipsum </a>dolor'],
  // whitelisted attribute (this row appears twice in the original suite)
  ['<a b="c"><d>lorem</d> ipsum </a>dolor', {a: {b: true}}, '<a b="c">lorem ipsum </a>dolor'],
  ['<a b="c"><d>lorem</d> ipsum </a>dolor', {a: {b: true}}, '<a b="c">lorem ipsum </a>dolor'],
  // validator function accepts / rejects the attribute value
  ['<a b="c"><d>lorem</d> ipsum </a>dolor', {a: {b: onlyC}}, '<a b="c">lorem ipsum </a>dolor'],
  ['<a b="x"><d>lorem</d> ipsum </a>dolor', {a: {b: onlyC}}, '<a>lorem ipsum </a>dolor']
];

cases.forEach(function (row) {
  assert.equal(sanitize(row[0], row[1]), row[2]);
});

console.log("All tests have completed successfully!");

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.