Skip to content

Instantly share code, notes, and snippets.

@LeoDT
Created January 5, 2012 02:41
Show Gist options
  • Save LeoDT/1563455 to your computer and use it in GitHub Desktop.
Save LeoDT/1563455 to your computer and use it in GitHub Desktop.
filter html
/**
 * Best-effort, pattern-based clean-up of an HTML fragment.
 *
 * Stages, applied in this order:
 *   1. removeTag  - drops blocked elements (script, style, iframe, meta, ...).
 *   2. removeAttr - drops presentational/identifying attributes and every
 *                   attribute whose name starts with "on" (event handlers).
 *   3. qualifyURL - makes href/src values absolute and drops attributes whose
 *                   URL contains "javascript:" or "vbscript:".
 *
 * qualifyURL resolves URLs through `document.createElement`, so a DOM
 * `document` must be in scope when the markup contains href/src attributes.
 *
 * NOTE(review): this works on text patterns, not on a parsed DOM, so it cannot
 * be a complete defence against hostile markup. Prefer a parser-based sanitizer
 * where the input is fully untrusted.
 *
 * @param {string} _html Markup to clean. null/undefined yields "".
 * @returns {string} The filtered markup.
 */
var filter = function(_html) {
    // Element names removed by removeTag.
    var BLOCKED_TAGS = "style|script|link|iframe|frame|frameset|noscript|head|html|applet|base|basefont|bgsound|blink|ilayer|layer|meta|object|embed";
    // Rest of a tag after a blocked name: the name must end here (so "head" does
    // not match "<header>"), then anything up to the closing ">".
    var TAG_END = "(?![\\w-])[^<>]*>";
    // Attribute names removed by removeAttr; "on.+" covers every event handler.
    var BLOCKED_ATTR = /^(?:mkmarkid|style|id|class|height|width|align|action|background|bgsound|on.+)$/i;
    // Attribute names treated as URLs. Suffix match keeps the original reach
    // (e.g. "xlink:href", "data-src").
    var URL_ATTR = /(?:href|src)$/i;
    var DANGEROUS_SCHEME = /(?:java|vb)script:/i;
    // A start tag up to (not including) its closing ">". Quoted values are
    // consumed whole so a ">" inside them does not end the tag early; an
    // unbalanced quote falls back to being an ordinary character.
    var TAG = /(<\w+)((?:"[^"]*"|'[^']*'|[^>])*)/g;
    // One attribute: name, then optionally "=" and a double-quoted,
    // single-quoted or unquoted value. Because each value is consumed together
    // with its name, text inside a quoted value is never mistaken for a name.
    var ATTRIBUTE = /([^\s\/=>]+)(\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]*)))?/g;

    /**
     * Calls rewrite(attributeText, name, value) for every name=value attribute
     * of every start tag and splices the returned text back in. Attributes
     * without "=" and all separators are left untouched.
     */
    var rewriteAttributes = function(html, rewrite) {
        return html.replace(TAG, function(tag, open, rest) {
            return open + rest.replace(ATTRIBUTE, function(attribute, name, assignment, doubleQuoted, singleQuoted, unquoted) {
                if (!assignment) {
                    return attribute;
                }
                return rewrite(attribute, name, doubleQuoted || singleQuoted || unquoted || "");
            });
        });
    };

    /** Resolves a URL to its absolute form by letting an <a> element parse it. */
    var qualify = function(url) {
        var div = document.createElement("div");
        div.innerHTML = "<a></a>";
        div.firstChild.href = url; // Ensures that the href is properly escaped
        div.innerHTML = div.innerHTML; // Run the current innerHTML back through the parser
        return div.firstChild.href;
    };

    /**
     * Rewrites every href/src attribute as name="absolute-url", or removes the
     * attribute when its URL contains a javascript:/vbscript: scheme.
     */
    var qualifyURL = function(html) {
        return rewriteAttributes(html, function(attribute, name, url) {
            var qualified;
            if (!URL_ATTR.test(name)) {
                return attribute;
            }
            // Whitespace and control characters are stripped before the check so
            // that "java\tscript:" is caught; the match is case-insensitive so
            // "JavaScript:" is caught too.
            if (DANGEROUS_SCHEME.test(url.replace(/[\u0000-\u0020]+/g, ""))) {
                return "";
            }
            qualified = String(qualify(url));
            // Check again after resolution in case normalisation exposed a scheme.
            if (DANGEROUS_SCHEME.test(qualified)) {
                return "";
            }
            // The value is always re-emitted inside double quotes, so any double
            // quote it carries must be escaped or it would end the attribute.
            return name.replace(URL_ATTR, function(suffix) {
                return suffix.toLowerCase();
            }) + '="' + qualified.replace(/"/g, "&quot;") + '"';
        });
    };

    /** Removes every blocked name=value attribute; surrounding whitespace is kept. */
    var removeAttr = function(html) {
        return rewriteAttributes(html, function(attribute, name) {
            return BLOCKED_ATTR.test(name) ? "" : attribute;
        });
    };

    /**
     * Removes blocked elements together with their content, then any blocked
     * opening/closing tag left on its own (void elements such as <meta>, or
     * unclosed ones such as <iframe src=...>).
     *
     * NOTE(review): "html" is in the list, so a complete <html>...</html>
     * document is removed wholesale, as in the original implementation.
     */
    var removeTag = function(html) {
        var pairRegex = new RegExp("<(" + BLOCKED_TAGS + ")" + TAG_END + "[\\s\\S]*?<\\/\\1" + TAG_END, "ig");
        var strayRegex = new RegExp("<\\/?(?:" + BLOCKED_TAGS + ")" + TAG_END, "ig");
        var previous;
        // Repeat until nothing changes: removing one element can splice the
        // surrounding text into a new blocked tag ("<scr<script></script>ipt>").
        // Paired removal runs to exhaustion before stray tags are stripped so
        // that element content is dropped whenever a closing tag exists.
        do {
            previous = html;
            html = html.replace(pairRegex, "");
            if (html === previous) {
                html = html.replace(strayRegex, "");
            }
        } while (html !== previous);
        return html;
    };

    /**
     * Collapses whitespace runs and drops whitespace before ">".
     * Currently unused: see the note where the stages are applied.
     * NOTE(review): the " ..." replacement text looks unintended - confirm
     * before re-enabling.
     */
    var removeWhite = function(html) {
        var whiteRegex = /\s{2,}|(\r|\n)+/g, // multiple whitespace or a single newline
            whitegtRegex = /\s>/g; // " >"
        return html.replace(whiteRegex, " ...").replace(whitegtRegex, ">");
    };

    var html;
    if (_html == null) {
        return "";
    }
    html = String(_html);
    html = removeTag(html);
    html = removeAttr(html);
    // removeWhite is deliberately not applied: it would also collapse the
    // whitespace inside <code> and <pre>.
    html = qualifyURL(html);
    return html;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment