Created
February 12, 2014 02:32
-
-
Save chemdemo/8948956 to your computer and use it in GitHub Desktop.
html tag filter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Filter the tags and attributes found in an HTML string.
 *
 * - `<script>`, `<iframe>` and `<link>` tags (opening and closing) are removed;
 *   the text between them is left in place.
 * - Closing tags of every other element are emitted without attributes.
 * - Opening tags keep only whitelisted `name=value` attributes:
 *     - on any tag: `width`, `height`, and `style` (unless the value contains
 *       "expression");
 *     - on `<img>`: `src` (must start with an accepted URL) and `alt`;
 *     - on `<a>`: `href` (must start with an accepted URL), `alt`, `target`.
 *   Kept attributes are emitted exactly as written in the input. Every other
 *   attribute, valueless attributes, and text that does not parse as an
 *   attribute are dropped. A trailing self-closing slash is preserved.
 *
 * NOTE(review): this is a regex-based filter, not an HTML parser. It does not
 * decode character references or CSS escapes inside values and leaves an
 * unterminated `<tag ...` (no closing `>`) untouched. Prefer a DOM-based
 * sanitizer when the output is rendered in a security-sensitive context.
 *
 * @param {string} html - Markup to filter; null/undefined is treated as ''.
 * @returns {string} The filtered markup.
 */
var parseHtmlTarg = (function () {
    // One tag: (1) closing slash(es), (2) tag name, (3) raw attribute text.
    // The name runs up to whitespace, "/" or ">" so names such as "h1" are
    // matched too. The attribute text must start with whitespace or "/", which
    // keeps the pattern unambiguous and free of exponential backtracking.
    var TAG_EXP = /<(\/*)([a-zA-Z][^\s\/>]*)((?:[\s\/][^>]*)?)>/g;

    // One attribute: (1) name, then an optional value that is either
    // (2) double-quoted, (3) single-quoted or (4) unquoted.
    var ATTR_EXP = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;

    var SELF_CLOSE_EXP = /^\s*\/\s*$/;
    var WHITESPACE_EXP = /^\s+$/;

    // A style value containing "expression" is rejected.
    var CSS_EXP = /expression/i;

    // Anchored at the start of the value so the scheme cannot be preceded by
    // something like "javascript:".
    var URL_EXP = new RegExp("^\\s*((news|telnet|nttp|file|http|ftp|https)://)(([-A-Za-z0-9]+(\\.[-A-Za-z0-9]+)*(\\.[-A-Za-z]{2,5}))|([0-9]{1,3}(\\.[0-9]{1,3}){3}))(:[0-9]*)?(/[-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]*)*");

    // Tags that are removed entirely.
    var blockedTags = {
        'script': true,
        'iframe': true,
        'link': true
    };

    // Attributes allowed on specific tags; `regexp` is a pattern the value must match.
    var tagAttrs = {
        'img': {
            'src': { regexp: URL_EXP },
            'alt': {}
        },
        'a': {
            'href': { regexp: URL_EXP },
            'alt': {},
            'target': {}
        }
    };

    // Attributes allowed on every tag; `regexp` is a pattern the value must NOT match.
    var commonAttrs = {
        'width': {},
        'height': {},
        'style': { regexp: CSS_EXP }
    };

    var hasOwn = Object.prototype.hasOwnProperty;

    // Own-property lookup so names like "constructor" never hit Object.prototype.
    function own(table, key) {
        return hasOwn.call(table, key);
    }

    // Decide whether attribute `attrName` with `value` may stay on tag `tagName`.
    function isAttrAllowed(tagName, attrName, value) {
        if (own(commonAttrs, attrName)) {
            var banned = commonAttrs[attrName].regexp;
            return !(banned && banned.test(value));
        }
        if (!own(tagAttrs, tagName) || !own(tagAttrs[tagName], attrName)) {
            return false;
        }
        var required = tagAttrs[tagName][attrName].regexp;
        return !required || required.test(value);
    }

    // Rebuild the attribute text of one opening tag from allowed attributes only.
    function filterAttrs(tagName, attrs) {
        var kept = '';
        var prevEnd = 0;
        var m;
        ATTR_EXP.lastIndex = 0;
        while ((m = ATTR_EXP.exec(attrs)) !== null) {
            var gap = attrs.slice(prevEnd, m.index);
            prevEnd = ATTR_EXP.lastIndex;
            var value = m[2] !== undefined ? m[2] : (m[3] !== undefined ? m[3] : m[4]);
            if (value === undefined) {
                continue; // valueless attribute or stray text: drop it
            }
            if (isAttrAllowed(tagName, m[1].toLowerCase(), value)) {
                // Keep the original separator when it is plain whitespace.
                kept += (WHITESPACE_EXP.test(gap) ? gap : ' ') + m[0];
            }
        }
        var tail = attrs.slice(prevEnd);
        return kept + (SELF_CLOSE_EXP.test(tail) ? tail : '');
    }

    // Run the tag filter once over the whole string.
    function filterOnce(html) {
        return html.replace(TAG_EXP, function (whole, slash, name, attrs) {
            var tagName = name.toLowerCase();
            if (own(blockedTags, tagName)) {
                return ''; // drop script / iframe / link tags
            }
            if (slash) {
                return '</' + name + '>'; // closing tag
            }
            return '<' + name + filterAttrs(tagName, attrs) + '>';
        });
    }

    return function (html) {
        var current = html == null ? '' : String(html);
        // Removing a tag can splice the surrounding text into a new tag
        // ("<<script>script>"), so repeat until the output is stable. The pass
        // count is capped as a safety net against non-termination.
        var maxPasses = current.length + 1;
        for (var i = 0; i < maxPasses; i++) {
            var next = filterOnce(current);
            if (next === current) {
                break;
            }
            current = next;
        }
        return current;
    };
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment