Skip to content

Instantly share code, notes, and snippets.

@rezen
Created September 11, 2017 21:37
Show Gist options
  • Save rezen/2c11a1ac76f5922476b5d2ebf0ce6283 to your computer and use it in GitHub Desktop.
Save rezen/2c11a1ac76f5922476b5d2ebf0ce6283 to your computer and use it in GitHub Desktop.
'use strict';
/**
 * https://github.com/SalesforceEng/secure-filters
 *
 * @description
 * Strings are frequently dirty and need to have some HTML removed.
 * Often you want finer control over the filtering, for example
 * removing all elements except anchors, or removing all tags except
 * anchors that have [data-keep].
 *
 * This module removes HTML with options for that finer-grained control:
 *  - `addReplaceRule(name, fn)` decides per tag what replaces it;
 *  - `addRemoveInner(name)` marks elements whose content is dropped too
 *    (`style` and `script` by default).
 *
 * The stripping is regex based and runs in a single pass, so it is not
 * a full HTML sanitizer: unterminated tags (no closing `>`) and markup
 * that only forms after stripping (e.g. `<<b>b>`) are left in place.
 * Run the result through `escape()` before inserting it into HTML.
 *
 * @constructor
 */
function TrimHtml() {
  // Allow `TrimHtml()` as well as `new TrimHtml()`: in strict mode a plain
  // call has `this === undefined` and the assignments below would throw.
  if (!(this instanceof TrimHtml)) {
    return new TrimHtml();
  }

  /**
   * Matches one opening or closing tag; group 1 is the tag name.
   * The name runs up to the first whitespace, `/` or `>`, and the
   * attribute part must start with whitespace or `/`. Keeping the two
   * parts disjoint avoids quadratic backtracking on long unterminated tags.
   * @type {RegExp}
   */
  var TAG = /<\/?([a-z][^\s\/<>]*)(?:[\s\/][^<>]*)?>/gi;

  /**
   * Lookahead appended to an element name inside a dynamically built
   * pattern so that `script` does not also match `<scripted>`.
   * @type {String}
   */
  var NAME_END = '(?=[\\s\\/>])';

  /** Control characters that are replaced by a space before stripping. */
  var HTML_CONTROL = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g;

  /** Characters that carry meaning inside a regular expression. */
  var REGEX_SPECIAL = /[.*+?^${}()|[\]\\]/g;

  var hasOwn = Object.prototype.hasOwnProperty;

  /**
   * Hash map of functions for custom replace rules, keyed by
   * lower-case element name
   * @type {Object}
   */
  var replaceRules = {};

  /**
   * Lower-case names of elements that are removed together with
   * their inner content
   * @type {Array}
   */
  var removeInner = [
    'style',
    'script'
  ];

  /**
   * Element names are compared case-insensitively, so store and look
   * them up in lower case.
   * @param  {String} elementName
   * @return {String}
   */
  function normalizeName(elementName) {
    return String(elementName).toLowerCase();
  }

  /**
   * Remove every `<element ...> ... </element>` block, content included.
   * Each opening tag is paired with the nearest following closing tag,
   * and the content may span several lines.
   * @param  {String} input
   * @param  {String} element lower-case element name
   * @return {String}
   */
  function removeElementBlocks(input, element) {
    var name = element.replace(REGEX_SPECIAL, '\\$&');
    var opener = new RegExp('<' + name + NAME_END + '[^<>]*>', 'gi');
    var closer = new RegExp('<\\/' + name + NAME_END + '[^<>]*>', 'gi');
    var output = '';
    var cursor = 0;
    var open;

    while ((open = opener.exec(input)) !== null) {
      closer.lastIndex = opener.lastIndex;
      if (closer.exec(input) === null) {
        // No closing tag after this opener means none after any later
        // opener either; stop instead of rescanning the tail each time.
        break;
      }
      output += input.slice(cursor, open.index);
      cursor = closer.lastIndex;
      opener.lastIndex = cursor;
    }

    return output + input.slice(cursor);
  }

  /**
   * Replacement callback for a single tag: the element content is
   * kept, only the tag itself is replaced.
   * @param  {String} tag the complete matched tag
   * @param  {String} el  the tag name as written in the input
   * @return {String}
   */
  function stripTag(tag, el) {
    var name = normalizeName(el);

    // Paired blocks of these elements are already gone, so what is left
    // is an orphan opening/closing tag. Custom rules never apply to them.
    if (removeInner.indexOf(name) !== -1) {
      return '';
    }

    // Own-property check so that a tag such as <constructor> does not
    // pick up a function inherited from Object.prototype.
    if (hasOwn.call(replaceRules, name) && typeof replaceRules[name] === 'function') {
      return replaceRules[name](tag);
    }

    return '';
  }

  /**
   * Escape the characters that are significant in HTML text and in
   * quoted attribute values (`&`, `"`, `'`, `<`, `>`).
   * @param  {String} string
   * @return {String} escaped text; '' for null/undefined
   */
  this.escape = function(string) {
    if (string == null) {
      return '';
    }
    // `&` must be replaced first so the entities added below survive.
    return String(string)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  };

  /**
   * Strip the html tags from a given string.
   *
   * 1. Control characters are replaced by a space.
   * 2. Elements listed in removeInner are removed with their content.
   * 3. Every remaining tag is replaced by the result of its custom
   *    rule, or by the empty string when no rule is registered.
   *
   * @param  {String} string
   * @return {String} stripped text; '' for null/undefined
   */
  this.trim = function(string) {
    if (string == null) {
      return '';
    }

    var trimmed = String(string).replace(HTML_CONTROL, ' ');

    // Remove tags AND content
    for (var idx = 0; idx < removeInner.length; idx++) {
      trimmed = removeElementBlocks(trimmed, removeInner[idx]);
    }

    // Remove tags but not content
    return trimmed.replace(TAG, stripTag);
  };

  /**
   * Add an element to the list of elements
   * that we remove the element and content
   * @param {String} elementName case-insensitive
   */
  this.addRemoveInner = function(elementName) {
    removeInner.push(normalizeName(elementName));
  };

  /**
   * Add a custom replace rule. The function receives the complete
   * matched tag (opening or closing) and returns its replacement.
   * Passing a non-function removes the rule for that element.
   * @param {String}   elementName case-insensitive
   * @param {Function} func
   */
  this.addReplaceRule = function(elementName, func) {
    replaceRules[normalizeName(elementName)] = func;
  };

  return this;
}
// Expose the TrimHtml constructor as this module's sole export (CommonJS).
module.exports = TrimHtml;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment