Skip to content

Instantly share code, notes, and snippets.

@kosso
Created December 12, 2017 14:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kosso/d0580c96379a331a8ff2e8dc6b22e42e to your computer and use it in GitHub Desktop.
Save kosso/d0580c96379a331a8ff2e8dc6b22e42e to your computer and use it in GitHub Desktop.
Extract text only from HTML
<html>
<head>
<script>
/**
 * Remove HTML/XML tags from a string, optionally keeping a whitelist of tags.
 * Text between tags is left untouched; only the tags themselves are dropped.
 * Based on the PHP strip_tags() port via http://kevin.vanzonneveld.net
 *
 * @param {*} str - Input value; it is coerced to a string before processing.
 * @param {string} [allowed_tags] - Tags to keep, e.g. '<b><i>' or 'b,i'.
 *   Every run of letters/digits is read as one tag name (case-insensitive).
 * @returns {string} The input with every non-allowed tag removed.
 */
function strip_tags (str, allowed_tags) {
  var allowed_names = [];
  var found = null;
  var n = 0;

  // Build the lowercase list of allowed tag names.
  if (allowed_tags) {
    // match() yields null when the argument contains no letters/digits.
    found = String(allowed_tags).match(/[a-zA-Z0-9]+/g) || [];
    for (n = 0; n < found.length; n++) {
      allowed_names.push(found[n].toLowerCase());
    }
  }

  // Single pass over every tag: keep it when its name is whitelisted,
  // otherwise replace it with nothing. Using a callback (rather than removing
  // each tag's text globally) means only real tag matches are ever deleted.
  return String(str).replace(/<\/?[\S][^>]*>/g, function (tag) {
    // The name must be followed by whitespace, '/' or '>' so that an allowed
    // 'b' matches <b>, <b class="x">, <b/> and </b> but never <body> or </br>.
    var name = /^<\/?([a-zA-Z0-9]+)(?=[\s\/>])/.exec(tag);
    if (name !== null && allowed_names.indexOf(name[1].toLowerCase()) !== -1) {
      return tag;
    }
    // Comments, doctypes and any tag without a whitelisted name are stripped.
    return '';
  });
}
function init() {
  // Demo entry point (wired to <body onload>): log a sample HTML fragment,
  // then log the same fragment after all tags have been stripped.
  var sampleMarkup = '<h1>The title</h1> <p>blah blah <em>BLAH</em></p>';
  console.log('_html:', sampleMarkup);
  console.log('_text:', strip_tags(sampleMarkup));
}
</script>
</head>
<body onload="init()">
test
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment