Skip to content

Instantly share code, notes, and snippets.

@xguru
Created September 26, 2012 13:16
Show Gist options
  • Save xguru/3787969 to your computer and use it in GitHub Desktop.
Save xguru/3787969 to your computer and use it in GitHub Desktop.
PHPJS
function strip_tags(input, allowed) {
  // Removes HTML/XML tags, HTML comments and PHP tags from a string,
  // optionally keeping the tags named in `allowed`.
  //
  // http://kevin.vanzonneveld.net
  // + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + improved by: Luke Godfrey
  // + input by: Pul
  // + bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + bugfixed by: Onno Marsman
  // + input by: Alex
  // + bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + input by: Marc Palau
  // + improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + input by: Brett Zamir (http://brett-zamir.me)
  // + bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + bugfixed by: Eric Nagel
  // + input by: Bobby Drake
  // + bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + bugfixed by: Tomasz Wesolowski
  // + input by: Evertjan Garretsen
  // + revised by: Rafał Kukawski (http://blog.kukawski.pl/)
  //
  // @param {*} input - Value to strip; coerced to a string (null/undefined become '').
  // @param {string} [allowed] - Tags to keep, written as '<a><b>'. Matching is
  //   case-insensitive; anything that is not a plain '<name>' token is ignored.
  // @returns {string} The input with every non-allowed tag, comment and PHP block removed.
  //
  // % note 1: Stripping is repeated until the string stops changing. A single pass
  // % note 1: would turn '<<b>script>' into '<script>', letting a forbidden tag
  // % note 1: reassemble itself out of the leftovers of a removed one.
  //
  // * example 1: strip_tags('<p>Kevin</p> <br /><b>van</b> <i>Zonneveld</i>', '<i><b>');
  // * returns 1: 'Kevin <b>van</b> <i>Zonneveld</i>'
  // * example 2: strip_tags('<p>Kevin <img src="someimage.png" onmouseover="someFunction()">van <i>Zonneveld</i></p>', '<p>');
  // * returns 2: '<p>Kevin van Zonneveld</p>'
  // * example 3: strip_tags("<a href='http://kevin.vanzonneveld.net'>Kevin van Zonneveld</a>", "<a>");
  // * returns 3: '<a href='http://kevin.vanzonneveld.net'>Kevin van Zonneveld</a>'
  // * example 4: strip_tags('1 < 5 5 > 1');
  // * returns 4: '1 < 5 5 > 1'
  // * example 5: strip_tags('1 <br/> 1');
  // * returns 5: '1  1'
  // * example 6: strip_tags('1 <br/> 1', '<br>');
  // * returns 6: '1 <br/> 1'
  // * example 7: strip_tags('1 <br/> 1', '<br><br/>');
  // * returns 7: '1 <br/> 1'
  // * example 8: strip_tags('<<b>script>alert(1)<</b>/script>');
  // * returns 8: 'alert(1)'

  // Normalise `allowed` to a lowercase string made only of '<name>' tokens (<a><b><c>).
  allowed = (((allowed || '') + '').toLowerCase().match(/<[a-z][a-z0-9]*>/g) || []).join('');

  var tags = /<\/?([a-z][a-z0-9]*)\b[^>]*>/gi;
  var commentsAndPhpTags = /<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/gi;

  // Keeps a matched tag only when its name appears in the allow-list.
  var keepIfAllowed = function ($0, $1) {
    return allowed.indexOf('<' + $1.toLowerCase() + '>') > -1 ? $0 : '';
  };

  var after = input == null ? '' : input + '';
  var before;

  // Every pass either shortens the string or leaves it untouched, so this terminates.
  do {
    before = after;
    after = before.replace(commentsAndPhpTags, '').replace(tags, keepIfAllowed);
  } while (after !== before);

  return after;
}
function strpos(haystack, needle, offset) {
  // Finds the position of the first occurrence of `needle` in `haystack`.
  //
  // http://kevin.vanzonneveld.net
  // + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + improved by: Onno Marsman
  // + bugfixed by: Daniel Esteban
  // + improved by: Brett Zamir (http://brett-zamir.me)
  //
  // @param {*} haystack - Value to search in; coerced to a string.
  // @param {string} needle - Text to look for.
  // @param {number} [offset] - Index to start searching from; a falsy value means 0.
  // @returns {number|boolean} Zero-based index of the match, or false when there is none.
  //   Compare the result with === false, since a match at index 0 is falsy too.
  //
  // * example 1: strpos('Kevin van Zonneveld', 'e', 5);
  // * returns 1: 14
  var subject = haystack + '';
  var from = offset ? offset : 0;
  var found = subject.indexOf(needle, from);

  if (found === -1) {
    return false;
  }
  return found;
}
function strlen(string) {
  // Returns the length of a value after coercing it to a string.
  //
  // http://kevin.vanzonneveld.net
  // + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + improved by: Sakimori
  // + input by: Kirk Strobeck
  // + improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + bugfixed by: Onno Marsman
  // + revised by: Brett Zamir (http://brett-zamir.me)
  //
  // @param {*} string - Value to measure; coerced with `+ ''`.
  // @returns {number} Number of UTF-16 code units by default. When
  //   this.php_js.ini['unicode.semantics'].local_value is 'on' (any case), each
  //   surrogate pair is counted as a single character instead.
  // @throws {string} In 'unicode.semantics' mode only: a message string is thrown
  //   when a high or low surrogate is found without its partner.
  //
  // % note 1: The setting is read from `this`, so it is only visible when the function
  // % note 1: is called with a receiver that carries `php_js` (the global object in
  // % note 1: sloppy-mode scripts). With no receiver the default path is used.
  //
  // * example 1: strlen('Kevin van Zonneveld');
  // * returns 1: 19
  // * example 2: ini_set('unicode.semantics', 'on');
  // * example 2: strlen('A\ud87e\udc04Z');
  // * returns 2: 3
  var str = string + '';

  // `this` is undefined for plain calls in strict mode / ES modules; never dereference it blindly.
  var host = this == null ? {} : this;
  var ini = host.php_js && host.php_js.ini;
  var setting = ini && ini['unicode.semantics'];

  if (!setting || setting.local_value.toLowerCase() !== 'on') {
    // Measure the coerced string so that numbers and other non-strings work too.
    return str.length;
  }

  // Returns the full character starting at `idx`, or false when `idx` points at the
  // low half of a pair that was already returned for the preceding index.
  var getWholeChar = function (s, idx) {
    var code = s.charCodeAt(idx);
    var neighbour;

    if (0xD800 <= code && code <= 0xDBFF) { // High surrogate
      if (s.length <= idx + 1) {
        throw 'High surrogate without following low surrogate';
      }
      neighbour = s.charCodeAt(idx + 1);
      if (0xDC00 > neighbour || neighbour > 0xDFFF) {
        throw 'High surrogate without following low surrogate';
      }
      return s.charAt(idx) + s.charAt(idx + 1);
    }

    if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
      if (idx === 0) {
        throw 'Low surrogate without preceding high surrogate';
      }
      neighbour = s.charCodeAt(idx - 1);
      if (0xD800 > neighbour || neighbour > 0xDBFF) {
        throw 'Low surrogate without preceding high surrogate';
      }
      return false;
    }

    return s.charAt(idx);
  };

  var lgth = 0;
  for (var i = 0; i < str.length; i++) {
    // Skip the second half of a surrogate pair; it was counted with the first half.
    if (getWholeChar(str, i) !== false) {
      lgth++;
    }
  }
  return lgth;
}
function substr(str, start, len) {
  // Returns part of a string, following the conventions of PHP's substr().
  //
  // version: 909.322
  // discuss at: http://phpjs.org/functions/substr
  // + original by: Martijn Wieringa
  // + bugfixed by: T.Wild
  // + tweaked by: Onno Marsman
  // + revised by: Theriault
  // + improved by: Brett Zamir (http://brett-zamir.me)
  //
  // @param {*} str - Source value; coerced to a string.
  // @param {number} start - Zero-based start; a negative value counts from the end.
  // @param {number} [len] - Number of characters to take; a negative value drops that
  //   many characters from the end; omitted means "to the end of the string".
  // @returns {string|boolean} The requested portion, or false when `start` falls outside
  //   the string or the computed end lies before the computed start.
  //
  // % note 1: When this.php_js.ini['unicode.semantics'].local_value is 'on' and the string
  // % note 1: contains surrogate pairs, `start` and `len` count whole characters, so a
  // % note 1: pair is never split. Otherwise they count UTF-16 code units.
  // % note 2: As in the original, `php_js` and `php_js.ini` are created on the receiver
  // % note 2: when missing. A plain call without a receiver (strict mode / ES modules)
  // % note 2: uses a throw-away object instead of failing.
  //
  // * example 1: substr('abcdef', 0, -1);
  // * returns 1: 'abcde'
  // * example 2: substr(2, 0, -6);
  // * returns 2: false
  // * example 3: ini_set('unicode.semantics', 'on');
  // * example 3: substr('a\uD801\uDC00', 0, -1);
  // * returns 3: 'a'
  // * example 4: ini_set('unicode.semantics', 'on');
  // * example 4: substr('a\uD801\uDC00', 0, 2);
  // * returns 4: 'a\uD801\uDC00'
  // * example 5: ini_set('unicode.semantics', 'on');
  // * example 5: substr('a\uD801\uDC00', -1, 1);
  // * returns 5: '\uD801\uDC00'
  // * example 6: ini_set('unicode.semantics', 'on');
  // * example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2);
  // * returns 6: '\uD801\uDC00z'
  // * example 7: ini_set('unicode.semantics', 'on');
  // * example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
  // * returns 7: '\uD801\uDC00z'
  // * example 8: ini_set('unicode.semantics', 'on');
  // * example 8: substr('a\uD801\uDC00z', 1)
  // * returns 8: '\uD801\uDC00z'
  var highSurrogate = /[\uD800-\uDBFF]/;
  var lowSurrogate = /[\uDC00-\uDFFF]/;
  var i = 0;

  str += '';
  var end = str.length;

  // `this` is undefined for plain calls in strict mode / ES modules.
  var host = this == null ? {} : this;
  host.php_js = host.php_js || {};
  host.php_js.ini = host.php_js.ini || {};

  var setting = host.php_js.ini['unicode.semantics'];
  var unicodeOn = !!setting && setting.local_value.toLowerCase() === 'on';

  // The surrogate-aware path is only needed when a pair is actually present.
  if (unicodeOn && /[\uD800-\uDBFF][\uDC00-\uDFFF]/.test(str)) {
    // Translate `start` from a character index into a code-unit index.
    if (start < 0) {
      var es = (start += end);
      for (i = end - 1; i >= es; i--) {
        if (lowSurrogate.test(str.charAt(i)) && highSurrogate.test(str.charAt(i - 1))) {
          start--;
          es--;
        }
      }
    } else {
      var surrogatePairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
      while (surrogatePairs.exec(str) != null) {
        // Each pair that begins before `start` pushes the code-unit index one further.
        if (surrogatePairs.lastIndex - 2 < start) {
          start++;
        } else {
          break;
        }
      }
    }

    if (start >= end || start < 0) {
      return false;
    }

    if (typeof len === 'undefined') {
      // No length given: everything from `start` on. Without this guard `start + len`
      // is NaN and the copy loop below would produce an empty string.
      return str.slice(start);
    }

    if (len < 0) {
      // Translate the number of trailing characters to drop into code units.
      var el = (end += len);
      for (i = end - 1 - len; i >= el; i--) {
        if (lowSurrogate.test(str.charAt(i)) && highSurrogate.test(str.charAt(i - 1))) {
          end--;
          el--;
        }
      }
      if (start > end) {
        return false;
      }
      return str.slice(start, end);
    }

    var ret = '';
    var se = start + len;
    for (i = start; i < se; i++) {
      ret += str.charAt(i);
      if (highSurrogate.test(str.charAt(i)) && lowSurrogate.test(str.charAt(i + 1))) {
        se++; // Go one further, since one of the "characters" is part of a surrogate pair
      }
    }
    return ret;
  }

  // Code-unit semantics: setting absent, 'off', unknown, or no surrogate pairs present.
  if (start < 0) {
    start += end;
  }
  end = typeof len === 'undefined' ? end : (len < 0 ? len + end : len + start);

  // false if start does not fall within the string or the end precedes the start;
  // an empty string if start and end coincide; otherwise the slice from start to end.
  if (start >= str.length || start < 0 || start > end) {
    return false;
  }
  return str.slice(start, end);
}
function get_string_between(string, start, end) {
  // Returns the text that lies between the first occurrence of `start` and the
  // next occurrence of `end` after it.
  //
  // @param {*} string - Text to search; it is concatenated onto a one-space prefix.
  // @param {string} start - Opening delimiter.
  // @param {string} end - Closing delimiter, searched for after the opening one.
  // @returns {string|boolean} The enclosed text, or '' when `start` is empty or
  //   missing, or when `end` does not occur after it. Otherwise the value is
  //   whatever substr() returns for the computed range.
  //
  // * example 1: get_string_between('say [hello] now', '[', ']');
  // * returns 1: 'hello'
  // * example 2: get_string_between('say [hello now', '[', ']');
  // * returns 2: ''

  // The one-space prefix keeps a genuine match off index 0, so an index of 0 can
  // only come from an empty `start`.
  var padded = ' ' + string;

  var ini = strpos(padded, start);
  if (ini === false || ini === 0) {
    return '';
  }
  ini += strlen(start);

  var endPos = strpos(padded, end, ini);
  if (endPos === false) {
    // Without this guard `false - ini` becomes a negative length and substr()
    // returns an unrelated fragment of the text.
    return '';
  }

  return substr(padded, ini, endPos - ini);
}
function returnSubstrings(text, openingMarker, closingMarker) {
  // Collects every piece of `text` enclosed between `openingMarker` and the next
  // `closingMarker`, scanning left to right without overlap.
  //
  // @param {string} text - Text to scan.
  // @param {string} openingMarker - Delimiter that starts a piece.
  // @param {string} closingMarker - Delimiter that ends a piece.
  // @returns {Array} The enclosed pieces in order of appearance (each one is the
  //   value substr() returns); empty when nothing matches or when `openingMarker`
  //   is empty.
  //
  // * example 1: returnSubstrings('a [b] c [d] e', '[', ']');
  // * returns 1: ['b', 'd']
  var result = [];

  // An empty opening marker matches at every position without advancing the scan,
  // which would loop forever.
  if ((openingMarker + '') === '') {
    return result;
  }

  var openingMarkerLength = strlen(openingMarker);
  var closingMarkerLength = strlen(closingMarker);
  var position = 0;
  var closingMarkerPosition; // declared locally; it used to leak as an implicit global

  while ((position = strpos(text, openingMarker, position)) !== false) {
    position += openingMarkerLength;

    closingMarkerPosition = strpos(text, closingMarker, position);
    if (closingMarkerPosition === false) {
      // The scan only moves forward, so no later opening marker can be closed either.
      break;
    }

    result.push(substr(text, position, closingMarkerPosition - position));
    position = closingMarkerPosition + closingMarkerLength;
  }

  return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment