Created
September 26, 2012 13:16
-
-
Save xguru/3787969 to your computer and use it in GitHub Desktop.
PHPJS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Removes HTML/XML tags, HTML comments and PHP blocks from a string
 * (JavaScript port of PHP's strip_tags).
 *
 * NOTE: this is regex-based tag stripping, not an HTML sanitizer. Attributes of
 * allowed tags (e.g. onclick) are kept untouched, so do not rely on it alone to
 * make untrusted markup safe.
 *
 * @param {*} input - Text to clean. Coerced to a string; null/undefined become ''.
 * @param {string} [allowed] - Tags to keep, written like '<a><b>'. Matching is
 *   case-insensitive. Only entries of the exact form <name> are recognised, so
 *   '<br/>' in this list is ignored (list '<br>' to keep both <br> and <br/>).
 * @returns {string} The input with every tag that is not listed in `allowed` removed.
 */
function strip_tags(input, allowed) {
  // http://kevin.vanzonneveld.net
  // +   original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   improved by: Luke Godfrey
  // +      input by: Pul
  // +   bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   bugfixed by: Onno Marsman
  // +      input by: Alex
  // +   bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +      input by: Marc Palau
  // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +      input by: Brett Zamir (http://brett-zamir.me)
  // +   bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   bugfixed by: Eric Nagel
  // +      input by: Bobby Drake
  // +   bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   bugfixed by: Tomasz Wesolowski
  // +      input by: Evertjan Garretsen
  // +    revised by: Rafał Kukawski (http://blog.kukawski.pl/)
  // *     example 1: strip_tags('<p>Kevin</p> <br /><b>van</b> <i>Zonneveld</i>', '<i><b>');
  // *     returns 1: 'Kevin <b>van</b> <i>Zonneveld</i>'
  // *     example 2: strip_tags('<p>Kevin <img src="someimage.png" onmouseover="someFunction()">van <i>Zonneveld</i></p>', '<p>');
  // *     returns 2: '<p>Kevin van Zonneveld</p>'
  // *     example 3: strip_tags("<a href='http://kevin.vanzonneveld.net'>Kevin van Zonneveld</a>", "<a>");
  // *     returns 3: '<a href='http://kevin.vanzonneveld.net'>Kevin van Zonneveld</a>'
  // *     example 4: strip_tags('1 < 5 5 > 1');
  // *     returns 4: '1 < 5 5 > 1'
  // *     example 5: strip_tags('1 <br/> 1');
  // *     returns 5: '1  1'
  // *     example 6: strip_tags('1 <br/> 1', '<br>');
  // *     returns 6: '1 <br/> 1'
  // *     example 7: strip_tags('1 <br/> 1', '<br><br/>');
  // *     returns 7: '1 <br/> 1'

  // Coerce like PHP does instead of throwing on numbers / null / undefined.
  var text = input == null ? '' : input + '';

  // Normalise the whitelist to a lowercase string containing only <name> entries (<a><b><c>).
  var allowedTags = (((allowed || '') + '').toLowerCase().match(/<[a-z][a-z0-9]*>/g) || []).join('');

  var tags = /<\/?([a-z][a-z0-9]*)\b[^>]*>/gi;
  var commentsAndPhpTags = /<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/gi;

  return text.replace(commentsAndPhpTags, '').replace(tags, function (tag, name) {
    // Keep the tag verbatim (opening or closing) only when its name is whitelisted.
    return allowedTags.indexOf('<' + name.toLowerCase() + '>') > -1 ? tag : '';
  });
}
/**
 * Finds the position of the first occurrence of a substring
 * (JavaScript port of PHP's strpos).
 *
 * @param {*} haystack - Text to search in; coerced to a string.
 * @param {string} needle - Text to search for.
 * @param {number} [offset=0] - Index at which the search starts. A falsy value means 0.
 * @returns {number|false} Zero-based index of the match, or false when there is
 *   none. Compare with === false, because a match at index 0 is falsy too.
 */
function strpos(haystack, needle, offset) {
  // http://kevin.vanzonneveld.net
  // +   original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   improved by: Onno Marsman
  // +   bugfixed by: Daniel Esteban
  // +   improved by: Brett Zamir (http://brett-zamir.me)
  // *     example 1: strpos('Kevin van Zonneveld', 'e', 5);
  // *     returns 1: 14
  var index = (haystack + '').indexOf(needle, offset || 0);
  return index === -1 ? false : index;
}
/**
 * Returns the length of a string (JavaScript port of PHP's strlen).
 *
 * By default the UTF-16 code-unit length is returned. When the function is
 * invoked with a `this` that carries
 * php_js.ini['unicode.semantics'].local_value === 'on' (any letter case), a
 * surrogate pair is counted as one character.
 *
 * @param {*} string - Value to measure; coerced to a string first.
 * @returns {number} Number of code units, or of characters in unicode mode.
 * @throws {string} In unicode mode only: a plain string message when an
 *   unpaired surrogate is found (thrown values are kept as strings for
 *   backward compatibility with existing callers).
 */
function strlen(string) {
  // http://kevin.vanzonneveld.net
  // +   original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   improved by: Sakimori
  // +      input by: Kirk Strobeck
  // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // +   bugfixed by: Onno Marsman
  // +    revised by: Brett Zamir (http://brett-zamir.me)
  // %        note 1: May look like overkill, but in order to be truly faithful to handling all Unicode
  // %        note 1: characters and to this function in PHP which does not count the number of bytes
  // %        note 1: but counts the number of characters, something like this is really necessary.
  // *     example 1: strlen('Kevin van Zonneveld');
  // *     returns 1: 19
  // *     example 2: strlen('A\ud87e\udc04Z');
  // *     returns 2: 3
  var str = string + '';

  // `this` is undefined when called as a plain function in strict mode / ES modules,
  // so every step of the lookup is guarded.
  var ini = this && this.php_js && this.php_js.ini;
  var setting = ini && ini['unicode.semantics'];
  if (!setting || setting.local_value.toLowerCase() !== 'on') {
    // Use the coerced copy so numbers and other non-strings get a real length.
    return str.length;
  }

  var count = 0;
  for (var i = 0; i < str.length; i++) {
    var code = str.charCodeAt(i);
    if (code >= 0xD800 && code <= 0xDBFF) {
      // High surrogate: must be followed by a low surrogate; the pair counts once.
      // (charCodeAt past the end yields NaN, which fails the range check.)
      var next = str.charCodeAt(i + 1);
      if (!(next >= 0xDC00 && next <= 0xDFFF)) {
        throw 'High surrogate without following low surrogate';
      }
      count++;
    } else if (code >= 0xDC00 && code <= 0xDFFF) {
      // Low surrogate: only valid as the second half of a pair that was already counted.
      var prev = i === 0 ? NaN : str.charCodeAt(i - 1);
      if (!(prev >= 0xD800 && prev <= 0xDBFF)) {
        throw 'Low surrogate without preceding high surrogate';
      }
    } else {
      count++;
    }
  }
  return count;
}
/**
 * Returns part of a string (JavaScript port of PHP's substr).
 *
 * When the function is invoked with a `this` that carries
 * php_js.ini['unicode.semantics'].local_value === 'on' and the string contains
 * at least one surrogate pair, `start` and `len` are counted in characters
 * (a surrogate pair counts as one). Otherwise they are counted in UTF-16 code units.
 *
 * Side effect (kept for compatibility): when `this` is available,
 * this.php_js and this.php_js.ini are created if they are missing.
 *
 * @param {*} str - Source text; coerced to a string.
 * @param {number} start - Start position; a negative value counts from the end.
 * @param {number} [len] - Number of characters to return; a negative value
 *   drops that many characters from the end; omitted means "to the end".
 * @returns {string|false} The requested portion, or false when `start` lies
 *   outside the string or the computed end precedes the computed start.
 */
function substr(str, start, len) {
  // Returns part of a string
  //
  // version: 909.322
  // discuss at: http://phpjs.org/functions/substr
  // +     original by: Martijn Wieringa
  // +     bugfixed by: T.Wild
  // +      tweaked by: Onno Marsman
  // +      revised by: Theriault
  // +      improved by: Brett Zamir (http://brett-zamir.me)
  // %    note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
  // *       example 1: substr('abcdef', 0, -1);
  // *       returns 1: 'abcde'
  // *       example 2: substr(2, 0, -6);
  // *       returns 2: false
  // *       example 3: ini_set('unicode.semantics',  'on');
  // *       example 3: substr('a\uD801\uDC00', 0, -1);
  // *       returns 3: 'a'
  // *       example 4: ini_set('unicode.semantics',  'on');
  // *       example 4: substr('a\uD801\uDC00', 0, 2);
  // *       returns 4: 'a\uD801\uDC00'
  // *       example 5: ini_set('unicode.semantics',  'on');
  // *       example 5: substr('a\uD801\uDC00', -1, 1);
  // *       returns 5: '\uD801\uDC00'
  // *       example 6: ini_set('unicode.semantics',  'on');
  // *       example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2);
  // *       returns 6: '\uD801\uDC00z'
  // *       example 7: ini_set('unicode.semantics',  'on');
  // *       example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
  // *       returns 7: '\uD801\uDC00z'
  // Add: (?) Use unicode.runtime_encoding (e.g., with string wrapped in "binary" or "Binary" class) to
  // allow access of binary (see file_get_contents()) by: charCodeAt(x) & 0xFF (see https://developer.mozilla.org/En/Using_XMLHttpRequest ) or require conversion first?
  var HIGH_SURROGATE = /[\uD800-\uDBFF]/;
  var LOW_SURROGATE = /[\uDC00-\uDFFF]/;
  var i = 0;
  var es = 0;
  var el = 0;
  var se = 0;
  var ret = '';

  str += '';
  var end = str.length;

  // `this` is undefined when called as a plain function in strict mode / ES modules;
  // fall back to a throw-away holder so the settings lookup cannot throw.
  var host = this == null ? {} : this;
  host.php_js = host.php_js || {};
  host.php_js.ini = host.php_js.ini || {};
  var setting = host.php_js.ini['unicode.semantics'];
  var mode = setting && setting.local_value.toLowerCase();

  // Full-blown Unicode handling is only needed when a surrogate pair is actually present;
  // an all-BMP string is handled by the plain code-unit logic below.
  if (mode === 'on' && /[\uD800-\uDBFF][\uDC00-\uDFFF]/.test(str)) {
    if (start < 0) {
      // Walk back from the end; every pair crossed pushes the start one code unit further left.
      for (i = end - 1, es = (start += end); i >= es; i--) {
        if (LOW_SURROGATE.test(str.charAt(i)) && HIGH_SURROGATE.test(str.charAt(i - 1))) {
          start--;
          es--;
        }
      }
    } else {
      // Every pair that begins before the start shifts the start one code unit to the right.
      var surrogatePairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
      while (surrogatePairs.exec(str) !== null) {
        if (surrogatePairs.lastIndex - 2 < start) {
          start++;
        } else {
          break;
        }
      }
    }
    if (start >= end || start < 0) {
      return false;
    }
    if (typeof len === 'undefined') {
      // No length given: everything from the adjusted start to the end
      // (previously start + undefined produced NaN and an empty result).
      return str.slice(start);
    }
    if (len < 0) {
      // Trim |len| characters from the end, widening the cut for every pair crossed.
      for (i = end - 1, el = (end += len); i >= el; i--) {
        if (LOW_SURROGATE.test(str.charAt(i)) && HIGH_SURROGATE.test(str.charAt(i - 1))) {
          end--;
          el--;
        }
      }
      if (start > end) {
        return false;
      }
      return str.slice(start, end);
    }
    se = start + len;
    for (i = start; i < se; i++) {
      ret += str.charAt(i);
      if (HIGH_SURROGATE.test(str.charAt(i)) && LOW_SURROGATE.test(str.charAt(i + 1))) {
        se++; // Go one further, since one of the "characters" is part of a surrogate pair
      }
    }
    return ret;
  }

  // 'off', unset, or no non-BMP characters present: plain code-unit arithmetic.
  // If there may be non-BMP characters, it is best to turn the setting on (critical in true XHTML/XML).
  if (start < 0) {
    start += end;
  }
  end = typeof len === 'undefined' ? end : (len < 0 ? len + end : len + start);
  // PHP returns false if start does not fall within the string.
  // PHP returns false if the calculated end comes before the calculated start.
  // PHP returns an empty string if start and end are the same.
  // Otherwise, PHP returns the portion of the string from start to end.
  return start >= str.length || start < 0 || start > end ? false : str.slice(start, end);
}
/**
 * Returns the text found between the first occurrence of `start` and the
 * next occurrence of `end` after it.
 *
 * Implemented with native code-unit string methods, so the result does not
 * depend on any php_js 'unicode.semantics' setting.
 *
 * @param {*} string - Text to search; coerced to a string.
 * @param {*} start - Opening marker; coerced to a string.
 * @param {*} end - Closing marker; coerced to a string.
 * @returns {string} The enclosed text, or '' when the opening marker is empty
 *   or missing, or when no closing marker follows it.
 */
function get_string_between(string, start, end) {
  var haystack = string + '';
  var opener = start + '';
  var closer = end + '';

  // An empty opening marker has never produced a match; keep that contract explicit.
  if (opener === '') {
    return '';
  }

  var openAt = haystack.indexOf(opener);
  if (openAt === -1) {
    return '';
  }

  var from = openAt + opener.length;
  var closeAt = haystack.indexOf(closer, from);
  if (closeAt === -1) {
    // Without this guard a missing closing marker turned into a negative
    // length and yielded an arbitrary fragment (or false).
    return '';
  }

  return haystack.slice(from, closeAt);
}
/**
 * Collects every piece of text enclosed by an opening/closing marker pair,
 * scanning left to right without overlap.
 *
 * Implemented with native code-unit string methods, so the result does not
 * depend on any php_js 'unicode.semantics' setting.
 *
 * @param {*} text - Text to scan; coerced to a string.
 * @param {*} openingMarker - Marker that starts a section; coerced to a string.
 * @param {*} closingMarker - Marker that ends a section; coerced to a string.
 * @returns {string[]} The enclosed substrings in order of appearance. Empty
 *   when nothing matches or when the opening marker is empty.
 */
function returnSubstrings(text, openingMarker, closingMarker) {
  var haystack = text + '';
  var opener = openingMarker + '';
  var closer = closingMarker + '';
  var result = [];

  // An empty opening marker matches at every position without advancing,
  // which previously caused an endless loop.
  if (opener === '') {
    return result;
  }

  var position = 0;
  var openAt;
  while ((openAt = haystack.indexOf(opener, position)) !== -1) {
    var from = openAt + opener.length;
    var closeAt = haystack.indexOf(closer, from);
    if (closeAt === -1) {
      // No closing marker from here on, so no later section can be completed either.
      break;
    }
    result.push(haystack.slice(from, closeAt));
    position = closeAt + closer.length;
  }
  return result;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment