-
-
Save mikesamuel/1b76779ec5206e258829914e2b0dec27 to your computer and use it in GitHub Desktop.
Benchmarkable isJavascriptUrl predicate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// By analysis of the URL scheme grammar, the first scheme character of a
// javascript: URL has to be 'j' or 'J'.  This pattern matches inputs whose
// first character is any other scheme character, or one of the delimiters
// '/', '?', ':' or '#', so they can be rejected without further work.
const notJavascriptFirstCharRe = /^[a-ik-z0-9\+\-\.\/\?:#]/i;

// Extracts the leading "<scheme>:" portion, if any.  The group is optional,
// so this pattern always matches (possibly the empty string).
// https://tools.ietf.org/html/std66#appendix-B
const schemeWithColonRe = /^(?:[^:/?#]*:)?/;

// To match conservatively (no false negatives), we ignore chars not in
// Std66's scheme production:
//     scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
// Which are also the only chars appended to the buffer by
// https://url.spec.whatwg.org/#scheme-state
//     1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E (.),
//        append c, lowercased, to buffer.
const runsOfNonSchemeCharsRe = /[^a-zA-Z0-9\+\-\.]+/g;

// See "lowercased" above for why we match insensitively.
// TODO: Without the 'u' flag, this shouldn't be confused by
// "javascr<Turkish-Variant-I>pt"
// http://unicode.org/faq/casemap_charprop.html#9
// and should in any case overmatch, but it is worth testing on
// a browser with a Turkish locale.
const javascriptCaseInsensitiveRe = /^javascript$/i;

/**
 * Conservatively tests whether a value, once coerced to a string, has a
 * `javascript:` scheme.
 *
 * Characters that are not valid scheme characters (for example an embedded
 * tab) are discarded before the comparison, so the predicate may overmatch
 * but is intended never to miss a javascript: URL.
 *
 * @param {*} str - The candidate URL; coerced with template-literal
 *     interpolation.
 * @returns {boolean} True if the text before the first ':' (when no '/',
 *     '?' or '#' precedes it) spells "javascript", ignoring case and any
 *     non-scheme characters.
 */
function isJavascriptUrl(str) {
  const url = `${ str }`;

  // Fast path: the first character already rules out a javascript: scheme.
  if (notJavascriptFirstCharRe.test(url)) {
    return false;
  }

  // schemeWithColonRe can match the empty string, so exec() never yields
  // null here; with no scheme present the match is ''.
  const [schemeWithColon] = schemeWithColonRe.exec(url);

  // Strip everything that is not a scheme character (including the trailing
  // ':'), which defeats tricks such as embedded tabs:
  // http://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
  const scheme = schemeWithColon.replace(runsOfNonSchemeCharsRe, '');

  return javascriptCaseInsensitiveRe.test(scheme);
}
// Demo: print, as JSON, the sample inputs that isJavascriptUrl accepts.
// The samples cover a non-javascript scheme, a missing or misplaced colon,
// leading delimiters, mixed case, an embedded tab, and the Turkish
// dotless/dotted 'i' variants.
console.log(JSON.stringify(
  [
    'http://example.com', 'javascript:alert(1)', 'javascript',
    'javascript/', '/javascript:', '#javascript:', '?javascript:',
    'Javascript:alert(1)', 'java\tscript:alert(1)',
    'javascrıpt:alert(1)', 'javascrİpt:alert(1)',
  ]
  .filter(isJavascriptUrl)
));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment