Last active
November 19, 2018 05:54
-
-
Save kcak11/4ea8aac637b15a7357ebe95f91824005 to your computer and use it in GitHub Desktop.
URL Sanitizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- | |
© 2018 https://kcak11.com / https://ashishkumarkc.com | |
--> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
© 2018 https://kcak11.com / https://ashishkumarkc.com | |
*/ | |
/**
 * URL Sanitizer.
 *
 * Registers a singleton `Sanitizer` object on the host global object with:
 *   - Sanitizer.sanitizeUrl(url):        percent-encodes every character that is not in the
 *                                        allowed set, without double-encoding escapes that
 *                                        are already present.
 *   - Sanitizer.decodeSanitizedURL(url): decodes a sanitized url back to plain text.
 *
 * Two token patterns are kept readable in the sanitized output:
 *   - Autofill tokens:     %XXX_YYY%  (3 or more uppercase A-Z / _ characters)
 *   - Double curly tokens: {{mytoken}}
 */
(function(w) {
    var Sanitizer = w.Sanitizer;
    if (!Sanitizer) {
        Sanitizer = w.Sanitizer = {};
    } else {
        return; // Ensuring Singleton
    }

    /*
     * One match is one of:
     *   1. a run of %XX escapes (handled as a unit so multi-byte UTF-8 sequences stay together),
     *   2. a surrogate pair (encodeURIComponent throws when given only half of a pair),
     *   3. any single character that is not in the allowed set (this includes a lone "%").
     */
    var sanitizeRegex = /(?:%[0-9A-Fa-f]{2})+|[\uD800-\uDBFF][\uDC00-\uDFFF]|[^a-zA-Z0-9/:\?&=\$\-_\.\+!\*'\(\),#]/g;
    /* URLs may contain Autofill Token patterns like %XXX_YYY% where XXX_YYY is a combination of 3 or more uppercase A-Z and _ characters */
    var encodedAutoFillTokenRegex = /%25([A-Z_]{3,})%25/g;
    var plainAutoFillTokenRegex = /%([A-Z_]{3,})%/g;
    /* URLs may contain Double Curly Token patterns like {{mytoken}} */
    var curlyTokenRegex = /%7B%7B(.+?)%7D%7D/g;

    /**
     * Percent-encodes the characters of a url that are outside the allowed set.
     *
     * @param {string} url - The url to sanitize. Falsy values are returned unchanged.
     * @returns {string} The sanitized url.
     */
    Sanitizer.sanitizeUrl = function(url) {
        if (!url) {
            return url;
        }
        url = String(url);
        var wasDecoded = true;
        try {
            url = decodeURIComponent(url);
        } catch (URL_NOT_DECODABLE) {
            /* The url mixes valid escapes with stray "%" characters; work on it as it is */
            wasDecoded = false;
        }
        url = url.replace(sanitizeRegex, function(match) {
            if (match.charAt(0) === "%" && match.length >= 3) {
                /*
                 * After a successful decode every remaining "%" is a literal percent sign
                 * (it came from "%25"), so it must be encoded even when hex digits follow.
                 * Only when the url could not be decoded are existing escapes preserved.
                 */
                return wasDecoded ? match.replace(/%/g, "%25") : keepDecodableEscapes(match);
            }
            return enc(match);
        });
        url = url.replace(encodedAutoFillTokenRegex, "%$1%");
        url = url.replace(curlyTokenRegex, "{{$1}}");
        return url;
    };

    /**
     * Decodes a url produced by sanitizeUrl.
     *
     * @param {string} url - The sanitized url. Falsy values are returned unchanged.
     * @returns {string} The decoded url, or the input unchanged when it cannot be decoded.
     */
    Sanitizer.decodeSanitizedURL = function(url) {
        if (!url) {
            return url;
        }
        return extractUrlWithAutoFillTokens(url);
    };

    /* Helper Functions*/

    /* Encodes a single match; a lone surrogate cannot be encoded and becomes U+FFFD. */
    function enc(c) {
        try {
            return encodeURIComponent(c);
        } catch (LONE_SURROGATE) {
            return "%EF%BF%BD";
        }
    }

    /* Tells whether the given escape sequence is valid percent-encoded UTF-8. */
    function isDecodable(escapes) {
        try {
            decodeURIComponent(escapes);
            return true;
        } catch (NOT_DECODABLE) {
            return false;
        }
    }

    /*
     * Walks a run of %XX escapes. Valid UTF-8 sequences (1 to 4 escapes long) are kept
     * untouched to avoid double encoding; the "%" of any other escape is encoded as "%25".
     */
    function keepDecodableEscapes(run) {
        var out = "";
        var pos = 0;
        while (pos < run.length) {
            var kept = false;
            for (var bytes = 1; bytes <= 4 && !kept; bytes++) {
                var end = pos + bytes * 3;
                if (end <= run.length && isDecodable(run.slice(pos, end))) {
                    out += run.slice(pos, end);
                    pos = end;
                    kept = true;
                }
            }
            if (!kept) {
                out += "%25" + run.slice(pos + 1, pos + 3);
                pos += 3;
            }
        }
        return out;
    }

    /* Decodes customUrl; Autofill Tokens are encoded first so that their "%" survives the decode. */
    function extractUrlWithAutoFillTokens(customUrl) {
        try {
            return decodeURIComponent(String(customUrl).replace(plainAutoFillTokenRegex, "%25$1%25"));
        } catch (URL_NOT_DECODABLE) {
            // If decoding is not possible, then return the url exactly as it was received
            return customUrl;
        }
    }
}(typeof window !== "undefined" ? window : (typeof self !== "undefined" ? self : (typeof globalThis !== "undefined" ? globalThis : this))));
//TestCase:
//inputStr contains not allowed characters and also a usecase for double encoding.
var inputStr = "https://www.foobar.com?abc=xyz%20pqrs&#$&p2=foo%ba*.'r&p3=test(__)string&__proto__=[object Object]&token=This is %SAMPLE_TITLE% page&q={{mytoken}} and {{another_token}}#pagefragment%25";

// Renders the input, its sanitized form and the decoded form into the page.
(function renderDemo() {
    // The values are written into HTML, so markup characters are escaped to show them as literal text.
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    var encoded = Sanitizer.sanitizeUrl(inputStr);
    var decoded = Sanitizer.decodeSanitizedURL(encoded);

    document.open("text/html", "replace");
    document.write("<b>INPUT:</b><br/> " + escapeHtml(inputStr));
    document.write("<br/><br/>");
    document.write("<b>ENCODED OUTPUT:</b><br/> " + escapeHtml(encoded));
    document.write("<br/><br/>");
    document.write("<b>DECODED OUTPUT:</b><br/> " + escapeHtml(decoded));
    document.close();
}());
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
© 2018 https://kcak11.com / https://ashishkumarkc.com | |
*/ | |
/* Demo page text: fixed-width face at a readable size */
body {
    font-size: 16px;
    font-family: Monospace;
}

/* Section labels (INPUT / ENCODED OUTPUT / DECODED OUTPUT) are shown in blue */
b {
    color: #00f;
}
A URL sanitizer utility that percent-encodes a given URL while avoiding double encoding.
It leaves characters that are valid in a URL untouched and preserves token patterns such as %MY_TOKEN% or {{MY_TOKEN}}.
[ RFC-1738 http://www.faqs.org/rfcs/rfc1738.html ]
A Pen by K.C.Ashish Kumar on CodePen.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment