Skip to content

Instantly share code, notes, and snippets.

@isaacs
Created February 27, 2011 18:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save isaacs/846399 to your computer and use it in GitHub Desktop.
Save isaacs/846399 to your computer and use it in GitHub Desktop.
From 6790b9a4fd282733eb30bd807c416e2c78bfef4b Mon Sep 17 00:00:00 2001
From: isaacs <i@izs.me>
Date: Sun, 27 Feb 2011 10:21:23 -0800
Subject: [PATCH] Closes GH-711: parse URLs more safely

This does 3 things:
1. Delimiters and "unwise" characters are never included in the
hostname or path.
2. url.format will sanitize string URLs that are passed to it.
3. The parsed url's 'href' member will be the sanitized url, which may
not match the argument to url.parse.
---
lib/url.js | 74 +++++++++++++++++++++---
test/simple/test-url.js | 148 +++++++++++++++++++++++++++--------------------
2 files changed, 150 insertions(+), 72 deletions(-)
diff --git a/lib/url.js b/lib/url.js
index f272551..55c4bf2 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -7,9 +7,30 @@ exports.format = urlFormat;
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9]+:)/,
portPattern = /:[0-9]+$/,
- nonHostChars = ['/', '?', ';', '#'],
+ delims = ['<', '>', '"', '\'', '`', /\s/],
+ unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
+ nonHostChars = ['/', '?', ';', '#'].concat(unwise),
+ hostnameMaxLen = 255,
+ hostnamePartPattern = /^[a-z0-9][a-z0-9A-Z-]{0,62}$/,
+ unsafeProtocol = {
+ 'javascript': true,
+ 'javascript:': true
+ },
hostlessProtocol = {
+ 'javascript': true,
+ 'javascript:': true,
+ 'file': true,
+ 'file:': true
+ },
+ pathedProtocol = {
+ 'http': true,
+ 'https': true,
+ 'ftp': true,
+ 'gopher': true,
'file': true,
+ 'http:': true,
+ 'ftp:': true,
+ 'gopher:': true,
'file:': true
},
slashedProtocol = {
@@ -29,7 +50,7 @@ var protocolPattern = /^([a-z0-9]+:)/,
function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url && typeof(url) === 'object' && url.href) return url;
- var out = { href: url },
+ var out = {},
rest = url;
var proto = protocolPattern.exec(rest);
@@ -50,6 +71,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
out.slashes = true;
}
}
+
if (!hostlessProtocol[proto] &&
(slashes || (proto && !slashedProtocol[proto]))) {
// there's a hostname.
@@ -79,9 +101,36 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
out.hostname = out.hostname || '';
+
+ // validate a little.
+ if (out.hostname.length > hostnameMaxLen) {
+ out.hostname = '';
+ } else {
+ var hostparts = out.hostname.split(/\./);
+ for (var i = 0, l = hostparts.length; i < l; i++) {
+ var part = hostparts[i];
+ if (!part.match(hostnamePartPattern)) {
+ out.hostname = '';
+ break;
+ }
+ }
+ }
}
// now rest is set to the post-host stuff.
+ // chop off any delim chars.
+ if (!unsafeProtocol[proto]) {
+ var chop = rest.length;
+ for (var i = 0, l = delims.length; i < l; i++) {
+ var c = rest.indexOf(delims[i]);
+ if (c !== -1) {
+ chop = Math.min(c, chop);
+ }
+ }
+ rest = rest.substr(0, chop);
+ }
+
+
// chop off from the tail first.
var hash = rest.indexOf('#');
if (hash !== -1) {
@@ -99,9 +148,17 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
rest = rest.slice(0, qm);
} else if (parseQueryString) {
// no query string, but parseQueryString still requested
+ out.search = '';
out.query = {};
}
if (rest) out.pathname = rest;
+ if (slashedProtocol[proto] &&
+ out.hostname && !out.pathname) {
+ out.pathname = '/';
+ }
+
+ // finally, reconstruct the href based on what has been validated.
+ out.href = urlFormat(out);
return out;
}
@@ -123,13 +180,12 @@ function urlFormat(obj) {
) :
false,
pathname = obj.pathname || '',
- search = obj.search || (
- obj.query && ('?' + (
- typeof(obj.query) === 'object' ?
- querystring.stringify(obj.query) :
- String(obj.query)
- ))
- ) || '',
+ query = obj.query &&
+ ((typeof obj.query === 'object' &&
+ Object.keys(obj.query).length) ?
+ querystring.stringify(obj.query) :
+ '') || '',
+ search = obj.search || (query && ('?' + query)) || '',
hash = obj.hash || '';
if (protocol && protocol.substr(-1) !== ':') protocol += ':';
diff --git a/test/simple/test-url.js b/test/simple/test-url.js
index 91509b2..f381958 100644
--- a/test/simple/test-url.js
+++ b/test/simple/test-url.js
@@ -28,7 +28,8 @@ var parseTests = {
'pathname': '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s='
},
'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
- 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api' +
+ '&x=2&y=2&z=3&s=',
'protocol': 'http:',
'host': 'mt0.google.com',
'hostname': 'mt0.google.com',
@@ -37,7 +38,8 @@ var parseTests = {
'pathname': '/vt/lyrs=m@114'
},
'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
- 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' +
+ '&hl=en&src=api&x=2&y=2&z=3&s=',
'protocol': 'http:',
'host': 'user:pass@mt0.google.com',
'auth': 'user:pass',
@@ -84,49 +86,6 @@ var parseTests = {
'query': 'baz=quux',
'pathname': '/foo/bar'
},
- 'http://example.com?foo=bar#frag' : {
- 'href': 'http://example.com?foo=bar#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=bar',
- 'query': 'foo=bar'
- },
- 'http://example.com?foo=@bar#frag' : {
- 'href': 'http://example.com?foo=@bar#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=@bar',
- 'query': 'foo=@bar'
- },
- 'http://example.com?foo=/bar/#frag' : {
- 'href': 'http://example.com?foo=/bar/#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=/bar/',
- 'query': 'foo=/bar/'
- },
- 'http://example.com?foo=?bar/#frag' : {
- 'href': 'http://example.com?foo=?bar/#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=?bar/',
- 'query': 'foo=?bar/'
- },
- 'http://example.com#frag=?bar/#frag' : {
- 'href': 'http://example.com#frag=?bar/#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag=?bar/#frag'
- },
'/foo/bar?baz=quux#frag' : {
'href': '/foo/bar?baz=quux#frag',
'hash': '#frag',
@@ -154,9 +113,7 @@ var parseTests = {
'javascript:alert(\'hello\');' : {
'href': 'javascript:alert(\'hello\');',
'protocol': 'javascript:',
- 'host': 'alert(\'hello\')',
- 'hostname': 'alert(\'hello\')',
- 'pathname' : ';'
+ 'pathname': 'alert(\'hello\');'
},
'xmpp:isaacschlueter@jabber.org' : {
'href': 'xmpp:isaacschlueter@jabber.org',
@@ -194,21 +151,13 @@ var parseTestsWithQueryString = {
'pathname': '/foo/bar'
},
'http://example.com' : {
- 'href': 'http://example.com',
- 'protocol': 'http:',
- 'slashes': true,
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'query': {}
- },
- 'http://example.com?' : {
- 'href': 'http://example.com?',
+ 'href': 'http://example.com/',
'protocol': 'http:',
'slashes': true,
'host': 'example.com',
'hostname': 'example.com',
- 'search': '?',
- 'query': {}
+ 'query': {},
+ 'pathname': '/'
}
};
for (var u in parseTestsWithQueryString) {
@@ -225,7 +174,72 @@ for (var u in parseTestsWithQueryString) {
// some extra formatting tests, just to verify
// that it'll format slightly wonky content to a valid url.
var formatTests = {
+ 'http://example.com?' : {
+ 'href': 'http://example.com/?',
+ 'protocol': 'http:',
+ 'slashes': true,
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'search': '?',
+ 'query': {},
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=bar#frag' : {
+ 'href': 'http://example.com/?foo=bar#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=bar',
+ 'query': 'foo=bar',
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=@bar#frag' : {
+ 'href': 'http://example.com/?foo=@bar#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=@bar',
+ 'query': 'foo=@bar',
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=/bar/#frag' : {
+ 'href': 'http://example.com/?foo=/bar/#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=/bar/',
+ 'query': 'foo=/bar/',
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=?bar/#frag' : {
+ 'href': 'http://example.com/?foo=?bar/#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=?bar/',
+ 'query': 'foo=?bar/',
+ 'pathname': '/'
+ },
+ 'http://example.com#frag=?bar/#frag' : {
+ 'href': 'http://example.com/#frag=?bar/#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag=?bar/#frag',
+ 'pathname': '/'
+ },
+ 'http://google.com" onload="alert(42)/' : {
+ 'href': 'http://google.com/',
+ 'protocol': 'http:',
+ 'host': 'google.com',
+ 'pathname': '/'
+ },
'http://a.com/a/b/c?s#h' : {
+ 'href': 'http://a.com/a/b/c?s#h',
'protocol': 'http',
'host': 'a.com',
'pathname': 'a/b/c',
@@ -233,7 +247,7 @@ var formatTests = {
'search': 's'
},
'xmpp:isaacschlueter@jabber.org' : {
- 'href': 'xmpp://isaacschlueter@jabber.org',
+ 'href': 'xmpp:isaacschlueter@jabber.org',
'protocol': 'xmpp:',
'host': 'isaacschlueter@jabber.org',
'auth': 'isaacschlueter',
@@ -241,9 +255,17 @@ var formatTests = {
}
};
for (var u in formatTests) {
- var actual = url.format(formatTests[u]);
- assert.equal(actual, u,
- 'wonky format(' + u + ') == ' + u + '\nactual:' + actual);
+ var expect = formatTests[u].href;
+ delete formatTests[u].href;
+ var actual = url.format(u);
+ var actualObj = url.format(formatTests[u]);
+ assert.equal(actual, expect,
+ 'wonky format(' + u + ') == ' + expect +
+ '\nactual:' + actual);
+ assert.equal(actualObj, expect,
+ 'wonky format(' + JSON.stringify(formatTests[u]) +
+ ') == ' + expect +
+ '\nactual: ' + actualObj);
}
/*
--
1.7.2.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment