Created
September 10, 2010 03:49
-
-
Save kergoth/573056 to your computer and use it in GitHub Desktop.
Experimentations with a Url class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from uri import parse_url, Url | |
class MalformedUrl(Exception):
    """Signals that a URL string could not be decoded into its components."""
def new_decodeurl(url):
    """Decode *url* via parse_url() into the legacy six-field tuple.

    Returns ``(scheme, hostname, path, username, password, params)``.
    A hostname, username or password that parse_url() reports as falsy
    is returned as ''.
    """
    parsed = parse_url(url)
    host = parsed.hostname or ''
    user = parsed.username or ''
    pswd = parsed.password or ''
    return parsed.scheme, host, parsed.path, user, pswd, parsed.params
# scheme://[user[:password]@]location[;parameters]
# The user part may not contain '/' or ';', so an '@' that appears later in
# the path or in a parameter value is not mistaken for the credentials
# separator.
_DECODE_URL_RE = re.compile(
    r'(?P<type>[^:]*)://((?P<user>[^/;]+)@)?(?P<location>[^;]+)(;(?P<parm>.*))?')


def decodeurl(url):
    """Decode a URL into the tokens (scheme, network location, path,
    user, password, parameters).

    The url is expected to look like
    ``scheme://[user[:password]@]location[;key=value[;key=value...]]``.
    For every scheme except ``file`` the location is split at its first '/'
    into host and path.  For ``file`` urls, and for locations without a '/',
    the whole location is returned as the path and the host is ''.

    Returns a 6-tuple ``(scheme, host, path, user, password, params)``.
    A missing user or password is returned as ''; params is a dict mapping
    parameter names to string values.

    Raises MalformedUrl if the url does not match the form above or if a
    parameter has no '='.
    """
    match = _DECODE_URL_RE.match(url)
    if not match:
        raise MalformedUrl(url)

    scheme = match.group('type')
    location = match.group('location')
    if not location:
        raise MalformedUrl(url)

    slash = location.find('/')
    if slash != -1 and scheme.lower() != 'file':
        host, path = location[:slash], location[slash:]
    else:
        host, path = "", location

    # partition() always yields a password (possibly ''), so both names are
    # bound even for odd credentials such as ':secret'.
    user, _, pswd = (match.group('user') or '').partition(':')

    params = {}
    parm = match.group('parm')
    if parm:
        for item in parm.split(';'):
            if not item:
                # Empty segment from a trailing or doubled ';'.
                continue
            # Split on the first '=' only so values may contain '='.
            key, sep, value = item.partition('=')
            if not sep:
                raise MalformedUrl(url)
            params[key] = value

    return (scheme, host, path, user, pswd, params)
def new_encodeurl(decoded):
    """Encode the legacy six-field tuple into a URL string via the Url class.

    *decoded* is ``(scheme, hostname, path, username, password, params)``.
    When a username is given, it (plus ``:password`` if a password is given)
    is prefixed to the hostname with '@' to form the netloc.
    """
    scheme, hostname, path, username, password, params = decoded
    if username:
        credentials = username + ":" + password if password else username
        netloc = credentials + "@" + hostname
    else:
        netloc = hostname
    return str(Url(scheme, netloc, path, params, None, None))
class MissingParameterError(Exception):
    """Raised by encodeurl() when the scheme or the path component is empty."""


def encodeurl(decoded):
    """Encode a URL from tokens (scheme, network location, path,
    user, password, parameters).

    *decoded* is a 6-tuple ``(scheme, host, path, user, password, params)``.
    The result is ``scheme://[user[:password]@][host]path`` followed by one
    ``;key=value`` for each entry of the params mapping.

    Raises MissingParameterError if the scheme or the path is empty.
    """
    scheme, host, path, user, pswd, params = decoded
    if not scheme or not path:
        # Wrap the tuple in a 1-tuple: a bare tuple on the right of '%' is
        # taken as the argument list and would raise TypeError here.
        raise MissingParameterError(
            "Type or path url components missing when encoding %s" % (decoded,))

    pieces = ['%s://' % scheme]
    if user:
        pieces.append('%s' % user)
        if pswd:
            pieces.append(':%s' % pswd)
        pieces.append('@')
    if host:
        pieces.append('%s' % host)
    pieces.append('%s' % path)
    if params:
        for key in params:
            pieces.append(';%s=%s' % (key, params[key]))
    return ''.join(pieces)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
import unittest2 as unittest | |
except ImportError: | |
import unittest | |
from uri import parse_url, Url | |
# Baseline Url with every field empty; new_url() derives test fixtures from it.
default = Url(scheme="", netloc="", path="", params={}, query={}, fragment="")


def new_url(**kwargs):
    """Return a copy of ``default`` with the given fields replaced."""
    overrides = kwargs
    return default._replace(**overrides)
class TestURIs(unittest.TestCase):
    """Check parse_url()/Url against known URLs and time them against the
    legacy decodeurl()/encodeurl() functions."""

    # Maps each URL string to the Url that parse_url() is expected to return.
    uris = {
        "file://defconfig": new_url(scheme="file", path="defconfig"),
        "file://foo/defconfig": new_url(scheme="file", path="foo/defconfig"),
        "file:///defconfig": new_url(scheme="file", path="/defconfig"),
        "file:///foo/defconfig": new_url(scheme="file", path="/foo/defconfig"),
        "file://foo/defconfig;patch=1;alpha=beta": new_url(
            scheme="file", path="foo/defconfig",
            params={"patch": "1", "alpha": "beta"}),
        "http://foo.com/bar/defconfig;patch=1;alpha=beta": new_url(
            scheme="http", path="/bar/defconfig",
            netloc="foo.com",
            params={"patch": "1", "alpha": "beta"}),
        "git://github.com/kergoth/homefiles.git": new_url(
            scheme="git", netloc="github.com",
            path="/kergoth/homefiles.git"),
        "svn://clarson@kergoth.com/;module=homefiles;protocol=http": new_url(
            scheme="svn", netloc="clarson@kergoth.com",
            path="/", params={"module": "homefiles",
                              "protocol": "http"}),
        "svn://svn.enlightenment.org/svn/e/trunk;module=E-MODULES-EXTRA/elfe;scmdata=keep;proto=http":
            new_url(scheme="svn", netloc="svn.enlightenment.org", path="/svn/e/trunk",
                    params={"module": "E-MODULES-EXTRA/elfe", "scmdata": "keep", "proto": "http"}),
    }

    def test_decode_performance(self):
        """Assert old and new decoders agree, then print timings per URL."""
        from timeit import repeat
        from oe_uri import decodeurl, new_decodeurl

        for url in self.uris:
            self.assertEqual(decodeurl(url), new_decodeurl(url))

            old = repeat(lambda: decodeurl(url), number=1000, repeat=1)
            new = repeat(lambda: new_decodeurl(url), number=1000, repeat=1)
            new2 = repeat(lambda: parse_url(url), number=1000, repeat=1)
            print("decode for %s: old=%s, new=%s, new without wrapper=%s" % (url, old, new, new2))

    def test_encode_performance(self):
        """Assert old and new encoders agree, then print timings per URL."""
        from timeit import repeat
        from oe_uri import decodeurl, encodeurl, new_encodeurl

        for url in self.uris:
            parsed = parse_url(url)
            decoded = decodeurl(url)
            self.assertEqual(encodeurl(decoded), new_encodeurl(decoded))

            old = repeat(lambda: encodeurl(decoded), number=1000, repeat=1)
            new = repeat(lambda: new_encodeurl(decoded), number=1000, repeat=1)
            # Time stringifying the parsed Url, not the already-plain string.
            new2 = repeat(lambda: str(parsed), number=1000, repeat=1)
            print("encode for %s: old=%s, new=%s, new without wrapper=%s" % (url, old, new, new2))

    def test_uris(self):
        """parse_url() returns the expected Url for every known URL."""
        for url, compareto in self.uris.items():
            parsed = parse_url(url)
            self.assertEqual(parsed, compareto)

    def test_file_uri_rejoin(self):
        """A relative file url survives a parse/str round trip."""
        url = parse_url("file://defconfig")
        self.assertEqual(str(url), "file://defconfig")

    def test_file_uri_rejoin_abs(self):
        """An absolute file url survives a parse/str round trip."""
        url = parse_url("file:///foo/defconfig")
        self.assertEqual(str(url), "file:///foo/defconfig")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Enhance urlparse for OpenEmbedded's needs | |
- Handles OpenEmbedded's odd file urls.
  OE uses file://foo/bar.patch as relative, file:///foo/bar.patch as absolute,
  but '//' following the scheme implies the existence of an authority, aka
  a hostname, and urlparse handles it in that way.
- Allows url params for all schemes. | |
- Pre-parses the params and query string for convenience. | |
Portions of the Url class were copied directly from the urlparse source tree. | |
The encodeurl and decodeurl functions are still provided, for compatibility reasons. | |
""" | |
import urlparse | |
import warnings | |
from collections import namedtuple | |
def parse_url(url):
    """Parse *url* into a Url, with params and query pre-parsed into dicts.

    Everything after the first ';' is taken as the parameter string and
    handed to parse_params(); the remainder is split with urlparse.urlparse()
    and its query string is parsed with urlparse.parse_qs().
    """
    # OE writes file urls as file://relative/path.  Drop the '//' so that
    # urlparse does not read the first path segment as a hostname.  Only the
    # leading scheme marker is rewritten; a 'file://' occurring later in the
    # url (e.g. inside a query value) is left untouched.
    prefix = 'file://'
    if url.startswith(prefix):
        url = 'file:' + url[len(prefix):]

    url, sep, raw_params = url.partition(';')
    params = parse_params(raw_params) if sep else {}

    scheme, netloc, path, _, query, fragment = urlparse.urlparse(url)
    return Url(scheme, netloc, path, params, urlparse.parse_qs(query), fragment)
def parse_params(params):
    """Parse a ';'-separated parameter string into a dict.

    Each ``key=value`` segment is split on its first '=' only, so values may
    themselves contain '='.  A segment without '=' is stored with the value
    True.  Empty segments (from a leading, trailing or doubled ';') are
    ignored.  A falsy *params* yields an empty dict.
    """
    values = {}
    if not params:
        return values

    for param in params.split(';'):
        if not param:
            continue
        key, sep, value = param.partition('=')
        values[key] = value if sep else True
    return values
#noinspection PyUnresolvedReferences
class Url(urlparse.ParseResult):
    """Representation of a Uniform Resource Identifier.

    A urlparse.ParseResult whose ``params`` field is expected to be a dict of
    parameters and whose ``query`` field is expected to be a dict mapping
    each name to a list of values (the shape urlparse.parse_qs() returns).
    ``str(url)`` reassembles the url with the parameters appended last.
    """
    __slots__ = ()

    @property
    def querystring(self):
        """Reassembled query string, one ``key=value`` per value.

        Pairs are joined with '&' rather than ';' because ';' introduces the
        parameter section in the urls this class produces.
        """
        return '&'.join('%s=%s' % (key, item)
                        for key, values in self.query.items()
                        for item in values)

    @property
    def parameterstring(self):
        """Reassembled parameter string, segments joined with ';'.

        A parameter whose value is True (a parameter that had no '=' when
        parsed) is written as the bare key, not as ``key=True``.
        """
        pieces = []
        for key, value in self.params.items():
            if value is True:
                pieces.append('%s' % (key,))
            else:
                pieces.append('%s=%s' % (key, value))
        return ';'.join(pieces)

    def join(self, otherurl):
        """Join this url to a possibly relative URL to form an absolute
        interpretation of the latter."""
        return parse_url(urlparse.urljoin(str(self), str(otherurl)))

    def unsplit(self):
        """String version of URL without parameters"""
        url = self.path
        # Emit '//' for any scheme that takes a netloc, even when the netloc
        # is empty, so an OE-style 'file://relative/path' is reproduced.
        if self.netloc or (self.scheme and self.scheme in urlparse.uses_netloc and
                           url[:2] != '//'):
            url = '//' + (self.netloc or '') + url
        if self.scheme:
            url = self.scheme + ':' + url
        if self.query:
            url += '?' + self.querystring
        if self.fragment:
            url += '#' + self.fragment
        return url

    def geturl(self):
        """String version of the URL, with ``;parameters`` appended if any."""
        url = self.unsplit()
        if self.params:
            url += ';' + self.parameterstring
        return url

    def __str__(self):
        return self.geturl()
# vim: set et fenc=utf-8 sts=4 sw=4 : |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment