Skip to content

Instantly share code, notes, and snippets.

@kergoth
Created September 10, 2010 03:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kergoth/573056 to your computer and use it in GitHub Desktop.
Save kergoth/573056 to your computer and use it in GitHub Desktop.
Experimentations with a Url class
import re
from uri import parse_url, Url
class MalformedUrl(Exception):
    """Raised when a URL string cannot be decoded into its components."""
def new_decodeurl(url):
    """Decode ``url`` with ``parse_url`` into the legacy six-item tuple.

    Returns ``(scheme, hostname, path, username, password, params)``.  An
    empty string is substituted whenever the parsed hostname, username or
    password is falsy.
    """
    parsed = parse_url(url)
    host = parsed.hostname or ''
    user = parsed.username or ''
    secret = parsed.password or ''
    return parsed.scheme, host, parsed.path, user, secret, parsed.params
# Compiled once at import time instead of on every call.  The user part may
# not contain '/' or ';', so an '@' that appears later in the path or in a
# parameter value is not mistaken for the credentials separator.
_DECODEURL_RE = re.compile(
    r'(?P<type>[^:]*)://((?P<user>[^/;]+)@)?(?P<location>[^;]+)(;(?P<parm>.*))?')


def decodeurl(url):
    """Decode a URL into the tokens (scheme, network location, path,
    user, password, parameters).

    For every scheme except ``file`` the location is split at its first
    '/' into host and path; for ``file`` URLs, or when the location has no
    '/', the host is '' and the whole location is the path.

    Returns a tuple ``(type, host, path, user, pswd, params)`` in which
    ``params`` is a dict mapping parameter names to string values and
    ``user`` / ``pswd`` are '' when absent.

    Raises MalformedUrl if ``url`` does not have the ``scheme://location``
    shape or if a parameter has no '='.
    """
    match = _DECODEURL_RE.match(url)
    if not match:
        raise MalformedUrl(url)

    scheme = match.group('type')
    location = match.group('location')

    slash = location.find('/')
    if slash != -1 and scheme.lower() != 'file':
        host, path = location[:slash], location[slash:]
    else:
        host, path = '', location

    # group('user') is None when the URL carries no credentials.  partition
    # always yields both parts, so pswd is defined even for ':secret'.
    user, _, pswd = (match.group('user') or '').partition(':')

    params = {}
    for item in (match.group('parm') or '').split(';'):
        if not item:
            # Tolerate a trailing or doubled ';'.
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        key, sep, value = item.partition('=')
        if not sep:
            raise MalformedUrl(url)
        params[key] = value

    return (scheme, host, path, user, pswd, params)
def new_encodeurl(decoded):
    """Encode the legacy six-item tuple into a URL string through ``Url``.

    ``decoded`` is ``(scheme, hostname, path, username, password, params)``.
    Credentials are folded into the network location as ``user[:password]@``
    and the result is rendered with ``str(Url(...))``.
    """
    scheme, hostname, path, username, password, params = decoded
    if not username:
        # A password without a username is ignored.
        netloc = hostname
    elif password:
        netloc = username + ":" + password + "@" + hostname
    else:
        netloc = username + "@" + hostname
    return str(Url(scheme, netloc, path, params, None, None))
class MissingParameterError(Exception):
    """Raised by encodeurl when the scheme or the path component is empty."""


def encodeurl(decoded):
    """Encode a URL from tokens (scheme, network location, path,
    user, password, parameters).

    ``decoded`` is the six-item tuple ``(type, host, path, user, pswd, p)``.
    ``p`` is a mapping of parameter names to values, or a falsy value for
    no parameters.  The password is only emitted when a user is present.

    Returns the URL as ``type://[user[:pswd]@][host]path[;key=value...]``.

    Raises MissingParameterError if the type or the path is empty.
    """
    (scheme, host, path, user, pswd, params) = decoded
    if not scheme or not path:
        # decoded is a tuple: wrap it so '%' formats it as a single value
        # instead of unpacking it into six arguments (a TypeError).
        raise MissingParameterError(
            "Type or path url components missing when encoding %s" % (decoded,))

    pieces = ['%s://' % scheme]
    if user:
        pieces.append("%s" % user)
        if pswd:
            pieces.append(":%s" % pswd)
        pieces.append("@")
    if host:
        pieces.append("%s" % host)
    pieces.append("%s" % path)
    if params:
        pieces.extend(";%s=%s" % (key, params[key]) for key in params)
    return "".join(pieces)
try:
import unittest2 as unittest
except ImportError:
import unittest
from uri import parse_url, Url
# Blank template Url that new_url() copies and overrides.
default = Url(scheme="", netloc="", path="", params={}, query={}, fragment="")


def new_url(**kwargs):
    """Return a copy of the blank ``default`` Url with the fields named in
    ``kwargs`` replaced."""
    return default._replace(**kwargs)
class TestURIs(unittest.TestCase):
    """Checks ``parse_url`` / ``Url`` against known URLs and compares the
    legacy and the Url-based encode/decode helpers, printing timings."""

    # Maps each URL string to the Url that parse_url() is expected to return.
    uris = {
        "file://defconfig": new_url(scheme="file", path="defconfig"),
        "file://foo/defconfig": new_url(scheme="file", path="foo/defconfig"),
        "file:///defconfig": new_url(scheme="file", path="/defconfig"),
        "file:///foo/defconfig": new_url(scheme="file", path="/foo/defconfig"),
        "file://foo/defconfig;patch=1;alpha=beta": new_url(
            scheme="file", path="foo/defconfig",
            params={"patch": "1", "alpha": "beta"}),
        "http://foo.com/bar/defconfig;patch=1;alpha=beta": new_url(
            scheme="http", path="/bar/defconfig",
            netloc="foo.com",
            params={"patch": "1", "alpha": "beta"}),
        "git://github.com/kergoth/homefiles.git": new_url(
            scheme="git", netloc="github.com",
            path="/kergoth/homefiles.git"),
        "svn://clarson@kergoth.com/;module=homefiles;protocol=http": new_url(
            scheme="svn", netloc="clarson@kergoth.com",
            path="/", params={"module": "homefiles",
                              "protocol": "http"}),
        "svn://svn.enlightenment.org/svn/e/trunk;module=E-MODULES-EXTRA/elfe;scmdata=keep;proto=http":
            new_url(scheme="svn", netloc="svn.enlightenment.org", path="/svn/e/trunk",
                    params={"module": "E-MODULES-EXTRA/elfe", "scmdata": "keep", "proto": "http"}),
    }

    def test_decode_performance(self):
        """Old and new decoders must agree; print timings for each URL."""
        from timeit import repeat
        from oe_uri import decodeurl, new_decodeurl
        for url in self.uris:
            self.assertEqual(decodeurl(url), new_decodeurl(url))
            # The lambdas are called inside this iteration, so closing over
            # the loop variable is safe.
            old = repeat(lambda: decodeurl(url), number=1000, repeat=1)
            new = repeat(lambda: new_decodeurl(url), number=1000, repeat=1)
            new2 = repeat(lambda: parse_url(url), number=1000, repeat=1)
            print("decode for %s: old=%s, new=%s, new without wrapper=%s" % (url, old, new, new2))

    def test_encode_performance(self):
        """Old and new encoders must agree; print timings for each URL."""
        from timeit import repeat
        from oe_uri import decodeurl, encodeurl, new_encodeurl
        for url in self.uris:
            parsed = parse_url(url)
            decoded = decodeurl(url)
            self.assertEqual(encodeurl(decoded), new_encodeurl(decoded))
            old = repeat(lambda: encodeurl(decoded), number=1000, repeat=1)
            new = repeat(lambda: new_encodeurl(decoded), number=1000, repeat=1)
            # Time rendering the parsed Url; str(url) on the plain string
            # would measure nothing relevant.
            new2 = repeat(lambda: str(parsed), number=1000, repeat=1)
            print("encode for %s: old=%s, new=%s, new without wrapper=%s" % (url, old, new, new2))

    def test_uris(self):
        """parse_url must produce the expected Url for every known URL."""
        for url, compareto in self.uris.items():
            parsed = parse_url(url)
            self.assertEqual(parsed, compareto)

    def test_file_uri_rejoin(self):
        """A relative file URL survives a parse/str round trip."""
        url = parse_url("file://defconfig")
        self.assertEqual(str(url), "file://defconfig")

    def test_file_uri_rejoin_abs(self):
        """An absolute file URL survives a parse/str round trip."""
        url = parse_url("file:///foo/defconfig")
        self.assertEqual(str(url), "file:///foo/defconfig")
"""Enhance urlparse for OpenEmbedded's needs
- Handles OpenEmbedded's odd file urls.
OE uses file://foo/bar.patch as relative, file:///foo/bar.patch as absolute,
but '//' following the scheme implies the existence of an authority, aka
a hostname, and urlparse handles it in that way.
- Allows url params for all schemes.
- Pre-parses the params and query string for convenience.
Portions of the Url class were copied directly from the urlparse source tree.
The encodeurl and decodeurl functions are still provided, for compatibility reasons.
"""
import urlparse
import warnings
from collections import namedtuple
def parse_url(url):
    """Parse ``url`` into a ``Url``.

    A leading ``file://`` is rewritten to ``file:`` before parsing, so the
    text after it is treated as a path rather than as a hostname.
    Everything after the first ';' is taken as the parameter string and
    parsed with ``parse_params``; the query string is parsed with
    ``urlparse.parse_qs``.

    Returns ``Url(scheme, netloc, path, params, query, fragment)`` where
    ``params`` and ``query`` are dicts.
    """
    marker = 'file://'
    # Rewrite the scheme prefix only.  A blanket str.replace would also
    # corrupt schemes that merely end in "file" (e.g. "profile://") and any
    # "file://" embedded in a parameter value.
    if url.startswith(marker):
        url = 'file:' + url[len(marker):]

    # parse_params returns {} for the empty string left when there is no ';'.
    url, _, raw_params = url.partition(';')
    params = parse_params(raw_params)

    scheme, netloc, path, _, query, fragment = urlparse.urlparse(url)
    return Url(scheme, netloc, path, params, urlparse.parse_qs(query), fragment)
def parse_params(params):
    """Parse a ';'-separated ``key=value`` parameter string into a dict.

    A segment is split at its first '=', so a value may itself contain
    '='.  A segment without '=' is stored as a flag with the value True.
    Empty segments (from a leading, trailing or doubled ';') are ignored.

    Returns an empty dict when ``params`` is empty or None.
    """
    values = {}
    if not params:
        return values
    for param in params.split(';'):
        if not param:
            # Would otherwise create a bogus '' -> True entry.
            continue
        key, sep, value = param.partition('=')
        values[key] = value if sep else True
    return values
#noinspection PyUnresolvedReferences
class Url(urlparse.ParseResult):
    """Representation of a Uniform Resource Identifier.

    Has the same six fields as ``urlparse.ParseResult``.  The properties
    below expect ``params`` to be a dict of name -> value and ``query`` to
    be a dict of name -> list of values, which is what ``parse_url``
    stores.
    """
    __slots__ = ()

    @property
    def querystring(self):
        """Reassembled query string (``key=value`` pairs joined by '&').

        '&' is used rather than ';' because ``parse_url`` treats the first
        ';' as the start of the parameters, so a ';'-joined query would not
        survive a round trip.
        """
        query = self.query or {}
        return '&'.join('%s=%s' % (key, item)
                        for key, values in query.items()
                        for item in values)

    @property
    def parameterstring(self):
        """Reassembled parameter string (entries joined by ';').

        A parameter whose value is True (a flag given without '=') is
        written as the bare key, so it parses back to the same value.
        """
        params = self.params or {}
        return ';'.join(key if value is True else '%s=%s' % (key, value)
                        for key, value in params.items())

    def join(self, otherurl):
        """Join this url to a possibly relative URL to form an absolute
        interpretation of the latter."""
        return parse_url(urlparse.urljoin(str(self), str(otherurl)))

    def unsplit(self):
        """String version of URL without parameters.

        A scheme listed in ``urlparse.uses_netloc`` gets the '//' marker
        even when netloc is empty, and the path is appended unchanged, so
        a file path rejoins as ``file://path``.
        """
        url = self.path
        if self.netloc or (self.scheme and self.scheme in urlparse.uses_netloc and
                           url[:2] != '//'):
            url = '//' + (self.netloc or '') + url
        if self.scheme:
            url = self.scheme + ':' + url
        if self.query:
            url += '?' + self.querystring
        if self.fragment:
            url += '#' + self.fragment
        return url

    def geturl(self):
        """Full string version of the URL, with the parameters appended
        after a ';' at the very end."""
        url = self.unsplit()
        if self.params:
            url += ';' + self.parameterstring
        return url

    def __str__(self):
        return self.geturl()
# vim: set et fenc=utf-8 sts=4 sw=4 :
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment