Skip to content

Instantly share code, notes, and snippets.

@sxslex
Last active September 7, 2016 16:22
Show Gist options
  • Save sxslex/8e9b3b190a84ed5c5c08893a27d01638 to your computer and use it in GitHub Desktop.
Save sxslex/8e9b3b190a84ed5c5c08893a27d01638 to your computer and use it in GitHub Desktop.
Decodifica um url em partes
# -*- coding: utf-8 -*-
# da_urllib.py
# by sx.slex@gmail.com
import urllib
def urldencode(params):
    """Decode a URL query string into a dictionary.

    ``params`` is split on ``&`` and empty segments are skipped.  Each
    remaining segment is split on its *first* ``=`` only, so a value may
    itself contain ``=`` (e.g. base64 padding) without losing characters.

    * Values are decoded with ``unquote_plus`` (``+`` becomes a space and
      ``%XX`` escapes are resolved).  Keys are kept exactly as written.
    * A key that has no ``=`` at all maps to ``None``; ``key=`` maps to
      the empty string.
    * A key that occurs more than once maps to a list holding its values
      in order of appearance.

    :param params: the raw query string, without the leading ``?``.
    :return: dict mapping each key to a string, ``None`` or a list.
    """
    # unquote_plus lives in urllib.parse on Python 3 and directly in
    # urllib on Python 2; resolve it once per call, outside the loop.
    try:
        from urllib.parse import unquote_plus
    except ImportError:
        from urllib import unquote_plus

    resp = {}
    for item in params.split('&'):
        if not item:
            continue
        # partition() splits on the first '=' only and reports through
        # ``sep`` whether an '=' was present at all.
        key, sep, raw_value = item.partition('=')
        value = unquote_plus(raw_value) if sep else None
        if key not in resp:
            resp[key] = value
        elif isinstance(resp[key], list):
            resp[key].append(value)
        else:
            # Second occurrence: promote the single value to a list.
            resp[key] = [resp[key], value]
    return resp
def extract_url(url):
    """Split a URL into protocol, host, path, query and fragment.

    The components are peeled off from the outside in, so a delimiter
    that shows up inside a later component is never mistaken for the
    delimiter of an earlier one (e.g. ``://`` inside a query value):

    1. fragment -- everything after the first ``#``;
    2. query    -- everything after the first ``?`` of what is left;
    3. protocol -- everything before the first ``://``
       (``'http'`` when the URL carries no protocol);
    4. host     -- everything up to the first ``/``;
    5. path     -- the remainder, without the leading ``/``.

    :param url: the URL to split.
    :return: dict with the keys ``protocol``, ``host``, ``path`` and
        ``fragment`` (``host``, ``path`` and ``fragment`` are ``None``
        when the part is absent) plus ``query``, the dict returned by
        ``urldencode`` (empty when there is no query string).
    """
    protocol = 'http'
    host = None
    path = None
    fragment = None
    query = ''

    # The fragment is the last URL component, so strip it first; a '?'
    # inside the fragment must not be read as the query delimiter.
    if '#' in url:
        url, fragment = url.split('#', 1)
    # Split on the first '?' only so later '?' characters stay in the query.
    if '?' in url:
        url, query = url.split('?', 1)
    # Only now look for the protocol: the query (which may contain a
    # full URL as a value) has already been removed.
    if '://' in url:
        protocol, url = url.split('://', 1)
    if '/' in url:
        host, path = url.split('/', 1)
    elif url:
        # No path at all, e.g. 'http://www.slex.com.br'.
        host = url

    return dict(
        protocol=protocol,
        host=host,
        path=path,
        fragment=fragment,
        query=urldencode(query)
    )


if __name__ == '__main__':
    import pprint

    # Empty query string.
    pprint.pprint(urldencode(''))
    # {}

    # Plain query string with an escaped value and a value-less key.
    pprint.pprint(urldencode('a=SleX&c=cc&b=Slex+%26+CIA&teste'))
    # {'a': 'SleX', 'b': 'Slex & CIA', 'c': 'cc', 'teste': None}

    # URL without a query string: the path is reported, the query is empty.
    pprint.pprint(
        extract_url(
            url=(
                'http://www.slex.com.br/teste/aaa'
            )
        )
    )
    # {'fragment': None,
    #  'host': 'www.slex.com.br',
    #  'path': 'teste/aaa',
    #  'protocol': 'http',
    #  'query': {}}

    # URL with a query string.
    pprint.pprint(
        extract_url(
            url=(
                'http://www.slex.com.br/teste/aaa?' +
                'a=SleX&c=cc&b=Slex+%26+CIA&teste'
            )
        )
    )
    # {'fragment': None,
    #  'host': 'www.slex.com.br',
    #  'path': 'teste/aaa',
    #  'protocol': 'http',
    #  'query': {'a': 'SleX', 'b': 'Slex & CIA', 'c': 'cc', 'teste': None}}

    # URL with a repeated key and a fragment.
    pprint.pprint(
        extract_url(
            url=(
                'http://www.slex.com.br/teste/aaa?' +
                'nome=SleX&nome=Alex&c=cc&b=Slex+%26+CIA&teste#ancora'
            )
        )
    )
    # {'fragment': 'ancora',
    #  'host': 'www.slex.com.br',
    #  'path': 'teste/aaa',
    #  'protocol': 'http',
    #  'query': {'b': 'Slex & CIA',
    #            'c': 'cc',
    #            'nome': ['SleX', 'Alex'],
    #            'teste': None}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment