Skip to content

Instantly share code, notes, and snippets.

@zokis
Created August 26, 2012 03:43
Show Gist options
  • Save zokis/3473678 to your computer and use it in GitHub Desktop.
Save zokis/3473678 to your computer and use it in GitHub Desktop.
URL Parser
"""
URL parser kata: sample URLs the parser is expected to handle.

http://www.gmail.com/g/mail?fr=3&ui=2
http://gmail.com
ssh://marcelo@git.com
ftp://ime.usp.com.br/~denis/?model=SistemasDeArquivo
"""
import unittest
import string
def analisador_url(url):
    """Split a URL into protocol, user, host, domain, path and query string.

    Args:
        url: Text shaped like ``protocolo://[user@]name[/path][?query]``.

    Returns:
        A dict with the keys:

        * ``protocolo`` -- the text before ``://``.
        * ``user`` -- the text before the last ``@`` of the authority part,
          or ``None`` when there is no ``@``.
        * ``host`` -- the first dot-separated label when the name has three
          or more labels, otherwise ``''``.
        * ``dominio`` -- the labels after ``host``; the whole name when it
          has fewer than three labels.
        * ``path`` -- the path including its leading ``/``, or ``None`` when
          the URL has no path.
        * ``querystring`` -- dict built from the ``key=value`` pairs after
          the first ``?`` (empty dict when there is none).

    Raises:
        ValueError: if ``url`` does not contain ``://``.
    """
    # Everything after the first '?' is the query; partition never raises,
    # unlike tuple-unpacking the result of split() on a second '?'.
    url_sem_query, _, querystring = url.partition('?')

    parametros = {}
    for par in querystring.split('&'):
        if not par:
            # Empty query or stray '&': nothing to record.
            continue
        # Split on the first '=' only, so values may themselves contain '='
        # and a bare key maps to an empty string.
        chave, _, valor = par.partition('=')
        parametros[chave] = valor

    protocolo, separador, resto = url_sem_query.partition('://')
    if not separador:
        raise ValueError("missing '://' in URL: %r" % (url,))

    # Detach the path before looking for '@', so an '@' that appears inside
    # the path is not mistaken for the user separator.
    autoridade, barra, caminho = resto.partition('/')
    path = '/' + caminho if barra else None

    user = None
    if '@' in autoridade:
        user, _, autoridade = autoridade.rpartition('@')

    rotulos = autoridade.split('.')
    if len(rotulos) > 2:
        host, dominio = rotulos[0], '.'.join(rotulos[1:])
    else:
        # One or two labels ('localhost', 'gmail.com'): no separate host,
        # and the domain is the full name rather than just its first label.
        host, dominio = '', autoridade

    return {
        'querystring': parametros,
        'protocolo': protocolo,
        'user': user,
        'path': path,
        'host': host,
        'dominio': dominio,
    }
class DojoUrls(unittest.TestCase):
    """Checks each field of the dict returned by ``analisador_url``."""

    def test_deve_retornar_protocolo(self):
        """The protocol is the text before ``://``."""
        self.assertEqual('http',
                         analisador_url('http://')['protocolo'])
        self.assertEqual('ssh',
                         analisador_url('ssh://marcelo@git.com')['protocolo'])
        self.assertEqual('ftp',
                         analisador_url('ftp://ime.usp.com.br/~denis/?model=SistemasDeArquivo')['protocolo'])

    def test_deve_retornar_host(self):
        """The host is the first label of a name with three or more labels."""
        self.assertEqual('www',
                         analisador_url('http://www.gmail.com')['host'])
        self.assertEqual('ime',
                         analisador_url('http://ime.usp.com.br')['host'])

    def test_deve_retornar_dominio(self):
        """The domain is everything after the host label."""
        self.assertEqual('gmail.com',
                         analisador_url('http://www.gmail.com')['dominio'])
        self.assertEqual('usp.com.br',
                         analisador_url('http://ime.usp.com.br/~denis/?model=SistemasDeArquivo')['dominio'])

    def test_deve_retornar_path(self):
        """The path keeps its leading slash and excludes the query string."""
        self.assertEqual('/g/mail',
                         analisador_url('http://www.gmail.com/g/mail?fr=3&ui=2')['path'])
        self.assertEqual('/~denis/',
                         analisador_url('ftp://ime.usp.com.br/~denis/?model=SistemasDeArquivo')['path'])

    def test_deve_retornar_querystring(self):
        """The query string is returned as a dict of key/value strings."""
        self.assertEqual({'model': 'SistemasDeArquivo'},
                         analisador_url('ftp://ime.usp.com.br/~denis/?model=SistemasDeArquivo')['querystring'])
        self.assertEqual({'fr': '3', 'ui': '2'},
                         analisador_url('http://www.gmail.com/g/mail?fr=3&ui=2')['querystring'])

    def test_deve_retornar_user(self):
        """The user is the text before ``@``, or ``None`` when absent."""
        self.assertEqual('marcelo',
                         analisador_url('ssh://marcelo@git.com')['user'])
        self.assertEqual('weverton',
                         analisador_url('ssh://weverton@git.com')['user'])
        self.assertIsNone(analisador_url('ssh://git.com')['user'])
# When the file is executed as a script, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment