Last active
August 29, 2015 13:57
-
-
Save emmettbutler/9394992 to your computer and use it in GitHub Desktop.
Example solution to introductory python exercise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""urltools.py - parse and format web URLs. | |
HINT:

>>> "http://google.com".split("://")
['http', 'google.com']
>>> "google.com/hangout/parsely.com/am".split("/")
['google.com', 'hangout', 'parsely.com', 'am']
>>> "google.com/hangout/parsely.com/am".split("/", 1)
['google.com', 'hangout/parsely.com/am']

This is basically all you need to implement the parser.

For formatting / rejoining:

>>> "{host}/{path}".format(host="google.com", path="plus")
'google.com/plus'

You can do the whole thing without a single import!
""" | |
class URLParseError(Exception):
    """Error raised when a URL string cannot be split into its parts.

    Attributes:
        message: Short description of what is wrong with the URL.
    """

    def __init__(self, message):
        # Initialise the Exception base explicitly so ``args`` always carries
        # the message, including when the error is built with ``message=...``.
        super(URLParseError, self).__init__(message)
        self.message = message

    def __str__(self):
        return "URL parse error: {}".format(self.message)
def url_parse(*args):
    """Take a string URL and return a dictionary of its various parts.

    Args:
        *args: The URL string is read from the first positional argument;
            any further arguments are ignored.

    Returns:
        A dict with the keys "scheme", "host", "path", "port", "fragment",
        "query" and "userinfo". Components that are absent from the URL are
        None. The scheme is lower-cased. "port" is an int taken from the
        URL, or 80 for http / 443 for https when the URL names no port.
        When called with no arguments every value is None.

    Raises:
        URLParseError: If the URL contains no "://", contains no "/" after
            the "://", or has a port that is not made of digits only.
    """
    ret = {
        "scheme": None,
        "host": None,
        "path": None,
        "port": None,
        "fragment": None,
        "query": None,
        "userinfo": None,
    }
    if not args:
        return ret
    url = args[0]

    if "://" not in url:
        raise URLParseError("Missing scheme")
    # Split on the FIRST "://" only: a later one (for example a URL embedded
    # in the query string) belongs to the remainder, not to the scheme.
    scheme, rest = url.split("://", 1)
    scheme = scheme.lower()
    ret["scheme"] = scheme
    # Default port for the schemes we know; overridden below if one is given.
    ret["port"] = {"http": 80, "https": 443}.get(scheme)

    # NOTE: a URL without any "/" after the authority (e.g. "http://host")
    # is rejected here; this is the original contract and is kept as-is.
    if "/" not in rest:
        raise URLParseError("Missing authority")
    authority, remainder = rest.split("/", 1)
    path_query_fragment = "/" + remainder

    # Everything before the last "@" is userinfo; the rest is host[:port].
    userinfo, at_sign, host_port = authority.rpartition("@")
    ret["userinfo"] = userinfo if at_sign else None

    host, colon, port = host_port.partition(":")
    if colon:
        # Anything that is not purely digits (including "" and "80:90") is
        # reported as an invalid port.
        if not port.isdigit():
            raise URLParseError("Invalid port: {}".format(port))
        ret["port"] = int(port)
    ret["host"] = host

    # The fragment starts at the first "#", so peel it off BEFORE looking for
    # the query; otherwise a "?" inside the fragment is mistaken for a query.
    path_query, hash_sign, fragment = path_query_fragment.partition("#")
    path, question_mark, query = path_query.partition("?")
    ret["path"] = path
    ret["query"] = query if question_mark else None
    ret["fragment"] = fragment if hash_sign else None
    return ret
def url_join(*args):
    """Take a dictionary of URL parts and return a valid URL.

    Args:
        *args: The parts dictionary is read from the first positional
            argument. "scheme", "host" and "path" are required keys;
            "userinfo", "port", "query" and "fragment" are optional and are
            left out of the result when missing or falsy.

    Returns:
        The assembled URL string, or "" when called without arguments or
        with an empty / falsy dictionary. The port is omitted when it is the
        default for the scheme (80 for http, 443 for https).

    Raises:
        KeyError: If "scheme", "host" or "path" is missing from the dict.
    """
    in_dict = args[0] if args else None
    if not in_dict:
        return ""

    scheme = in_dict["scheme"]

    _userinfo = in_dict.get("userinfo")
    userinfo = "{}@".format(_userinfo) if _userinfo else ""

    _query = in_dict.get("query")
    query = "?{}".format(_query) if _query else ""

    _fragment = in_dict.get("fragment")
    fragment = "#{}".format(_fragment) if _fragment else ""

    # Emit the port unless it is the scheme's well-known default. Schemes
    # without a known default (ftp, ...) always keep an explicit port, so a
    # parsed URL such as "ftp://host:2121/" round-trips unchanged.
    default_ports = {"http": 80, "https": 443}
    _port = in_dict.get("port")
    port = ""
    if _port and _port != default_ports.get(scheme):
        port = ":{}".format(_port)

    return "{scheme}://{userinfo}{host}{port}{path}{query}{fragment}".format(
        scheme=scheme,
        userinfo=userinfo,
        host=in_dict["host"],
        port=port,
        path=in_dict["path"],
        query=query,
        fragment=fragment,
    )
def test_basic_url():
    """Check url_parse on a plain http URL with no port, query or fragment."""
    parts = url_parse("http://www.linkedin.com/in/andrewmontalenti")

    # Components that must be present, checked in a fixed order.
    expected = (
        ("scheme", "http"),
        ("host", "www.linkedin.com"),
        ("path", "/in/andrewmontalenti"),
        ("port", 80),  # default port implied by the http scheme
    )
    for key, value in expected:
        assert parts[key] == value

    # Components the URL does not contain must come back as None.
    for key in ("fragment", "query"):
        assert parts[key] is None
def test_advanced_url():
    """Check url_parse on a URL with userinfo, port, query and fragment."""
    parts = url_parse(
        "http://cogtree@www.linkedin.com:1234"
        "/profile/view?id=13836198&trk=ppro_viewmore#more-123"
    )
    expected = (
        ("fragment", "more-123"),
        ("query", "id=13836198&trk=ppro_viewmore"),
        ("userinfo", "cogtree"),
        ("port", 1234),  # explicit port overrides the http default
    )
    for key, value in expected:
        assert parts[key] == value
def test_joining_url():
    """Check url_join directly and through parse/join round trips."""
    url_parts = {
        "scheme": "http",
        "host": "www.linkedin.com",
        "path": "/profile/view",
        "fragment": "more-123",
        "query": "id=13836198&trk=ppro_viewmore",
        "port": 80,
    }

    # Default http port is left out of the joined URL.
    url = "http://www.linkedin.com/profile/view?id=13836198&trk=ppro_viewmore#more-123"
    assert url_join(url_parts) == url

    # A non-default port is written out.
    url_parts["port"] = 8080
    url = "http://www.linkedin.com:8080/profile/view?id=13836198&trk=ppro_viewmore#more-123"
    assert url_join(url_parts) == url

    # Default https port is left out as well.
    url_parts["scheme"] = "https"
    url_parts["port"] = 443
    url = "https://www.linkedin.com/profile/view?id=13836198&trk=ppro_viewmore#more-123"
    assert url_join(url_parts) == url

    # Parsing and re-joining must reproduce the original URL.
    url = "http://emmett@www.linkedin.com:1234/profile/view#more-123"
    assert url_join(url_parse(url)) == url
    url = "ftp://emmett:butler@linkedin.com/profile/view?haha=what#more-123"
    assert url_join(url_parse(url)) == url

    # "ftp//..." has no "://" separator, so url_parse raises instead of
    # returning parts; the previous round-trip assertion could never run.
    url = "ftp//linkedin.com/profile/view?haha=what#more-123"
    try:
        url_parse(url)
    except URLParseError:
        pass
    else:
        raise AssertionError("url_parse accepted a URL without '://'")
def main():
    """Run the module's tests in order; the first failing test raises."""
    for run_test in (test_basic_url, test_advanced_url, test_joining_url):
        run_test()


# Run the tests only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment