/interview.py Secret
Last active
August 29, 2015 14:02
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Parse URLs into useful features (protocol, hostname, path, etc.).
""" | |
import re | |
class url:
    """Match a URL string against a requested feature.

    Supported features are the protocol (scheme), the hostname and the path.
    """

    _FEATURE_NAMES = ("protocol", "hostname", "path")

    # Raw strings keep the regex escapes away from Python's own string-escape
    # processing; compiling once avoids re-parsing the patterns on every call.
    _PROTOCOL_RE = re.compile(r"^(\w+://)")
    _HOST_AFTER_PROTOCOL_RE = re.compile(r"^\w+://(\w+)")
    _BARE_HOST_RE = re.compile(r"^(\w+\.\w+)")
    # Everything after the first "/" that follows the host, without any
    # query string or fragment.  A greedy ".*" before the group would
    # swallow the whole path and leave the group empty.
    _PATH_RE = re.compile(r"^\w+://\w[^/?#]*/([^?#]+)")
    _FQDN_RE = re.compile(r"^\w+://(\w+\.\w+)")

    def parse(self, feature, url):
        """Check whether ``url`` carries the requested feature value.

        ``feature`` is a mapping keyed by "protocol", "hostname" or "path",
        and looks like this:

            {"protocol": "ftp"}
            {"hostname": "google"}
            {"path": "news/google"}

        How each part is read from ``url``:

        * protocol: the leading word characters plus "://" ("http://").
          The requested value may be given with or without the "://".
        * hostname: when the URL has a protocol, the first run of word
          characters after "://" ("google" for "http://google.com");
          otherwise the leading "name.tld" pair ("google.com" for
          "google.com/news").
          NOTE(review): the two cases extract different things; confirm
          which one callers expect before unifying them.
        * path: the text after the first "/" following the host, without a
          leading slash, query string or fragment.  Only URLs with a
          protocol yield a path.  The requested value may be given with or
          without a leading "/".

        If "protocol" is requested it alone decides the result; otherwise
        "hostname" is tried, then "path".

        An example call to this function:

            url().parse({"protocol": "http"}, "http://www.google.com")

        Returns one of:

        * a ``NameError`` instance (returned, not raised) when ``feature``
          contains none of the supported keys;
        * ``(True, [protocol], url)`` when the protocol matches;
        * ``(False, feature, [protocol])`` when a protocol was requested
          but does not match (the list is empty if the URL has none);
        * ``(True, [hostname])`` when the hostname matches;
        * ``(True, [path])`` when the path matches;
        * ``None`` when a hostname or path was requested and did not match.
        """
        if not any(name in self._FEATURE_NAMES for name in feature):
            # Callers receive the error object as the return value.
            return NameError(
                "Please use 'protocol', 'hostname', or 'path' "
                "when listing the feature you want to parse"
            )

        protocol = self._PROTOCOL_RE.findall(url)
        if protocol:
            hostname = self._HOST_AFTER_PROTOCOL_RE.findall(url)
        else:
            hostname = self._BARE_HOST_RE.findall(url)
        path = self._PATH_RE.findall(url)

        if "protocol" in feature:
            wanted = feature["protocol"]
            if protocol and (
                protocol[0] == wanted or protocol[0] == wanted + "://"
            ):
                return (True, protocol, url)
            return (False, feature, protocol)

        # findall() yields a list, so compare its single element (not the
        # list itself) with the requested string.
        if (
            "hostname" in feature
            and hostname
            and hostname[0] == feature["hostname"]
        ):
            return (True, hostname)

        if "path" in feature and path:
            wanted = feature["path"]
            if path[0] == wanted or "/" + path[0] == wanted:
                return (True, path)

        return None

    def is_fqdn(self, has_protocol, url):
        """Report a missing top-level domain in a URL that has a protocol.

        Returns a ``NameError`` instance (returned, not raised) when
        ``has_protocol`` is true and the text right after "://" is not of
        the form "name.tld".  Returns ``None`` otherwise, including whenever
        ``has_protocol`` is false (no check is made in that case).
        """
        if has_protocol and not self._FQDN_RE.findall(url):
            return NameError("url doesn't include a tld.")
        return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment