Skip to content

Instantly share code, notes, and snippets.

@ibrahimsha23
Created February 22, 2022 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ibrahimsha23/8e99219e32152cccc03415614d78886d to your computer and use it in GitHub Desktop.
Save ibrahimsha23/8e99219e32152cccc03415614d78886d to your computer and use it in GitHub Desktop.
Regex pattern to split a URL into its components (scheme, user, password, host, port, path, query)
import re
# Compiled once at import time. The scheme, user, password and host groups
# are limited to characters other than '/', '?' and newline, so an '@' or
# '://' that appears later in the path or query string can never be mistaken
# for the user-info or scheme delimiter. Excluding '\n' mirrors what '.'
# matches, which keeps multi-line input rejected as before.
_URL_PATTERN = re.compile(
    r'^'
    r'(?:(?P<schema>[^/?\n]+?)://)?'
    r'(?:(?P<user>[^/?\n]+?)(?::(?P<password>[^/?\n]*?))?@)?'
    r'(?P<host>[^/?\n]*?)'
    r'(?::(?P<port>\d+?))?'
    r'(?P<path>/.*?)?'
    r'(?P<query>[?].*?)?'
    r'$'
)


def url_path_to_dict(path):
    """Split a URL-like string into its components.

    Args:
        path: The URL (or bare ``host/path`` string) to split.

    Returns:
        A dict with the keys ``schema``, ``user``, ``password``, ``host``,
        ``port``, ``path`` and ``query``. Components that are absent from
        the input map to ``None`` (``host`` is an empty string when nothing
        precedes the path). ``port`` is returned as a string of digits,
        ``path`` keeps its leading ``/`` and ``query`` keeps its leading
        ``?``. A ``#fragment`` is not split out; it stays in whichever
        component it trails. Returns ``None`` when the pattern does not
        match, e.g. for input containing an embedded newline.
    """
    match = _URL_PATTERN.match(path)
    return match.groupdict() if match is not None else None
def main():
    """Parse a fixed sample URL and return the resulting component dict."""
    sample_url = (
        'http://ibrahim:password@example.example.com/example/example/example.html'
    )
    components = url_path_to_dict(sample_url)
    return components
# Demo run, executed at module level: call main() to parse the sample URL,
# keep the result in the module-level name `data`, and print it.
data = main()
print(data)
# Output of the print() call above (shown pretty-printed for readability):
{
'schema': 'http',
'user': 'ibrahim',
'password': 'password',
'host': 'example.example.com',
'port': None,
'path': '/example/example/example.html',
'query': None
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment