Skip to content

Instantly share code, notes, and snippets.

@ronaldgreeff
Created January 25, 2019 16:45
Show Gist options
  • Save ronaldgreeff/286032ceddac8622f86a79f9eea91f42 to your computer and use it in GitHub Desktop.
Save ronaldgreeff/286032ceddac8622f86a79f9eea91f42 to your computer and use it in GitHub Desktop.
Regex URL split
# -*- coding: utf-8 -*-
import re
# Splits a URL into seven capture groups (positions as returned by .groups()):
#   0: scheme            ("http", "https" or "ftp")
#   1: host              (no ':', '/' or whitespace)
#   2: directory path    (including leading and trailing '/')
#   3: last "/segment" of the directory path (None when there is none)
#   4: file name
#   5: whatever follows the file name, up to the fragment (e.g. "?var1=foo")
#   6: "#fragment"       (None when absent)
# Group 5 is deliberately non-greedy: a greedy ".*" would swallow the trailing
# "#fragment" and leave group 6 permanently unmatched.
# A raw string keeps the regex escapes (\/, \w, \s, ...) away from Python's own
# string-escape processing.
PATTERN = re.compile(
    r'^(http[s]?|ftp):\/?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*?)(#[\w\-]+)?$'
)
class Search(object):
    """Hold a list of URLs and split any one of them with PATTERN.

    Attributes:
        list_of_links: list of URL strings, filled from get_links() when the
            instance is constructed.
    """

    def __init__(self):
        """Populate list_of_links from get_links()."""
        self.list_of_links = self.get_links()

    def get_links(self):
        """Return the list of URLs to work on (one hard-coded sample)."""
        return ['http://www.test.com/dir/penis/filename.jpg?var1=foo#bar']

    def split_url(self, i):
        """Split the i-th link into the capture groups of PATTERN.

        Args:
            i: index into list_of_links.

        Returns:
            dict mapping each zero-based group position to the captured
            text (None for groups that did not take part in the match).

        Raises:
            IndexError: if i is out of range for list_of_links.
            ValueError: if the link does not match PATTERN.
        """
        url = self.list_of_links[i]
        match = PATTERN.match(url)
        if match is None:
            # A failed match returns None; report the offending URL instead
            # of failing with an AttributeError on None.groups().
            raise ValueError('URL does not match PATTERN: %r' % (url,))
        return dict(enumerate(match.groups()))
def main():
    """Build a Search and print the split parts of its first link."""
    search_obj = Search()
    # Call form of print: same output under Python 2 for a single argument,
    # and valid syntax under Python 3 (the bare print statement is not).
    print(search_obj.split_url(0))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment