-
-
Save AnderRV/004d891f58cb88e52d6f4048209bb24e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.parse import urlparse | |
from parsers import defaults | |
# Registry of site-specific parsers, keyed by the hostname of the page URL.
# Both sites listed here currently use the `defaults` parser imported above;
# get_parser() consults this mapping when choosing a parser for a URL.
parsers = {
    "scrapeme.live": defaults,
    "quotes.toscrape.com": defaults,
}
def get_parser(url):
    """Return the parser registered for the host of *url*.

    The hostname is extracted with ``urlparse`` and looked up in the
    module-level ``parsers`` dict. When the dict has no entry for that
    hostname, the ``defaults`` parser is returned instead.
    """
    host = urlparse(url).hostname
    # One lookup with a fallback: custom parser if registered, else defaults.
    return parsers.get(host, defaults)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment