Skip to content

Instantly share code, notes, and snippets.

@bbelderbos
Created August 11, 2020 15:01
Show Gist options
  • Save bbelderbos/422de5b5504f2021fec60baf634611f6 to your computer and use it in GitHub Desktop.
Save bbelderbos/422de5b5504f2021fec60baf634611f6 to your computer and use it in GitHub Desktop.
import collections
import bs4
import requests
import requests_cache
# Install a process-wide cache for `requests` under the cache name 'cache',
# so repeated runs of this script do not re-download the page every time.
# NOTE(review): this is an import-time side effect affecting every
# `requests` call in the process — confirm that is intended for importers.
requests_cache.install_cache('cache')
# Page that is scraped for the table of popular email domains.
URL = "https://email-verify.my-addr.com/list-of-most-popular-email-domains.php"
def _get_html(url=URL):
    """Download *url* and return the ``div`` of class ``middle_info_noborder``.

    Args:
        url: Address of the page to fetch; defaults to the module-level URL.

    Returns:
        The first matching ``div`` element as parsed by BeautifulSoup, or
        ``None`` when the page contains no such element.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not respond within the timeout.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    resp = requests.get(url, timeout=30)
    # Fail loudly on an error status instead of parsing an error page.
    resp.raise_for_status()
    soup = bs4.BeautifulSoup(resp.content, 'html.parser')
    return soup.find('div', attrs={'class': 'middle_info_noborder'})
def get_most_common_domain(div=None, top=5):
    """Count the most frequent domain names found in the table rows of *div*.

    For every ``tr`` inside *div*, the text of the third ``td`` cell is taken
    and cut at its first dot (``"yahoo.co.uk"`` -> ``"yahoo"``), so entries
    that differ only in their suffix are counted together.
    NOTE(review): the third column is presumably the domain column of the
    scraped page — verify against the live markup.

    Args:
        div: Parsed element containing the table rows. When ``None``, the
            page is fetched via ``_get_html()``.
        top: How many entries to return; passed to ``Counter.most_common``.

    Returns:
        A list of ``(name, count)`` tuples, most frequent first.
    """
    if div is None:
        div = _get_html()

    counts = collections.Counter()
    for tr in div.find_all('tr'):
        cells = tr.find_all('td')
        # Header/spacer rows have fewer than three <td> cells; indexing them
        # blindly would raise IndexError, so skip them.
        if len(cells) < 3:
            continue
        # Strip surrounding whitespace so " gmail.com" and "gmail.com" are
        # counted as the same name.
        name = cells[2].text.strip().split('.')[0]
        counts[name] += 1
    return counts.most_common(top)
if __name__ == '__main__':
    # Run as a script: scrape the page and show the top domain names.
    print(get_most_common_domain())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment