Skip to content

Instantly share code, notes, and snippets.

@sladjandr
Created October 4, 2016 21:00
Show Gist options
  • Save sladjandr/e7b231a4423df737f323cfc10b2eae7d to your computer and use it in GitHub Desktop.
Save sladjandr/e7b231a4423df737f323cfc10b2eae7d to your computer and use it in GitHub Desktop.
Scrape all mailto links from a page - Python
import requests
import xlwt
from bs4 import BeautifulSoup
# Add the URL of the page you want to scrape to urlString.
urlString = 'Your page URL goes here'

# Addresses collected by the scraper, and the index of the last sheet row
# written so far (row 0 is the header row).
emailList = []
r = 0

# Workbook with a single 'Emails' sheet whose first cell holds the header.
wb = xlwt.Workbook()
ws = wb.add_sheet('Emails')
ws.write(0, 0, 'Emails')
# Function that extracts every mailto address from the given page and stores it in emailList.
def emailExtractor(urlString):
    """Collect the addresses of all ``mailto:`` links on a page.

    Downloads ``urlString``, parses the response body as HTML and appends
    the address part of every ``<a href="mailto:...">`` link to the
    module-level ``emailList``.

    Args:
        urlString: URL of the page to scrape.

    Returns:
        The addresses found on this page, in document order. These are the
        same values that were appended to ``emailList``.

    Raises:
        Any exception raised by ``requests.get`` (invalid URL, connection
        failure, timeout) is propagated to the caller.
    """
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(urlString, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    found = []
    for link in soup.select('a[href^=mailto]'):
        # Split on the FIRST colon only: the part after "mailto:" may itself
        # contain colons (e.g. "?subject=Re: hello").
        _scheme, colon, remainder = link['href'].partition(':')
        if not colon:
            # Not a real "mailto:" link; skip it but keep scanning the rest
            # of the page instead of abandoning all remaining links.
            continue
        # Drop optional "?subject=...&body=..." parameters after the address.
        address = remainder.split('?', 1)[0].strip()
        if address:
            found.append(address)

    emailList.extend(found)
    return found
# Scrape the configured page; the addresses end up in emailList.
emailExtractor(urlString)

# Add the scraped emails to the Excel sheet, one per row in the first
# column, starting below the header row, then save the workbook.
for r, email in enumerate(emailList, start=1):
    ws.write(r, 0, email)
wb.save('emails.xls')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment