Skip to content

Instantly share code, notes, and snippets.

@Miha-Pleskovic
Created August 10, 2016 16:41
Show Gist options
  • Save Miha-Pleskovic/564be31505d5979d19cc5e8396333bd0 to your computer and use it in GitHub Desktop.
Save Miha-Pleskovic/564be31505d5979d19cc5e8396333bd0 to your computer and use it in GitHub Desktop.
Data Scraper
# -*- coding: utf-8 -*-
import csv
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urllib2 import urlopen

from BeautifulSoup import BeautifulSoup
# SCRAPING DATA
# Fetch the index page, follow every "See full profile" link, and record each
# person's name, e-mail and city on stdout and as one row of emaili.csv.
url = "https://scrapebook22.appspot.com"
response = urlopen(url).read()
soup = BeautifulSoup(response)
print(soup.html.head.title.string)
print("")

# The Python 2 csv module expects a binary-mode file and byte strings.
# The `with` block closes the file even if a request or lookup below raises.
with open("emaili.csv", "wb") as csv_file:
    # csv.writer quotes fields that contain commas or quotes; lineterminator
    # keeps rows ending in "\n" instead of the csv default "\r\n".
    writer = csv.writer(csv_file, lineterminator="\n")
    for link in soup.findAll("a"):
        if link.string != "See full profile":
            continue

        # NOTE(review): assumes each href is a path relative to the site
        # root, since it is appended to `url` verbatim - confirm.
        person_url = url + link["href"]
        person_html = urlopen(person_url).read()
        person_soup = BeautifulSoup(person_html)

        email_tag = person_soup.find("span", attrs={"class": "email"})
        headings = person_soup.findAll("h1")
        city_tag = person_soup.find("span", attrs={"data-city": True})

        # The name is read from the second <h1> of the profile page.
        # A missing element, or a tag whose .string is None, would otherwise
        # abort the whole run, so such profiles are reported and skipped.
        if email_tag is None or city_tag is None or len(headings) < 2:
            print("Skipping incomplete profile: " + person_url)
            continue
        name = headings[1].string
        email = email_tag.string
        city = city_tag.string
        if name is None or email is None or city is None:
            print("Skipping incomplete profile: " + person_url)
            continue

        print(name + ", " + email + ", " + city)
        # Encode explicitly: writing unicode through the implicit ASCII
        # codec fails on any non-ASCII character.
        writer.writerow([
            name.encode("utf-8"),
            email.encode("utf-8"),
            city.encode("utf-8"),
        ])
# SENDING E-MAIL
# Build a multipart message with a short text body plus emaili.csv as an
# attachment, then send it through an SMTP server after upgrading to TLS.
sender = "john.hancock@fakemail.com"
# NOTE(review): placeholder credentials are hard-coded here. A real password
# should come from outside the source file and must never be committed.
password = "Fake_Password"
recipient = "fakey.mcfake@fakemail.com"
subject = "Scraping the barrel"
content = "I'm sending you a .csv file containing scrapped e-mails. Note that they're fake (obviously).\n\nWarm regards,\n\nJohn Hancock"

e_mail = MIMEMultipart()
e_mail["From"] = sender
e_mail["To"] = recipient
e_mail["Subject"] = subject
e_mail.attach(MIMEText(content))

filename = "emaili.csv"
# `with` closes the file as soon as it has been read.
with open(filename) as f:
    # Label the part as text/csv in UTF-8 rather than the default
    # text/plain us-ascii, so mail clients treat it as a CSV file.
    attachment = MIMEText(f.read(), "csv", "utf-8")
attachment.add_header('Content-Disposition', 'attachment', filename=filename)
e_mail.attach(attachment)

try:
    server = smtplib.SMTP("smtp.fakemail.com:420")
    try:
        server.ehlo()
        server.starttls()
        server.login(sender, password)
        server.sendmail(from_addr=sender, to_addrs=recipient, msg=e_mail.as_string())
        server.quit()
    finally:
        # After a successful quit() this does nothing; after a failure it
        # releases the socket that would otherwise stay open.
        server.close()
    print("The message is sent!")
except Exception as error:
    # Top-level boundary of the script: report the failure, no traceback.
    print("ERROR!")
    print(error)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment