Last active
August 29, 2015 14:22
-
-
Save akshaykarnawat/0e2ab8ad4515917221e1 to your computer and use it in GitHub Desktop.
Crawl RIT Alumni Events Page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import smtplib | |
import requests | |
import traceback | |
from bs4 import BeautifulSoup | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.text import MIMEText | |
def crawlEventsPage(url):
    """Download the events page at ``url`` and return it as an HTML digest.

    The scraper walks every ``<table>`` on the page in consecutive pairs:
    the first table of a pair supplies the event date/time (text of its
    first cell) and the second supplies the event name and link (text and
    first ``<a href>`` of its first cell).
    NOTE(review): this pairing mirrors the original scraping logic; confirm
    it still matches the live page markup.

    Args:
        url: Address of the events page to fetch.

    Returns:
        The HTML string built by ``parseAsHtml`` from the flat
        ``[time, name, link, ...]`` list of scraped events.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            a timeout).
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    tables = soup.find_all('table')

    events = []
    # Pair tables (0, 1), (2, 3), ...; zip() drops a trailing unpaired table
    # instead of indexing past the end of the list.
    for date_table, info_table in zip(tables[0::2], tables[1::2]):
        date_cell = date_table.td
        info_cell = info_table.td
        if date_cell is None or info_cell is None:
            # Not an event pair (table without cells) - skip it.
            continue
        anchor = info_cell.a
        if anchor is None or anchor.get('href') is None:
            # No link to the event - skip rather than crash the whole crawl.
            continue

        events.append(date_cell.text)
        events.append(info_cell.text.replace("\n", ""))
        events.append(anchor['href'])

    return parseAsHtml(events)
def parseAsHtml(eventsList):
    """Render a flat list of events as a small HTML document.

    Args:
        eventsList: Flat sequence of strings laid out in groups of three:
            ``[time, name, link, time, name, link, ...]``.  A trailing
            incomplete group (fewer than three items) is ignored.

    Returns:
        A single HTML string containing a heading and a two-column table
        (date/time, event name linked to the event page).  All scraped
        values are HTML-escaped before being inserted.
    """
    # Local import: available in both Python 2 and 3, and keeps this function
    # self-contained without touching the file's import block.
    from xml.sax.saxutils import escape

    # Inside a double-quoted attribute the quote character must be escaped
    # too, otherwise a link containing '"' could break out of href="...".
    attribute_entities = {'"': "&quot;"}

    html = [
        "<html><body>",
        "<h2>RIT Alumni Events</h2>",
        # put all the events in a table
        "<table><tr><th>Date and Time</th><th>Event Name</th></tr>",
    ]

    # Only walk complete (time, name, link) triples so an odd-sized list
    # cannot trigger an IndexError.
    complete = len(eventsList) - len(eventsList) % 3
    for start in range(0, complete, 3):
        when, name, link = eventsList[start:start + 3]
        html.append("<tr>")
        html.append("<td>" + escape(when) + "</td>")
        html.append(
            "<td>" + "<a href=\"" + escape(link, attribute_entities) + "\">"
            + escape(name) + "</a>" + "</td>"
        )
        html.append("</tr>")

    html.append("</table>")
    html.append("</body></html>")
    return ''.join(html)
def sendEmail():
    """Crawl the RIT alumni events page and e-mail the result as HTML.

    The sender credentials and the recipient below are template placeholders
    (``{{...}}``) and must be replaced with real values before running.

    The message is sent through ``smtp.live.com`` on port 587 using STARTTLS.
    Any error raised while talking to the SMTP server is reported by printing
    its traceback; it is not re-raised.  Errors raised while crawling the
    events page happen before the SMTP step and propagate to the caller.
    """
    # details of the user account sending the email
    smtpUser = '{{sendersEmail}}'
    smtpPassword = '{{password}}'

    # details of who is receiving and sending
    toAdd = '{{sendingTo}}'
    fromAdd = smtpUser

    # email information
    subject = 'RIT Alumni Events'
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = fromAdd
    msg['To'] = toAdd
    msg.attach(MIMEText(crawlEventsPage("http://www.rit.edu/alumni/events"), 'html'))

    try:
        s = smtplib.SMTP('smtp.live.com', 587)
        try:
            s.ehlo()
            s.starttls()
            s.login(smtpUser, smtpPassword)
            s.sendmail(fromAdd, toAdd, msg.as_string())
            # End the SMTP session cleanly once the message is handed over.
            s.quit()
        finally:
            # Always release the socket, even when login or sending fails.
            s.close()
    except Exception:
        # Best-effort delivery: report the failure without crashing.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # through.  print(...) with one argument works on Python 2 and 3.
        tb = traceback.format_exc()
        print(tb)
# Script entry: build the events digest and e-mail it as soon as the file runs.
sendEmail()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment