Skip to content

Instantly share code, notes, and snippets.

@juliobguedes
Created September 23, 2018 17:04
Show Gist options
  • Save juliobguedes/1d0c416afe8d28041cf3d351081b0c7b to your computer and use it in GitHub Desktop.
Save juliobguedes/1d0c416afe8d28041cf3d351081b0c7b to your computer and use it in GitHub Desktop.
Simple code that checks for changes in a website's HTML
# coding: utf-8
# (C) Júlio Barreto
# 22/09/2018
'''
Script to monitor any website's HTML changes
'''
import urllib2, smtplib
from getpass import getpass
from time import sleep
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
from datetime import datetime
previous = None  # HTML seen on the previous check (None until the first download)
current = None  # HTML fetched by the most recent check
url = ''  # INSERT THE WEBSITE HERE
if not url:
    # Stop before asking for credentials or contacting the mail server:
    # with the placeholder left empty there is no page to monitor.
    raise SystemExit('Set `url` to the website to monitor before running this script')
my_email = raw_input('Insert your email: ')  # Address used as both sender and recipient
pw = getpass("Insert your email's password: ")  # Read without echoing it to the terminal
# IMPORTANT: gmail often blocks the connection. If you are sure that
# you are typing your password correctly, go to your google account
# settings and allow less secure apps. To connect with hotmail, use
# smtp.live.com instead of smtp.gmail.com
server = smtplib.SMTP('smtp.gmail.com', 587)  # Connects to Gmail
server.starttls()  # Upgrade the connection to TLS before sending the password
server.login(my_email, pw)
def download_html(errors=0, max_errors=100, retry_delay=36):
    '''
    Download the HTML of the monitored page (the module-level ``url``).

    A failed attempt is retried after ``retry_delay`` seconds until
    ``max_errors`` attempts have failed in total. With the defaults
    (100 failures, 36 seconds apart) that is a period of about 1 hour.

    Parameters:
        errors: number of attempts that have already failed.
        max_errors: failure count at which the function gives up.
        retry_delay: seconds to wait between two attempts.

    Returns:
        The page content as returned by ``read()``, or None when the
        page could not be downloaded within the allowed attempts.
    '''
    # Local import: keeps this function usable without touching the
    # file-level import list.
    from contextlib import closing

    while True:
        try:
            # closing() releases the connection even if read() fails; the
            # urllib2 response object cannot be used in a `with` directly.
            with closing(urllib2.urlopen(url)) as response:
                return response.read()
        except urllib2.URLError as e:
            errors += 1
            if errors >= max_errors:
                print('%s caused an error:' % url)
                print(e.reason)
                return None
            sleep(retry_delay)
def send_email(first=False):
    '''
    Send a notification from ``my_email`` to ``my_email`` through the
    already logged-in module-level ``server``.

    Parameters:
        first: when True, send the "script is active" confirmation;
            otherwise send the "page changed" notification, whose
            subject and content are placeholders to be filled in.
    '''
    # NOTE(review): the SMTP session is opened once at start-up; a mail
    # server may drop it while the script idles - confirm before relying
    # on long runs.
    if first:  # Notifies that the script is working
        subject = 'Script is active'
        body = "The script is now active"
    else:  # Notifies that changes occurred
        subject = ''  # Email "Subject"
        body = ''  # Insert the content of the email here

    message = MIMEMultipart()
    message['From'] = my_email
    message['To'] = my_email
    message['Subject'] = subject
    message.attach(MIMEText(body))

    server.sendmail(my_email, my_email, message.as_string())
    print('Email sent to %s' % my_email)
interval = 60  # Seconds between two checks of the page
try:
    send_email(True)  # Tells the user that monitoring has started
    while True:
        sleep(interval)  # Verifies every X seconds
        print('Trying at: ' + str(datetime.now()))
        html = download_html()
        if html is None:
            # download_html() already retried for its whole retry period.
            raise Exception('Could not continue after 1 hour trying')
        if previous is None:
            # First successful download: nothing to compare against yet.
            previous = html
        else:
            current = html
            if previous != current:
                send_email()
            previous = current  # updates page version
finally:
    # Always end the SMTP session, whatever stopped the loop (download
    # failure, mail error, Ctrl-C).
    try:
        server.quit()
    except smtplib.SMTPException:
        # Best effort only: the connection may already be gone, and the
        # error that stopped the loop must not be hidden by this one.
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment