Created
September 23, 2018 17:04
-
-
Save juliobguedes/1d0c416afe8d28041cf3d351081b0c7b to your computer and use it in GitHub Desktop.
Simple code that checks for changes in a website's HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# (C) Júlio Barreto
# 22/09/2018
'''
Script to monitor any website's HTML changes
'''
import smtplib
import urllib2
from contextlib import closing
from datetime import datetime
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
from getpass import getpass
from time import sleep
# Page snapshots compared by the polling loop; both start out empty.
current = previous = None

url = ''  # INSERT THE WEBSITE HERE

# The address and password are asked for interactively; getpass keeps
# the password from being echoed on the terminal.
my_email = raw_input('Insert your email: ')  # INSERT YOUR EMAIL HERE
pw = getpass("Insert your email's password: ")

# IMPORTANT: Gmail often blocks the connection. If you are sure that
# you are typing your password correctly, go to your Google account
# settings and allow less secure apps. To connect with Hotmail, use
# smtp.live.com instead of smtp.gmail.com
server = smtplib.SMTP(host='smtp.gmail.com', port=587)
server.starttls()  # switch to TLS before the credentials are sent
server.login(my_email, pw)
def download_html(errors=0, max_errors=100, retry_delay=36):
    '''
    Download the monitored page (module-level `url`) and return its HTML.

    A request that fails with urllib2.URLError is retried every
    `retry_delay` seconds until `max_errors` failures have accumulated;
    with the defaults that is roughly one hour of trying.

    errors      -- number of failures already counted before this call
    max_errors  -- failure count at which the function gives up
    retry_delay -- seconds to wait between two attempts

    Returns the response body, or None once the failure limit is
    reached (the last error is printed before returning).
    '''
    while True:
        try:
            # urllib2 responses are not context managers; closing()
            # makes sure the connection is released even if read() fails.
            with closing(urllib2.urlopen(url)) as response:
                return response.read()
        except urllib2.URLError as e:
            errors += 1
            if errors >= max_errors:
                print('%s caused an error:' % url)
                print(e.reason)
                return None
            sleep(retry_delay)
def send_email(first=False):
    '''
    Send a notification email from my_email to my_email through the
    module-level SMTP connection `server`.

    With first=True the message announces that the script is running.
    Otherwise it is the "page changed" notification, whose subject and
    body are blank placeholders to be filled in by the user.
    '''
    if first:  # Notifies that the script is working
        subject, body = 'Script is active', "The script is now active"
    else:  # Notifies that changes occurred
        subject = ''  # Email "Subject"
        body = ''  # Insert the content of the email here

    message = MIMEMultipart()
    message['From'] = my_email
    message['To'] = my_email
    message['Subject'] = subject
    message.attach(MIMEText(body))

    server.sendmail(my_email, my_email, message.as_string())
    print('Email sent to %s' % my_email)
# Confirm by email that monitoring has started.
send_email(True)

interval = 60  # Verifies every X seconds
while True:
    sleep(interval)
    print('Trying at: ' + str(datetime.now()))

    html = download_html()
    if html is None:
        # download_html() already used up all of its retries.
        raise Exception('Could not continue after 1 hour trying')

    if previous is None:
        # First successful download: keep it as the baseline.
        previous = html
        continue

    current = html
    if current != previous:
        send_email()
        previous = current  # updates page version
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment