Skip to content

Instantly share code, notes, and snippets.

@justinvw
Created September 16, 2012 09:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save justinvw/3731769 to your computer and use it in GitHub Desktop.
Save justinvw/3731769 to your computer and use it in GitHub Desktop.
Quick-and-dirty script that notifies you by mail when a webpage of interest changes
import cPickle
import urllib2
import hashlib
from datetime import datetime
import smtplib
from email.mime.text import MIMEText
# Where the per-URL history of (timestamp, hash) samples is persisted
# between runs.
PICKLE_FILE = '/home/justin/tools/updatedyet.pkl'

# Addresses that receive a notification mail when a page changes, and the
# sender address used for those mails.
MAIL_ON_CHANGE = ['justin@vwees.net']
MAIL_FROM = 'justin@vwees.net'

# Pages whose contents are hashed and compared against the previous sample.
URLS_OF_INTEREST = [
    'http://store.apple.com/nl/browse/home/shop_iphone/family/iphone',
    'http://vwees.net/justin/test.html',
]
def fetch_and_hash_content(url):
    """Fetch url and return the SHA-1 hex digest of the response body.

    Returns None when the page cannot be fetched or read, so callers can
    skip this URL for the current run.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            contents = response.read()
        finally:
            # Release the connection even when read() fails part-way.
            response.close()
    except Exception:
        # Best-effort: any fetch/read failure means "no sample this run".
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None
    return hashlib.sha1(contents).hexdigest()
def notify_of_change(url, prev_hash, prev_date, curr_hash, curr_date):
    """Mail every address in MAIL_ON_CHANGE that the contents of url changed.

    The mail body lists the previous and the current (date, hash) sample.
    Each recipient gets an individual message, sent from MAIL_FROM through
    the SMTP server on localhost. Nothing is sent (and no connection is
    opened) when MAIL_ON_CHANGE is empty. SMTP errors raised while
    connecting or sending propagate to the caller.
    """
    if not MAIL_ON_CHANGE:
        return

    body = ('Contents of %s changed.\n\nPreviously (%s): %s\nNow (%s):'
            ' %s' % (url, prev_date, prev_hash, curr_date, curr_hash))
    subject = 'updatedyet.py: contents of %s changed' % url

    # One connection serves all recipients.
    smtp = smtplib.SMTP('localhost')
    try:
        for mailaddress in MAIL_ON_CHANGE:
            # Build a fresh message per recipient: assigning message['To']
            # on a reused message appends another header instead of
            # replacing the old one, so later recipients would receive a
            # mail carrying several 'To' headers.
            message = MIMEText(body)
            message['Subject'] = subject
            message['From'] = MAIL_FROM
            message['To'] = mailaddress
            smtp.sendmail(MAIL_FROM, [mailaddress], message.as_string())
    finally:
        try:
            smtp.quit()
        except smtplib.SMTPServerDisconnected:
            # The connection is already gone; there is nothing left to
            # close, and any sendmail error should not be masked.
            pass
def load_pickle(pickle_location=PICKLE_FILE):
    """Load and return the object pickled at pickle_location.

    Returns an empty dict when the file cannot be opened or unpickled
    (for example on the very first run, when no file exists yet).
    """
    try:
        # The with-block closes the file handle even if unpickling fails.
        with open(pickle_location, 'r') as handle:
            return cPickle.load(handle)
    except Exception:
        # Best-effort: start from an empty history on any load failure.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return {}
def save_pickle(obj, pickle_location=PICKLE_FILE):
    """Pickle obj to pickle_location, overwriting any existing file.

    Errors raised while opening or writing the file propagate to the caller.
    """
    # The with-block flushes and closes the file deterministically instead
    # of relying on garbage collection of the anonymous file object.
    with open(pickle_location, 'w') as handle:
        cPickle.dump(obj, handle)
def check_if_updated(urls=URLS_OF_INTEREST):
    """Sample every URL in urls and send a notification for each change.

    The stored history maps each URL to a list of (timestamp, hash)
    samples. For every URL that can be fetched, the fresh hash is compared
    with the most recent stored sample; a difference triggers
    notify_of_change. The fresh sample is then appended to that URL's
    history, and the whole history is saved once all URLs are processed.
    URLs that cannot be fetched are skipped for this run.
    """
    history = load_pickle()

    for url in urls:
        # The timestamp is taken before the fetch starts.
        sampled_at = datetime.utcnow()
        digest = fetch_and_hash_content(url)
        if not digest:
            continue

        if url in history:
            # Compare against the most recent sample only.
            last_sample = history[url][-1]
            if last_sample[1] != digest:
                notify_of_change(url, last_sample[1], last_sample[0],
                                 digest, sampled_at)
        else:
            # First sighting of this URL: nothing to compare against yet.
            history[url] = []

        history[url].append((sampled_at, digest))

    save_pickle(history)
# Run one check over the default URL list when executed as a script;
# importing the module has no side effects.
if __name__ == '__main__':
    check_if_updated()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment