Skip to content

Instantly share code, notes, and snippets.

@jimaples
Last active August 29, 2015 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimaples/a5fd49e036da17cd9488 to your computer and use it in GitHub Desktop.
Save jimaples/a5fd49e036da17cd9488 to your computer and use it in GitHub Desktop.
Lifehack to speed up going through IFTTT emails of new movies posted to Reddit
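"""Lifehack to speed up going through IFTTT emails of new movies posted to Reddit.

Reads a text dump of the emails, downloads every linked Reddit post, pulls the
MovieGuide post (and the IMDb score in it) out of each page, and writes one
HTML page listing all movies sorted by rating, which is then opened in the
web browser.
"""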
import re
from itertools import count
from operator import itemgetter
import requests
from lxml import html
import webbrowser
# <head> section of the generated results page: page title, a small stylesheet
# and two JavaScript helpers.
#   * showSource() replaces the rendered movie list (#pagecontent) with its own
#     escaped HTML inside #pagesource, one movie <div> per line.
#   * showContent() switches back to the rendered list.
# Both helpers also toggle the #btn_content / #btn_source buttons.
# NOTE: this is a regular (non-raw) Python string, so the doubled backslashes
# below ('\\n') reach the browser as the JavaScript escape '\n'.
s_header = '''<head>
<title>IFTTT Movies</title>
<style type="text/css">
body {
font-size: 75%;
}
.tagline, .flat-list, h3 {
display:none;
}
p {
margin: 4px 0;
}
blockquote {
margin: 0 20px;
}
.ifttt_movie {
border-bottom: 1px solid black;
}
</style>
<script type="text/javascript">
function showSource(){
var s = document.getElementById('pagecontent');
var source = s.innerHTML;
//now we need to escape the html special chars, javascript has escape
//but this does not do what we want
source = source.replace(/</g, "&lt;").replace(/>/g, "&gt;");
//now we remove newlines and add <pre> tags to preserve whitespace
source = "<pre>"+source.replace(/\\n/g,'')+"</pre>";
//now populate the source div
var e = document.getElementById('pagesource');
//1 movie per line
e.innerHTML = source.replace(/&lt;div class="ifttt_movie"/g,'\\n&lt;div class="ifttt_movie"');
e.style.display = 'block';
s.style.display = 'none';
document.getElementById('btn_content').style.display = 'block';
document.getElementById('btn_source').style.display = 'none';
return source;
}
function showContent(){
document.getElementById('pagecontent').style.display = 'block';
document.getElementById('pagesource').style.display = 'none';
document.getElementById('btn_content').style.display = 'none';
document.getElementById('btn_source').style.display = 'block';
}
</script>
</head>'''
# Matches an IMDb score written as "<score>/10", optionally wrapped in
# <strong> tags, and captures the score itself (e.g. "7.5").
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
# One or two integer digits are accepted so that "10.0/10" is captured as
# "10.0" rather than "0.0".
re_imdb = re.compile(r'(?:<strong>)*(\d{1,2}\.\d)(?:</strong>)*/10')
# Module-wide 1-based counter; each processed movie takes the next value.
processMovieIdx = count(1)
def processMovie(name, link):
    """Download one Reddit post and build the HTML entry for that movie.

    Args:
        name: Movie title text taken from the email.
        link: URL of the Reddit post to download.

    Returns:
        dict with the keys
        ``name`` / ``link``  - the arguments, unchanged;
        ``index``            - 1-based running number of this call;
        ``id``               - HTML element id, e.g. ``ifttt_movie001``;
        ``status``           - HTTP status code, or 0 if the request failed;
        ``rating``           - first IMDb score found, ``'???'`` if none;
        ``html``             - the complete ``<div>`` for the results page.
    """
    # Local stdlib import (available on Python 2 and 3) for HTML escaping.
    from xml.sax.saxutils import escape

    i = next(processMovieIdx)
    o = {'name': name, 'link': link, 'index': i, 'rating': '???'}
    o['id'] = 'ifttt_movie{:03d}'.format(i)

    # follow the link (user-agent is to get around the bot-catcher)
    # A timeout plus the except clause keep one dead link from hanging or
    # aborting the whole batch; such a movie is listed with status 0.
    try:
        site = requests.get(
            link,
            headers={'user-agent': 'ifttt-email/0.0.' + str(i)},
            timeout=30,
        )
        o['status'] = site.status_code
    except requests.exceptions.RequestException:
        site = None
        o['status'] = 0
    print('{index:2d} ({status:d}) : {name:s}'.format(**o))

    # download link from email; title and URL come from outside, so they are
    # escaped before being placed into the markup
    s = '''<div class="ifttt_movie" id="{id:s}">
<button onclick="document.getElementById('{id:s}').outerHTML = '';">X</button>
<a href="{link:s}">{name:s}</a><br>'''.format(
        id=o['id'],
        link=escape(link, {'"': '&quot;'}),
        name=escape(name),
    )

    if site is not None and site.status_code == 200:  # 200 = OK
        # parse the HTML response
        tree = html.fromstring(site.text)
        # find the IMDb bot post: grandparent of the link mentioning MovieGuide
        div = tree.xpath("//a[contains(.,'MovieGuide')]/../..")
        if div:
            post = html.tostring(div[0])
            # tostring() yields bytes on Python 3; decode so it can be
            # appended to the text built so far (no-op on Python 2)
            if not isinstance(post, str):
                post = post.decode('utf-8')
            s += post

    score = re_imdb.findall(s)
    if score:
        o['rating'] = score[0]
    o['html'] = s + '</div>'
    return o
if __name__ == '__main__':
    # Script-only dependencies, imported here so the module-level import
    # block does not have to change.
    import os
    import sys
    from xml.sax.saxutils import escape

    # Text dump of the IFTTT emails.  An optional first command-line argument
    # overrides the default location.
    email = '/home/admin/Documents/Python/IPython/ifttt_email.txt'
    if len(sys.argv) > 1:
        email = sys.argv[1]

    # Results are written next to the email file.  abspath() also handles a
    # bare file name without a directory part.
    folder = os.path.dirname(os.path.abspath(email))
    output = os.path.join(folder, 'ifttt_movies.html')
    debug = os.path.join(folder, 'ifttt_debug.txt')

    with open(email, 'r') as fp:
        text = fp.read()
    # an entry is: title line, empty line, line starting with "http"
    links = re.findall('(.*)\n\n(http[^\n]+)', text)
    print('{:d} links found'.format(len(links)))

    # process the links
    movies = [processMovie(title, url) for title, url in links]
    with open(debug, 'w') as fp:
        fp.write(repr(movies) + '\n')

    # sort by rating, best first; compared as numbers so the order does not
    # depend on string comparison.  Movies without a numeric rating ('???')
    # stay at the top of the list.
    def rating_key(movie):
        try:
            return (0, float(movie['rating']))
        except ValueError:
            return (1, 0.0)

    movies.sort(key=rating_key, reverse=True)

    with open(output, 'w') as fp:
        fp.write('<html>' + s_header + '<body><ol>' + '\n')
        # ToC (titles are escaped because they come from the email text)
        for m in movies:
            fp.write(
                '<li>IMDb {rating:s} : <a href="#{id:s}">{name:s}</a></li>'.format(
                    rating=m['rating'], id=m['id'], name=escape(m['name'])
                ) + '\n'
            )
        fp.write('''</ol>
<button id="btn_content" style="display:none;" onclick="showContent();">Show Content</button>
<button id="btn_source" onclick="showSource();">Show Source</button>
<hr><div id="pagecontent">''' + '\n')
        # MovieGuide posts
        for m in movies:
            fp.write(m['html'] + '\n')
        fp.write('</div><div id="pagesource"></div></body></html>' + '\n')

    # open file in new tab
    print('Opening results in browser: ' + output)
    webbrowser.open('file://' + output, 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment