Last active
August 29, 2015 14:27
-
-
Save jimaples/a5fd49e036da17cd9488 to your computer and use it in GitHub Desktop.
Lifehack to speed up going through IFTTT emails of new movies posted to Reddit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Lifehack to speed up going through IFTTT emails of new movies posted to Reddit | |
""" | |
import os
import re
import webbrowser
from itertools import count
from operator import itemgetter

import requests
from lxml import html
# Static <head> section of the generated report page.
#
# It carries a small stylesheet plus two JavaScript helpers wired to the
# buttons the main script emits:
#   showSource()  - hides #pagecontent and shows its markup, HTML-escaped,
#                   inside #pagesource (one movie <div> per line)
#   showContent() - switches back to the rendered view
#
# This is a regular (non-raw) Python string, so "\\n" below reaches the
# browser as the JavaScript escape sequence "\n".
s_header = '''<head>
<title>IFTTT Movies</title>
<style type="text/css">
body {
font-size: 75%;
}
.tagline, .flat-list, h3 {
display:none;
}
p {
margin: 4px 0;
}
blockquote {
margin: 0 20px;
}
.ifttt_movie {
border-bottom: 1px solid black;
}
</style>
<script type="text/javascript">
function showSource(){
var s = document.getElementById('pagecontent');
var source = s.innerHTML;
//now we need to escape the html special chars, javascript has escape
//but this does not do what we want
source = source.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
//now we remove newlines and add <pre> tags to preserve whitespace
source = "<pre>"+source.replace(/\\n/g,'')+"</pre>";
//now populate the source div
var e = document.getElementById('pagesource');
//1 movie per line
e.innerHTML = source.replace(/&lt;div class="ifttt_movie"/g,'\\n&lt;div class="ifttt_movie"');
e.style.display = 'block';
s.style.display = 'none';
document.getElementById('btn_content').style.display = 'block';
document.getElementById('btn_source').style.display = 'none';
return source;
}
function showContent(){
document.getElementById('pagecontent').style.display = 'block';
document.getElementById('pagesource').style.display = 'none';
document.getElementById('btn_content').style.display = 'none';
document.getElementById('btn_source').style.display = 'block';
}
</script>
</head>'''
# Matches a score written as "7.5/10", optionally wrapped in <strong> tags
# ("<strong>7.5</strong>/10"), and captures the "d.d" number. Only a single
# digit before the decimal point is captured.
# Raw string so that "\d" and "\." reach the regex engine untouched instead of
# being treated as (invalid) string escapes.
re_imdb = re.compile(r'(?:<strong>)*(\d\.\d)(?:</strong>)*/10')
# Running 1-based counter; processMovie() draws one number per processed link
# and uses it for the entry index, element id and user-agent suffix.
processMovieIdx = count(1)
def processMovie(name, link):
    """Fetch one linked page and build the report entry for a movie.

    Args:
        name: Title text taken from the email; used as the link caption.
        link: URL taken from the email; it is requested with a custom
            user-agent header.

    Returns:
        A dict with the keys:
            'name', 'link' - the arguments, unchanged
            'index'        - next value drawn from ``processMovieIdx``
            'id'           - HTML element id, ``ifttt_movieNNN``
            'status'       - HTTP status code of the request, or 0 when the
                             request itself failed (timeout, connection error)
            'rating'       - first score matched by ``re_imdb``, else '???'
            'html'         - ``<div class="ifttt_movie">`` fragment with a
                             remove button, the link and, when found, the
                             markup around the 'MovieGuide' anchor

    Side effects:
        Performs an HTTP GET and prints one progress line per call.
    """
    i = next(processMovieIdx)
    o = {'name': name, 'link': link, 'index': i, 'rating': '???'}
    o['id'] = 'ifttt_movie{:03d}'.format(i)

    # follow the link (the user-agent is to get around the bot-catcher)
    try:
        site = requests.get(
            link,
            headers={'user-agent': 'ifttt-email/0.0.' + str(i)},
            # without a timeout a stalled server would hang the whole run
            timeout=30,
        )
    except requests.RequestException:
        # One unreachable link must not abort the batch; report it as
        # status 0 and fall through to the link-only entry.
        site = None
    o['status'] = site.status_code if site is not None else 0
    print('{index:2d} ({status:d}) : {name:s}'.format(**o))

    # download link from email
    # NOTE(review): name and link are interpolated into the markup unescaped.
    s = '''<div class="ifttt_movie" id="{id:s}">
<button onclick="document.getElementById('{id:s}').outerHTML = '';">X</button>
<a href="{link:s}">{name:s}</a><br>'''.format(**o)

    if o['status'] == 200:  # 200 = OK
        # parse the HTML response
        tree = html.fromstring(site.text)
        # find the IMDb bot post: grandparent of the anchor mentioning MovieGuide
        div = tree.xpath("//a[contains(.,'MovieGuide')]/../..")
        if div:
            fragment = html.tostring(div[0])
            # tostring() yields a byte string; on Python 3 that is not `str`
            # and has to be decoded before it can be appended.
            if not isinstance(fragment, str):
                fragment = fragment.decode('utf-8')
            s += fragment
            score = re_imdb.findall(s)
            if score:
                o['rating'] = score[0]

    o['html'] = s + '</div>'
    return o
if __name__ == '__main__':
    # Script entry point:
    #   1. read the saved IFTTT email text,
    #   2. extract (title, url) pairs and run processMovie() on each,
    #   3. dump the raw results to a debug file,
    #   4. write an HTML report sorted by rating and open it in the browser.
    email = '/home/admin/Documents/Python/IPython/ifttt_email.txt'

    # Outputs go next to the email file. abspath() also covers a bare file
    # name by resolving it against the current working directory.
    folder = os.path.dirname(os.path.abspath(email))
    output = os.path.join(folder, 'ifttt_movies.html')
    debug = os.path.join(folder, 'ifttt_debug.txt')

    with open(email, 'r') as fp:
        s = fp.read()

    # a title line, a blank line, then a line starting with "http"
    links = re.findall('(.*)\n\n(http[^\n]+)', s)
    print('{:d} links found'.format(len(links)))

    # process the links
    movies = [processMovie(name, link) for name, link in links]

    with open(debug, 'w') as fp:
        fp.write(repr(movies) + '\n')

    # sort by rating; ratings are strings, so this is a string comparison and
    # the '???' placeholder sorts ahead of the numeric scores
    movies.sort(key=itemgetter('rating'), reverse=True)

    with open(output, 'w') as fp:
        fp.write('<html>' + s_header + '<body><ol>\n')
        # ToC
        for m in movies:
            fp.write('<li>IMDb {rating:s} : <a href="#{id:s}">{name:s}</a></li>\n'.format(**m))
        fp.write('''</ol>
<button id="btn_content" style="display:none;" onclick="showContent();">Show Content</button>
<button id="btn_source" onclick="showSource();">Show Source</button>
<hr><div id="pagecontent">
''')
        # MovieGuide posts
        for m in movies:
            fp.write(m['html'] + '\n')
        fp.write('</div><div id="pagesource"></div></body></html>\n')

    # open file in new tab
    print('Opening results in browser: ' + output)
    webbrowser.open('file://' + output, 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment