Skip to content

Instantly share code, notes, and snippets.

@ClarkGoble
Created March 21, 2013 20:03
Show Gist options
  • Save ClarkGoble/5216221 to your computer and use it in GitHub Desktop.
Save ClarkGoble/5216221 to your computer and use it in GitHub Desktop.
Reopens the NYT article shown in Safari's front window with the paywall removed.
#!/usr/bin/env python
## opennyt.py
##
## Given an NYT article open in Safari's front document, this reopens it without the paywall
import sys, os
import time, string, re
import urllib, json
from subprocess import PIPE, Popen
def getsource():
    """
    Return the HTML source of Safari's front document.

    Runs a short AppleScript through ``osascript``. The command is passed
    as an argument list (no shell involved), so the script text needs no
    shell quoting.

    Returns:
        The text osascript wrote to stdout, or an empty string when
        osascript could not be started.
    """
    script = "\n".join([
        'tell application "Safari"',
        'get the source of the front document',
        'end tell',
    ])
    try:
        # universal_newlines=True yields text (not bytes) on Python 3, so
        # callers can split the result with ordinary string operations.
        proc = Popen(["osascript", "-e", script], stdin=PIPE, stdout=PIPE,
                     stderr=PIPE, close_fds=True, universal_newlines=True)
    except OSError as exc:
        # osascript is missing or cannot be executed (e.g. not on macOS).
        sys.stderr.write("osascript could not be run: %s\n" % exc)
        return ""
    out, err = proc.communicate()
    if err:
        # Surface AppleScript errors without polluting stdout.
        sys.stderr.write(err)
    return out
def scansource():
    """
    Find the article headline in the front Safari document.

    Examines each line of the page source that mentions ``NYT_HEADLINE``
    and takes the text sitting directly before a ``</NYT_`` closing tag on
    that line.

    Returns:
        The headline with surrounding whitespace stripped, or None when no
        line yields one.
    """
    for line in getsource().splitlines():
        if "NYT_HEADLINE" not in line:
            continue
        print(line)
        # Capture the whole text node before the closing tag. Restricting
        # the match to word characters and whitespace cut headlines short
        # at the last apostrophe, comma, hyphen, etc.
        found = re.search(r"([^<>]+)</NYT_", line)
        title = found.group(1).strip() if found else ""
        if title:
            print(title)
            return title
        # Marker line: a headline tag was seen but no text was extracted.
        print("***")
    return None
def safariopen(url):
    """
    Point the front Safari document at ``http://<url>``.

    Args:
        url: Address without the scheme (for example
            ``www.nytimes.com/...``); ``http://`` is always prepended.

    Returns:
        The text osascript wrote to stdout, or None when ``url`` is empty
        or None, or when osascript could not be started.
    """
    if not url:
        # Without this guard a failed lookup would navigate to "http://None".
        return None
    # Escape the two characters that would end or corrupt the AppleScript
    # string literal the address is embedded in.
    target = ("http://%s" % url).replace("\\", "\\\\").replace('"', '\\"')
    script = "\n".join([
        'tell application "Safari"',
        'set the URL of the front document to "%s"' % target,
        'end tell',
    ])
    print(script)
    try:
        # Argument list instead of a shell string: the address can no
        # longer break out of the quoting or inject shell commands.
        proc = Popen(["osascript", "-e", script], stdin=PIPE, stdout=PIPE,
                     stderr=PIPE, close_fds=True, universal_newlines=True)
    except OSError as exc:
        # osascript is missing or cannot be executed (e.g. not on macOS).
        sys.stderr.write("osascript could not be run: %s\n" % exc)
        return None
    out, err = proc.communicate()
    if err:
        sys.stderr.write(err)
    return out
def parsenewsdiffs(url):
    """
    Extract the original NYT address from a newsdiffs.org URL.

    Args:
        url: A URL string; may be None or empty.

    Returns:
        The part of ``url`` from ``www.nytimes.com`` to its end, or None
        when ``url`` is not a newsdiffs.org address or contains no NYT
        address.
    """
    if not url or "newsdiffs.org" not in url:
        return None
    # Dots are escaped so they match literal dots only, not any character.
    found = re.search(r"www\.nytimes\.com.*", url)
    return found.group(0) if found else None
def google(query):
    """
    Search Google for ``query`` and return the NYT address from newsdiffs.

    Args:
        query: Search terms, typically the article headline. None, empty
            and whitespace-only values are rejected without a request.

    Returns:
        The NYT address extracted (via parsenewsdiffs) from the first
        newsdiffs.org search result that yields one, or None when the
        query is blank or no result qualifies.

    NOTE(review): the endpoint below is the legacy Google AJAX Search API,
    which appears to have been retired -- confirm and replace if so.
    """
    if query is None or not query.strip():
        return None

    # Resolve the urllib helpers locally so the function works with both
    # the Python 3 and the Python 2 standard-library layouts.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    searchurl = urlencode({'q': query})
    url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % searchurl
    handle = urlopen(url)
    try:
        payload = handle.read()
    finally:
        # Always release the connection, even if reading fails.
        handle.close()
    if isinstance(payload, bytes):
        payload = payload.decode("utf-8")
    data = json.loads(payload)

    # Tolerate a missing or null 'responseData' / 'results' entry instead
    # of failing with a TypeError.
    results = (data.get('responseData') or {}).get('results') or []
    for hit in results:
        print(hit['url'])
        if "http://www.newsdiffs.org" in hit['url']:
            found_url = parsenewsdiffs(hit['url'])
            if found_url:
                return found_url
    return None
def test():
    """
    Manual smoke test: look up a known headline and open the result.

    Prints the address returned by google() and, when one was found, the
    output of safariopen() for it. Needs network access and Safari.
    """
    result = google("to save a man's life a muddy tug of war with the earth itself")
    print(result)
    if result:
        # Skip navigation when the lookup failed, so Safari is not sent
        # to "http://None".
        print(safariopen(result))
def main():
    """
    Reopen the NYT article shown in Safari's front document.

    Steps: read the headline with scansource(), look it up with google(),
    then load the resulting address with safariopen(). Stops with a
    message on stderr as soon as a step produces nothing, so a failed
    lookup is never passed on to Safari.
    """
    title = scansource()
    if not title:
        sys.stderr.write("opennyt: no headline found in the front Safari document\n")
        return
    result = google(title)
    if not result:
        sys.stderr.write("opennyt: no newsdiffs.org result for %r\n" % (title,))
        return
    safariopen(result)
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment