## ClarkGoble/opennyt.py

## opennyt.py
#!/usr/bin/env python

## opennyt.py
##
## Given a NYT article open in Safari's front document, this reopens it without the paywall


import sys, os
import time, string, re
import urllib, json
from subprocess import PIPE, Popen


def getsource():
    """
    Return the HTML source of the front document in Safari.

    The AppleScript is handed to ``osascript`` as a single ``-e`` argument.
    Returns whatever osascript wrote to standard output; anything it wrote
    to standard error is printed so that failures are visible.
    """
    applescript = "\n".join([
        'tell application "Safari"',
        '    get the source of the front document',
        'end tell',
    ])
    # An argument list (no shell) means the script text needs no shell quoting.
    cmd = ["osascript", "-e", applescript]
    (out, err) = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True).communicate(None)
    if err:
        # Only report stderr when osascript actually produced some.
        print(err)
    return out

def scansource():
    """
    Scan the source of the NYT article open in Safari for its headline.

    Each line containing ``NYT_HEADLINE`` is printed and searched for text
    that immediately precedes a ``</NYT_`` closing tag. The first non-blank
    capture is printed and returned with surrounding whitespace stripped.
    Returns None when no line yields a headline.
    """
    for line in getsource().split("\n"):
        if "NYT_HEADLINE" not in line:
            continue
        print(line)
        # Capture back to the previous tag boundary rather than only word
        # characters, so punctuation inside the headline does not truncate it.
        mobj = re.search(r"([^<>]+)</NYT_", line)
        title = mobj.group(1).strip() if mobj else ""
        if title:
            print(title)
            return title
        # Marker line without usable headline text; keep scanning.
        print("***")
    return None

def safariopen(url):
    """
    Open a URL in the front document (current tab) of Safari.

    ``url`` is given without a scheme; ``http://`` is prepended. The
    generated AppleScript is printed before it is run. Returns osascript's
    standard output, or an empty string (without contacting Safari) when
    ``url`` is None or empty.
    """
    if not url:
        # Nothing to open; avoid navigating Safari to "http://None".
        return ""

    # The URL may come from search results, so escape the characters that are
    # special inside an AppleScript string literal.
    safe_url = url.replace("\\", "\\\\").replace('"', '\\"')
    applescript = "\n".join([
        'tell application "Safari"',
        '    set the URL of the front document to "http://%s"' % safe_url,
        'end tell',
    ])
    print(applescript)

    # An argument list (no shell) keeps the URL from being interpreted by a shell.
    cmd = ["osascript", "-e", applescript]
    (out, err) = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True).communicate(None)
    if err:
        print(err)
    return out

def parsenewsdiffs(url):
    """
    Given a newsdiffs URL, return the original NYT URL embedded in it.

    The result is the part of ``url`` starting at ``www.nytimes.com`` (no
    scheme). Returns None when ``url`` is empty, does not mention
    ``newsdiffs.org``, or contains no ``www.nytimes.com`` portion.
    """
    if not url or "newsdiffs.org" not in url:
        return None

    # Dots are escaped so only the literal host name matches.
    mobj = re.search(r"(www\.nytimes\.com.*)", url)
    if mobj:
        return mobj.group(1)
    return None

def google(query):
    """
    Search Google for ``query`` and return the NYT URL from a newsdiffs hit.

    Every result URL is printed. Results whose URL contains
    ``http://www.newsdiffs.org`` are passed to ``parsenewsdiffs``; the last
    one that yields a URL wins. Returns None for a missing or blank query,
    when the response carries no result data, or when no result qualifies.
    """
    if query is None or not query.strip():
        return None

    searchurl = urllib.urlencode({'q': query})
    url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % searchurl

    response = urllib.urlopen(url)
    try:
        data = json.loads(response.read())
    finally:
        # Always release the connection, even if the body is not valid JSON.
        response.close()

    # 'responseData' may be null or missing (e.g. on an error response), so
    # fall back to an empty result list instead of raising.
    results = (data.get('responseData') or {}).get('results') or []

    found_url = None
    for r in results:
        print(r['url'])
        if "http://www.newsdiffs.org" in r['url']:
            # Keep an earlier hit if this one cannot be parsed.
            found_url = parsenewsdiffs(r['url']) or found_url

    return found_url

def test():
    """
    Manual smoke test: look up a fixed headline, print the URL that was
    found, then print the output of opening that URL in Safari.
    """
    found = google("to save a man's life a muddy tug of war with the earth itself")
    print(found)
    opened = safariopen(found)
    print(opened)

def main():
    """
    Reopen the NYT article shown in Safari's front document.

    Reads the headline from the page, searches for it, and points Safari at
    the URL that was found. When no URL is found, a message is printed and
    Safari is left untouched.
    """
    title = scansource()
    result = google(title)
    if not result:
        # Without this guard Safari would be sent to "http://None".
        print("No newsdiffs copy found for headline: %r" % (title,))
        return
    safariopen(result)

if __name__ == '__main__':
    # Run the lookup only when this file is executed as a script.
    main()