Glench/tv_tropes_common_tropes.py

## tv_tropes_common_tropes.py
#!/usr/bin/python
# a script to get all the common tropes for media from tv tropes

# usage:
# python tv_tropes_common_tropes.py name1 name2 [name3...nameN]

# Please put names with spaces or special characters in quotes.
# You can also pass in the URLs if a name doesn't automatically match.
# pip install pattern
# pip install pyquery
import sys
from pprint import pprint
import re
import urllib
from pattern import web
from pyquery import PyQuery

# Everything after the script name on the command line: one entry per work.
names = sys.argv[1:]

# Matches hrefs that end in "<capital>To<capital>" or "<capital>-<capital>";
# get_tropes_by_url follows such links instead of recording them as tropes
# (presumably alphabetical sub-index pages such as "...AToD" -- confirm).
spider_regex = re.compile(r'[A-Z](To|-)[A-Z]$')

# CSS selectors tried in order by get_tropes_by_url; the later ones are only
# used while the earlier ones have yielded fewer than three tropes.
queries = [
    '#wikitext > ul > li > a:first-child',
    '#wikitext div > ul > li > a:first-child',
    '#wikitext > ul > li > ul a:first-child',
]

# Filled in while scraping: link text -> href of that link.
trope_urls = dict()

def get_tropes_by_url(url):
    page = web.URL(url).download()
    pq_page = PyQuery(page)
    print 'Page title:', pq_page('title').text()
    tropes = set()
    for query in queries:
        if len(tropes) < 3:
            for a in pq_page(query):
                pq_a = PyQuery(a)
                if spider_regex.search(pq_a.attr('href')):
                    tropes = tropes.union(get_tropes_by_url(pq_a.attr('href')))
                else:
                    trope_urls[pq_a.text()] = pq_a.attr('href')
                    tropes.add(pq_a.text())
    return tropes

def get_tropes(name):
    # TODO: turn name into url somehow
    if 'http:' in name:
        url = name
    else:
        url = 'http://www.google.com/search?ie=UTF-8&oe=UTF-8&sourceid=navclient&gfns=1&q={}'.format(urllib.quote('tv tropes ' + name))
    print url
    return get_tropes_by_url(url)

def trope_intersection(tropes1, tropes2):
    """Return the tropes present in both `tropes1` and `tropes2`.

    tropes1 -- a set (anything with an `intersection` method).
    tropes2 -- the collection to intersect it with.
    """
    shared = tropes1.intersection(tropes2)
    return shared

if len(names) > 1:
    common_tropes = reduce(trope_intersection, (get_tropes(name) for name in names))

    if common_tropes:
        print 'Common matches are: {}'.format(len(common_tropes))
        for trope in common_tropes:
            print '\t', trope #, '\t', trope_urls[trope]

    else:
        print 'There are no common tropes!'
else:
    print 'Please enter enter 2 or more shows/movies/books/etc'
    sys.exit(1)
	#!/usr/bin/python
	# a script to get all the common tropes for media from tv tropes

	# usage:
	# python tv_tropes_matcher.py name1 name2 [name3...nameN]

	# please put names with spaces or special characters in quotes
	# you can also pass in the urls if it won't automatch by name.
	# pip install pattern
	# pip install pyquery
	import sys
	from pprint import pprint
	import re
	import urllib
	from pattern import web
	from pyquery import PyQuery

	# Everything after the script name on the command line: one entry per work.
	names = sys.argv[1:]
	# Matches hrefs that end in "<capital>To<capital>" or "<capital>-<capital>".
	# The pipe must be unescaped: it is the alternation between "To" and "-",
	# not a literal "|" character.
	spider_regex = re.compile(r'[A-Z](To|-)[A-Z]$')
	# CSS selectors tried in order by get_tropes_by_url.
	queries = ['#wikitext > ul > li > a:first-child', '#wikitext div > ul > li > a:first-child', '#wikitext > ul > li > ul a:first-child']
	# Filled in while scraping: link text -> href of that link.
	trope_urls = {}

	def get_tropes_by_url(url):
	page = web.URL(url).download()
	pq_page = PyQuery(page)
	print 'Page title:', pq_page('title').text()
	tropes = set()
	for query in queries:
	if len(tropes) < 3:
	for a in pq_page(query):
	pq_a = PyQuery(a)
	if spider_regex.search(pq_a.attr('href')):
	tropes = tropes.union(get_tropes_by_url(pq_a.attr('href')))
	else:
	trope_urls[pq_a.text()] = pq_a.attr('href')
	tropes.add(pq_a.text())
	return tropes

	def get_tropes(name):
	# TODO: turn name into url somehow
	if 'http:' in name:
	url = name
	else:
	url = 'http://www.google.com/search?ie=UTF-8&oe=UTF-8&sourceid=navclient&gfns=1&q={}'.format(urllib.quote('tv tropes ' + name))
	print url
	return get_tropes_by_url(url)

	def trope_intersection(tropes1, tropes2):
		"""Return the tropes present in both `tropes1` and `tropes2`."""
		return tropes1.intersection(tropes2)

	if len(names) > 1:
	common_tropes = reduce(trope_intersection, (get_tropes(name) for name in names))

	if common_tropes:
	print 'Common matches are: {}'.format(len(common_tropes))
	for trope in common_tropes:
	print '\t', trope #, '\t', trope_urls[trope]

	else:
	print 'There are no common tropes!'
	else:
	print 'Please enter enter 2 or more shows/movies/books/etc'
	sys.exit(1)