Skip to content

Instantly share code, notes, and snippets.

@jilljenn
Last active August 29, 2015 14:13
Show Gist options
  • Save jilljenn/a130e60aa29ccdd7a79f to your computer and use it in GitHub Desktop.
Save jilljenn/a130e60aa29ccdd7a79f to your computer and use it in GitHub Desktop.
Data retriever for Oscar nominees
from urllib.request import urlopen
from urllib.parse import urlencode
from bs4 import BeautifulSoup
import json
import re
# Rotten Tomatoes API key, sent as the `apikey` query parameter by
# get_rt_data(). Get your API key at http://developer.rottentomatoes.com
RT_API_KEY = ""
def get_oscars_nominees():
    """Scrape nominee titles from the Oscars nominees page.

    Fetches http://oscar.go.com/nominees and reads the titles listed in the
    first ``div.nomineeRowContainer`` (the script treats that container as
    the Best Picture category).

    Returns:
        A list of title strings, in page order.

    Raises:
        RuntimeError: If the page contains no ``div.nomineeRowContainer``.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen('http://oscar.go.com/nominees') as response:
        html = response.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    oscars = BeautifulSoup(html, 'html.parser')
    best_picture = oscars.find('div', 'nomineeRowContainer')
    if best_picture is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise RuntimeError(
            'No div.nomineeRowContainer found on the nominees page')
    return [movie.text
            for movie in best_picture.select('.nomineesList li .title')]
def get_rt_data(title, min_year=2014):
    """Look up a movie's critics score and runtime on Rotten Tomatoes.

    Args:
        title: Search string sent as the ``q`` query parameter.
        min_year: Earliest ``year`` value a search result may have to be
            accepted. Defaults to 2014, the value previously hard-coded.

    Returns:
        A dict ``{'score': <critics_score>, 'runtime': <runtime>}`` built
        from the first search result whose year is at least ``min_year``,
        or ``None`` when no result qualifies.

    Raises:
        RuntimeError: If the response carries no ``'movies'`` entry.
    """
    query = urlencode({'apikey': RT_API_KEY, 'q': title})
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen('http://api.rottentomatoes.com/api/public/v1.0/movies.json?'
                 + query) as response:
        payload = json.loads(response.read().decode('utf-8'))
    if 'movies' not in payload:
        # Surface the whole payload so the cause is visible, instead of a
        # bare KeyError. (Presumably an API error such as a bad key.)
        raise RuntimeError(
            'Unexpected Rotten Tomatoes response: %r' % (payload,))
    for movie in payload['movies']:
        year = movie.get('year')
        # Skip entries whose year is missing or not an integer; comparing
        # such a value with an int would raise instead of filtering.
        if isinstance(year, int) and year >= min_year:
            return {'score': movie['ratings']['critics_score'],
                    'runtime': movie['runtime']}
    return None
def get_wiki_release(title):
    """Find a film's French release date on French Wikipedia.

    Runs an ``opensearch`` query for ``title`` on fr.wikipedia.org, then
    opens each candidate article: the only hit if there is exactly one,
    otherwise every hit whose name contains ``'film'``. In each article it
    scans nested list items under ``#mw-content-text`` for one whose first
    ``<span>`` has ``data-sort-value="France"`` and returns the text of
    that item's ``span.date-lien``.

    Args:
        title: Film title to search for.

    Returns:
        The release date text, or ``None`` when no candidate article
        yields one. In that case the title and the search results are
        printed to stdout to help diagnose the miss.
    """
    query = urlencode({'action': 'opensearch', 'limit': 15, 'namespace': 0,
                       'format': 'json', 'search': title})
    # Close each HTTP response deterministically instead of leaking it.
    with urlopen('https://fr.wikipedia.org/w/api.php?' + query) as response:
        _, names, _, urls = json.loads(response.read().decode('utf-8'))
    single_hit = len(names) == 1
    for name, url in zip(names, urls):
        if not (single_hit or 'film' in name):
            continue
        with urlopen(url) as page:
            # Explicit parser: result no longer depends on installed extras.
            wiki = BeautifulSoup(page.read(), 'html.parser')
        for line in wiki.select('#mw-content-text ul ul li'):
            span = line.find('span')
            if span is None or span.attrs.get('data-sort-value') != 'France':
                continue
            date = line.find('span', 'date-lien')
            # A "France" item without a date span is skipped rather than
            # crashing on None.text; later items or articles may match.
            if date is not None:
                return date.text
    # Nothing matched: dump what the search returned for troubleshooting.
    print(title, names, urls)
    return None
# Report, for every nominee, its Rotten Tomatoes critics score, its runtime
# and its French release date.
print('Nominations aux Oscars (% Rotten Tomatoes, durée, sortie française) :')
for title in get_oscars_nominees():
    # Drop an " or (...)" part from the title before searching. A raw
    # string keeps "\(" a regex escape instead of an invalid string escape.
    safe_title = re.sub(r' or \(.*\)', '', title)
    data = get_rt_data(safe_title)
    release_date = get_wiki_release(safe_title)
    # Explicit checks instead of assert, which is stripped under "python -O".
    if data is None:
        raise RuntimeError(
            'No Rotten Tomatoes data found for %r' % (safe_title,))
    if release_date is None:
        raise RuntimeError(
            'No French release date found for %r' % (safe_title,))
    hours, minutes = divmod(data['runtime'], 60)
    # %02d zero-pads the minutes ("2 h 05" rather than "2 h  5").
    print('- %s (%d %%, %d h %02d, %s)'
          % (safe_title, data['score'], hours, minutes, release_date))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment