Created
January 22, 2018 07:51
-
-
Save crashish/d64d415d5912c883aa6089154d6ac3de to your computer and use it in GitHub Desktop.
golden globes best comedy/best director overlap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# twitter + insomnia + curiosity = crappy python code | |
# https://twitter.com/criznash/status/950268556544942080 | |
import requests | |
import bs4 | |
comedy = "https://en.wikipedia.org/wiki/Golden_Globe_Award_for_Best_Motion_Picture_%E2%80%93_Musical_or_Comedy"
director = "https://en.wikipedia.org/wiki/Golden_Globe_Award_for_Best_Director"

# Year cells that are recognised by their link text because their link title
# does not contain "Golden Globe Awards".
_PLAIN_YEARS = ('1951', '1954', '1955')


def _parse_nominees(html, swap, star_first):
    """Group the linked cells of every ``.wikitable`` row by year.

    A row counts as a "year row" when its first titled link has
    "Golden Globe Awards" in its title or its text is one of ``_PLAIN_YEARS``.
    The two links after the year form that year's first entry; every other
    row contributes its first two links to the most recent year.

    Args:
        html: Page markup to parse.
        swap: When true, each entry is stored as (second link text, first
            link text) instead of (first, second).  Used for the director
            page so that element 0 of both dicts is the text compared by the
            overlap step (presumably the film title -- confirm against the
            live table layout).
        star_first: When true, "*" is appended to element 0 of the entry that
            comes from the year row (presumably the winner).

    Returns:
        dict mapping year text to a list of 2-tuples of link texts.
    """
    by_year = {}
    soup = bs4.BeautifulSoup(html, "html.parser")
    for table in soup.select(".wikitable"):
        year = None
        # The first <tr> is skipped as the header row.
        for row in table.find_all('tr')[1:]:
            links = row.find_all('a', title=True)
            # Rows with fewer than two titled links cannot yield an entry;
            # this also covers rows with no links at all, which previously
            # raised IndexError.
            if len(links) < 2:
                continue
            head = links[0]
            from_year_row = ('Golden Globe Awards' in head.attrs['title']
                             or head.text in _PLAIN_YEARS)
            if from_year_row:
                year = head.text
                by_year[year] = []
                links = links[1:]
            # Skip rows seen before any year row, and year rows that do not
            # carry both cells, instead of raising KeyError/IndexError.
            if year is None or len(links) < 2:
                continue
            first, second = links[0].text, links[1].text
            if swap:
                first, second = second, first
            if from_year_row and star_first:
                first += "*"
            by_year[year].append((first, second))
    return by_year


# Fail loudly on HTTP errors: an error page would otherwise parse to an empty
# dict and the script would silently report "0/0".
d = requests.get(comedy, timeout=30)
d.raise_for_status()
pt = _parse_nominees(d.text, swap=False, star_first=True)

d2 = requests.get(director, timeout=30)
d2.raise_for_status()
pt2 = _parse_nominees(d2.text, swap=True, star_first=False)
import pprint


def _find_overlap(left, right):
    """Return the entries of *left* that match an entry of *right* per year.

    An entry of ``left[year]`` matches when element 0 of some entry in
    ``right[year]`` is a substring of its own element 0.  A substring test is
    used rather than equality because elements of *left* may carry a trailing
    "*" marker.

    Args:
        left: dict mapping year to a list of tuples.
        right: dict mapping year to a list of tuples.

    Returns:
        dict mapping year to the matching tuples from *left*, in their
        original order.  Years without a match are omitted, and each tuple
        appears at most once per year.
    """
    overlap = {}
    for year, entries in left.items():
        # A year missing from *right* simply has no matches (was a KeyError).
        # Empty strings are dropped because "" is a substring of everything.
        needles = [other[0] for other in right.get(year, ()) if other[0]]
        hits = [entry for entry in entries
                if any(needle in entry[0] for needle in needles)]
        if hits:
            overlap[year] = hits
    return overlap


res = _find_overlap(pt, pt2)
pprint.pprint(res)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("{}/{}".format(len(res), len(pt)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment