Skip to content

Instantly share code, notes, and snippets.

@willettk
Created October 10, 2014 15:46
Show Gist options
  • Save willettk/8e1356696de72b2105f9 to your computer and use it in GitHub Desktop.
Save willettk/8e1356696de72b2105f9 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup as bs
import requests
import io
# Scrape the Nobel website to get data on what was served at the Nobel banquets.
# Can be turned into word clouds via Tagxedo
def get_result(year, timeout=30):
    """Fetch the Nobel banquet menu page for one year.

    Parameters
    ----------
    year : int
        Banquet year; it is formatted into the page URL.
    timeout : float or tuple, optional
        Seconds to wait for the server, handed straight to ``requests.get``.
        requests never times out on its own, so without this a stalled
        connection would block the whole scrape indefinitely.

    Returns
    -------
    requests.Response
        The response exactly as returned by ``requests.get``; the HTTP
        status code is not inspected here.
    """
    url = 'http://www.nobelprize.org/ceremonies/menus/menu-%4i.html' % year
    return requests.get(url, timeout=timeout)
def _joined_strings(tags):
    """Return every text fragment inside *tags*, in document order, with runs of whitespace collapsed to single spaces."""
    fragments = []
    for tag in tags:
        for text in tag.stripped_strings:
            fragments.append(' '.join(text.split()))
    return fragments


def menus(year=1901):
    """Scrape the food and wine text of one year's Nobel banquet menu.

    The page layout changed several times, so which ``<p>`` elements of the
    first ``<div align="center">`` hold the food and the wines depends on
    the year:

    * 1948, 1949, 1989: food in paragraph 0, wines in paragraph 2.
    * other years up to 2003: food in paragraph 0, wines in paragraph 1.
    * 2004-2008: food in paragraph 0, wines spread over paragraphs 1 onward.
    * 2009: food in paragraph 1, wines spread over paragraphs 2 onward.
    * 2010 and later: food in paragraphs 0-5 (one dish each), wines in
      paragraphs 6 onward.

    Parameters
    ----------
    year : int
        Banquet year to fetch.

    Returns
    -------
    tuple
        ``(mstr, vstr)``: the menu text and the wine text, each a single
        space-joined string.

    Raises
    ------
    IndexError
        If the page has no ``<div align="center">`` or fewer paragraphs
        than the layout for that year needs.
    """
    content = get_result(year).content
    # NOTE(review): no parser is named, so bs4 picks whichever one is
    # installed; the paragraph indices below may depend on that choice.
    # Consider pinning one once it is confirmed which parser they were
    # tuned against.
    soup = bs(content)
    centered = soup.find_all('div', {'align': 'center'})
    if not centered:
        raise IndexError(
            'no <div align="center"> on the menu page for year %d' % year)
    paragraphs = centered[0].find_all('p')

    # Fail the same way the per-index lookups always did when the page is
    # shorter than expected, but say which year and how many were found.
    special_layout = year in (1948, 1949, 1989)
    needed = 3 if (special_layout or year > 2008) else 2
    if len(paragraphs) < needed:
        raise IndexError(
            'menu page for year %d has %d <p> elements, expected at least %d'
            % (year, len(paragraphs), needed))

    if year > 2009:
        # Food in the first six children, now separated by dish
        menu_tags, wine_tags = paragraphs[:6], paragraphs[6:]
    elif year == 2009:
        # Food in the second child; wines separated
        menu_tags, wine_tags = [paragraphs[1]], paragraphs[2:]
    elif year > 2003:
        # All food in the first child; wines separated
        menu_tags, wine_tags = [paragraphs[0]], paragraphs[1:]
    elif special_layout:
        menu_tags, wine_tags = [paragraphs[0]], [paragraphs[2]]
    else:
        menu_tags, wine_tags = [paragraphs[0]], [paragraphs[1]]

    mstr = ' '.join(_joined_strings(menu_tags))
    # The first wine fragment is always dropped (presumably the section
    # heading -- confirm against the pages).
    vstr = ' '.join(_joined_strings(wine_tags)[1:])
    if year > 2003:
        # From 2004 on the wine paragraphs run into a 'Translation' section;
        # keep only what precedes it.
        vstr = vstr.split('Translation')[0]
    return mstr, vstr
def get_all(verbose=False,
            menu_path='/Users/willettk/python/data/nobel_menus.txt',
            vins_path='/Users/willettk/python/data/nobel_vins.txt'):
    """Scrape every banquet menu from 1901 through 2013 and write the text to disk.

    Works on both Python 2.7 and Python 3.

    Parameters
    ----------
    verbose : bool, optional
        When true, print each year's menu and wine text as it is scraped.
    menu_path : str, optional
        File that receives all menu text, joined by spaces (UTF-8).
    vins_path : str, optional
        File that receives all wine text, joined by spaces (UTF-8).

    Returns
    -------
    None
    """
    # Banquet not held during WWI or WWII, as well as a couple of others;
    # skip those years. A set keeps the membership test cheap.
    bad_years = set(range(1914, 1920)) | set(range(1939, 1945)) | {1907, 1923, 1924}
    years_good = [y for y in range(1901, 2014) if y not in bad_years]

    m_all, v_all = [], []
    for y in years_good:
        m, v = menus(year=y)
        if verbose:
            # Single parenthesised argument: prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('\nYear %4i' % y)
            print('Menu: %s' % m)
            print('Vins: %s' % v)
        m_all.append(m)
        v_all.append(v)

    # Write to file
    with io.open(menu_path, 'w', encoding='utf8') as f:
        f.write(' '.join(m_all))
    with io.open(vins_path, 'w', encoding='utf8') as f:
        f.write(' '.join(v_all))
    return None
if __name__ == "__main__":
    # Run as a script: scrape all years and write the two text files.
    get_all()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment