Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@shuozhang1985
Created August 22, 2016 00:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save shuozhang1985/388551d7417d5877219462e23ba39f44 to your computer and use it in GitHub Desktop.
Save shuozhang1985/388551d7417d5877219462e23ba39f44 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import urllib2
# Fetch the 2012 Summer Games page and collect its medal-table rows into
# `cells`, one dict per row keyed by Rank/Country/Gold/Silver/Bronze/Total.
web = 'http://www.sports-reference.com/olympics/summer/2012/'
req = urllib2.Request(web)
page = urllib2.urlopen(req)
try:
    soup = BeautifulSoup(page, "lxml")
finally:
    # The response is fully consumed by the parser; release the connection
    # even if parsing raises.
    page.close()
table = soup.find("div", {"class": "table_container"})
if table is None:
    # soup.find returns None when nothing matches; fail with the URL in the
    # message instead of an AttributeError on None further down.
    raise RuntimeError("no 'table_container' div found at %s" % web)
cells = []
for row in table.findAll("tr"):
    result = row.findAll("td")
    # Only rows with exactly six <td> cells are recorded; any other row
    # shape is skipped.
    if len(result) == 6:
        dic = {}
        dic["Rank"] = result[0].get_text()
        dic["Country"] = result[1].get_text()
        dic["Gold"] = result[2].get_text()
        dic["Silver"] = result[3].get_text()
        dic["Bronze"] = result[4].get_text()
        dic["Total"] = result[5].get_text()
        cells.append(dic)
import pandas as pd
import numpy as np
# Turn the scraped 2012 rows into a DataFrame, tag every row with its year,
# and start the output file.
data = pd.DataFrame(cells)
data['year'] = np.repeat(2012, data.shape[0])
# This is the only write that carries the header row, so it truncates
# output.txt (mode='w'). Appending here would leave a second header and
# duplicate rows in the file every time the script is re-run.
data.to_csv(r'output.txt', header=True, index=False, sep=',', mode='w')
# Summer Games years still to be scraped, newest first.
years = [
    2008, 2004, 2000, 1996, 1992, 1988, 1984, 1980, 1976, 1972,
    1968, 1964, 1960, 1956, 1952, 1948, 1936, 1932, 1928, 1924,
    1920, 1912, 1908, 1906, 1904, 1900, 1896,
]
# One results-page URL per entry in `years`, in the same order, so the two
# lists can be walked in lockstep.
webs = ['http://www.sports-reference.com/olympics/summer/%d/' % year
        for year in years]
# For each remaining Games page: fetch it, collect its medal-table rows, and
# append them (without a header) to output.txt tagged with that year.
for url, year in zip(webs, years):
    req = urllib2.Request(url)
    page = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(page, "lxml")
    finally:
        # The response is fully consumed by the parser; release the
        # connection even if parsing raises.
        page.close()
    table = soup.find("div", {"class": "table_container"})
    if table is None:
        # soup.find returns None when nothing matches; name the failing URL
        # rather than crashing with an AttributeError on None.
        raise RuntimeError("no 'table_container' div found at %s" % url)
    cells = []
    for row in table.findAll("tr"):
        result = row.findAll("td")
        # Only rows with exactly six <td> cells are recorded; any other row
        # shape is skipped.
        if len(result) == 6:
            dic = {}
            dic["Rank"] = result[0].get_text()
            dic["Country"] = result[1].get_text()
            dic["Gold"] = result[2].get_text()
            dic["Silver"] = result[3].get_text()
            dic["Bronze"] = result[4].get_text()
            dic["Total"] = result[5].get_text()
            cells.append(dic)
    data = pd.DataFrame(cells)
    data['year'] = np.repeat(year, data.shape[0])
    # Append below whatever is already in the file; no header row is written
    # by this loop.
    data.to_csv(r'output.txt', header=False, index=False, sep=',', mode='a')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment