Created
January 29, 2020 02:27
-
-
Save mrakitin/d0bb4b8e60f7e086cc5c429231f096dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape ESPN's Super Bowl history table and plot the winner's score, the
# loser's score and their sum against the date of each game.
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# read_html returns a list of tables found on the page; header=1 tells pandas
# which row to use for the column names.
d = pd.read_html('http://www.espn.com/nfl/superbowl/history/winners', header=1)

# Each RESULT cell is split on ','; the last space-separated token of the first
# two pieces is taken as an integer score (presumably the cell reads
# "<team> <points>, <team> <points>" with the winner first -- verify against
# the live page).
scores = np.array([
    [int(halves[0].split(' ')[-1]), int(halves[1].split(' ')[-1])]
    for halves in (result.split(',') for result in d[0]['RESULT'])
])

# DATE cells are parsed with an abbreviated month followed by a period,
# e.g. "Jan. 15, 1967".
dates = [
    datetime.datetime.strptime(text, '%b. %d, %Y')
    for text in d[0]['DATE']
]

# One row per game: date, first score, second score.
df = pd.DataFrame(
    data=[[played_on, first, second]
          for played_on, (first, second) in zip(dates, scores)],
    columns=['date', 'winner', 'looser'],
)
df['total'] = df['winner'] + df['looser']

# Plot every numeric column against the game date.
df.plot(x='date', grid=True)
Author
mrakitin
commented
Jan 29, 2020
In [2]: df.describe()
Out[2]:
winner looser total
count 53.000000 53.000000 53.000000
mean 30.094340 16.132075 46.226415
std 9.859131 7.465402 14.057083
min 13.000000 3.000000 16.000000
25% 23.000000 10.000000 37.000000
50% 30.000000 17.000000 46.000000
75% 35.000000 21.000000 56.000000
max 55.000000 33.000000 75.000000
In [3]: df
Out[3]:
date winner looser total
0 1967-01-15 35 10 45
1 1968-01-14 33 14 47
2 1969-01-12 16 7 23
3 1970-01-11 23 7 30
4 1971-01-17 16 13 29
5 1972-01-16 24 3 27
6 1973-01-14 14 7 21
7 1974-01-13 24 7 31
8 1975-01-12 16 6 22
9 1976-01-18 21 17 38
10 1977-01-09 32 14 46
11 1978-01-15 27 10 37
12 1979-01-21 35 31 66
13 1980-01-20 31 19 50
14 1981-01-25 27 10 37
15 1982-01-24 26 21 47
16 1983-01-30 27 17 44
17 1984-01-22 38 9 47
18 1985-01-20 38 16 54
19 1986-01-26 46 10 56
20 1987-01-25 39 20 59
21 1988-01-31 42 10 52
22 1989-01-22 20 16 36
23 1990-01-28 55 10 65
24 1991-01-27 20 19 39
25 1992-01-26 37 24 61
26 1993-01-31 52 17 69
27 1994-01-30 30 13 43
28 1995-01-29 49 26 75
29 1996-01-28 27 17 44
30 1997-01-26 35 21 56
31 1998-01-25 31 24 55
32 1999-01-31 34 19 53
33 2000-01-30 23 16 39
34 2001-01-28 34 7 41
35 2002-02-03 20 17 37
36 2003-01-26 48 21 69
37 2004-02-01 32 29 61
38 2005-02-06 24 21 45
39 2006-02-05 21 10 31
40 2007-02-04 29 17 46
41 2008-02-03 17 14 31
42 2009-02-01 27 23 50
43 2010-02-07 31 17 48
44 2011-02-06 31 25 56
45 2012-02-05 21 17 38
46 2013-02-03 34 31 65
47 2014-02-02 43 8 51
48 2015-02-01 28 24 52
49 2016-02-07 24 10 34
50 2017-02-05 34 28 62
51 2018-02-04 41 33 74
52 2019-02-03 13 3 16
In [4]:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment