Skip to content

Instantly share code, notes, and snippets.

@RuyiLi
Last active December 31, 2018 18:22
Show Gist options
  • Save RuyiLi/4ad4a3e6e1c9cdd6f4ae7c570d5f2456 to your computer and use it in GitHub Desktop.
Save RuyiLi/4ad4a3e6e1c9cdd6f4ae7c570d5f2456 to your computer and use it in GitHub Desktop.
Scripts used in my Math IA
import os
import pandas
from scipy import stats
# 3-digit team code
# This code is the same regardless of sport, just change the teams
teams = ['ari', 'atl', 'bal', 'bos', 'chc', 'chw', 'cin', 'cle', 'col', 'det', 'hou', 'kcr', 'laa', 'lad', 'mia', 'mil', 'min', 'nym', 'nyy', 'oak', 'phi', 'pit', 'sd', 'sea', 'sf', 'stl', 'tb', 'tex', 'tor', 'wsn']
indices = [ 'Win', 'Loss', 'Total' ]


def expected_counts(home_wins, home_losses, away_wins, away_losses):
    """Return the expected cell counts of the 2x2 location-by-result table.

    Each expected count is (row total * column total) / grand total, i.e. the
    count predicted if winning were independent of playing home or away.

    The result is ordered like the observed list used by the script:
    [home wins, home losses, away wins, away losses].

    Raises ZeroDivisionError when all four observed counts are zero.
    """
    total_home = home_wins + home_losses
    total_away = away_wins + away_losses
    total_wins = home_wins + away_wins
    total_losses = home_losses + away_losses
    total = total_home + total_away
    # Home cells must be scaled by the home total and away cells by the away
    # total; using one of them for both only works when the two are equal.
    return [
        total_wins * total_home / total,
        total_losses * total_home / total,
        total_wins * total_away / total,
        total_losses * total_away / total,
    ]


def main():
    """Run a chi-squared test of home/away vs. win/loss for every team.

    For each code in `teams`, every file in the `csv` directory whose name
    starts with that code is read and its games are tallied. The per-team
    tables are printed, and the chi-squared statistic, p-value and phi
    coefficient of all teams are written to `overall_with_phi.csv`.
    """
    overall = []
    for team in teams:
        print(f'{"=" * 9}\nTEAM: {team.upper()}\n{"=" * 9}')
        record = {
            'home': {'wins': 0, 'losses': 0},
            'away': {'wins': 0, 'losses': 0},
        }
        for name in os.listdir('csv'):
            if not name.startswith(team):
                continue
            data = pandas.read_csv('csv/' + name)
            # 'Unnamed: 4' is the header pandas gives the unlabeled column
            # that holds '@' for away games.
            for marker, result in zip(data['Unnamed: 4'], data['W/L']):
                location = 'away' if marker == '@' else 'home'
                outcome = 'wins' if 'W' in result else 'losses'
                record[location][outcome] += 1

        # a/b = home/away wins, c/d = home/away losses.
        a, c = record['home']['wins'], record['home']['losses']
        b, d = record['away']['wins'], record['away']['losses']
        vals = [ a, c, b, d ]
        t_h = a + c
        t_a = b + d
        t_w = a + b
        t_l = c + d
        t = t_l + t_w

        # A zero marginal makes the expected counts and phi undefined
        # (division by zero), so there is nothing to test for this team.
        if 0 in (t_h, t_a, t_w, t_l):
            print(f'Skipping {team.upper()}: not enough data for a chi-squared test')
            print()
            continue

        e_v = expected_counts(a, c, b, d)
        observed = pandas.DataFrame({
            'Home' : pandas.Series([ a, c ], index=['OWin', 'OLoss']),
            'Away' : pandas.Series([ b, d ], index=['OWin', 'OLoss']),
            'Total' : pandas.Series([ t_w, t_l ], index=['OWin', 'OLoss']),
        })
        expected = pandas.DataFrame({
            'Home' : pandas.Series([ e_v[0], e_v[1], t_h ], index=['EWin', 'ELoss', 'Totals']),
            'Away' : pandas.Series([ e_v[2], e_v[3], t_a ], index=['EWin', 'ELoss', 'Totals']),
            'Total' : pandas.Series([ t_w, t_l, t ], index=['EWin', 'ELoss', 'Totals']),
        })

        # SciPy computes sum((observed - expected) ** 2 / expected) for us.
        # Degrees of freedom: 1 = 4 - 1 - ddof
        results = stats.chisquare(vals, e_v, ddof=2)
        print()
        print('Chi-Squared Statistic:', results.statistic)
        # pvalue is a probability in [0, 1]; scale it to match the '%' suffix.
        print(f'Probability (DOF = 1): {results.pvalue * 100}%')
        print()

        phi = (a * d - b * c) / ( (t_w * t_l * t_a * t_h) ** 0.5 )
        # The Home/Away/Total columns are reused for statistic/p-value/team so
        # this row lines up with the observed and expected tables when merged.
        chi_data = pandas.DataFrame({
            'Home': pandas.Series([ results.statistic ], index=['ChiData']),
            'Away': pandas.Series([ results.pvalue ], index=['ChiData']),
            'Total': pandas.Series([ team ], index=['ChiData']),
            'Phi': pandas.Series([ phi ], index=['ChiData']),
        })
        merged = pandas.concat([ observed, expected, chi_data ], sort=False)
        overall.append(chi_data) # we only care about chi data
        print(merged)
        print()

    if overall:
        alldata = pandas.concat(overall)
        alldata.to_csv('overall_with_phi.csv')
    else:
        print('No team had enough data; overall_with_phi.csv was not written')


if __name__ == '__main__':
    main()
const p = require('puppeteer');
const { writeFile } = require('fs');
const YEARS = 10;
// Full team list, kept for reference; the active list below is a one-off run.
//const teams = ['ari', 'atl', 'bal', 'bos', 'chc', 'chw', 'cin', 'cle', 'col', 'det', 'hou', 'kcr', 'laa', 'lad', 'mil', 'min', 'nym', 'nyy', 'oak', 'phi', 'pit', 'sd', 'sea', 'sf', 'stl', 'tb', 'tex', 'tor', 'wsn']
const teams = ['fla']; // Miami Marlins renamed themselves. According to teams page, the code was FLA /shrug
// Before 2011 they were called FLA, so the files saved by this run have to be renamed by hand.
const BASE = `https://www.baseball-reference.com/teams`; // page path: <TEAM>/<year>-schedule-scores.shtml

/*
  Downloads the schedule table of every team in `teams` as CSV, one file per
  season, saved next to this script as <team><year>.csv.
*/
(async () => {
  // Adapts the callback-style writeFile to a promise that resolves with a status message.
  const saveCsv = (path, contents) => new Promise((resolve, reject) => {
    writeFile(path, contents, (err) => {
      if (err) return reject(err);
      resolve('Successfully saved.');
    });
  });

  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for (const team of teams) {
      // Both bounds are inclusive, so this visits YEARS + 1 seasons.
      for (let i = 2018; i >= 2018 - YEARS; i--) {
        const url = `${BASE}/${team.toUpperCase()}/${i}-schedule-scores.shtml`;
        console.log(url);
        await page.goto(url);
        // Click the export button so the page fills the <pre> read below.
        await page.evaluate('document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()');
        let data = await page.$eval('pre#csv_team_schedule', pre => pre.innerHTML);
        // Drop the leading marker; assumes the export always starts with it.
        data = data.slice('<!-- ALREADYCSV --> '.length);
        console.log(await saveCsv(`${__dirname}/${team}${i}.csv`, data));
      }
    }
  } finally {
    // Close the browser even when a page fails to load or a selector is missing.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
const p = require('puppeteer');
const { writeFile } = require('fs');
const YEARS = 10;
// Full team list, kept for reference; the active list below is a one-off run.
//const teams = ["ATL", "BOS", "NJN", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW", "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NOH", "NYK", "OKC", "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "TOR", "UTA", "WAS"]
const teams = ["SEA"];
/*
  Exceptions
  NJN 2008-2012, BRK 2013-2018
  CHA 2008-2014, CHO 2015-2018
  NOH 2008-2013, NOP 2014-2018
  SEA 2008, OKC 2009-2018
*/
const BASE = `https://www.basketball-reference.com/teams`;

/*
  Downloads the games table of every team in `teams` as CSV into ./csv.
  As configured this is the one-off run for the SEA 2008 exception listed
  above: the output file name is hard-coded to the OKC prefix.
*/
(async () => {
  // Adapts the callback-style writeFile to a promise that resolves with a status message.
  const saveCsv = (path, contents) => new Promise((resolve, reject) => {
    writeFile(path, contents, (err) => {
      if (err) return reject(err);
      resolve('Successfully saved.');
    });
  });

  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for (const team of teams) {
      // General form of the loop: for(let i = 2018; i >= 2018 - YEARS; i--)
      for (let i = 2008; i >= 2008; i--) {
        const url = `${BASE}/${team}/${i}_games.html`;
        console.log(url);
        await page.goto(url);
        // Click the export button so the page fills the <pre> read below.
        await page.evaluate('document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()');
        let data = await page.$eval('pre#csv_games', pre => pre.innerHTML);
        // Drop the leading marker; assumes the export always starts with it.
        data = data.slice('<!-- ALREADYCSV --> '.length);
        // Saved under OKC regardless of `team`, so it joins the other OKC season files.
        console.log(await saveCsv(`${__dirname}/csv/OKC${i}.csv`, data));
      }
    }
  } finally {
    // Close the browser even when a page fails to load or a selector is missing.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
const p = require('puppeteer');
const { writeFile } = require('fs');
const YEARS = 10;
const teams = ["crd", "atl", "rav", "buf", "car", "chi", "cin", "cle", "dal", "den", "det", "gnb", "htx", "clt", "jax", "kan", "sdg", "ram", "mia", "min", "nwe", "nor", "nyg", "nyj", "rai", "phi", "pit", "sfo", "sea", "tam", "oti", "was"];
const BASE = `https://www.pro-football-reference.com/teams`;

/*
  Downloads the game log of every team in `teams` as CSV, one file per
  season, saved as ./csv/<team><year>.csv.
*/
(async () => {
  // Adapts the callback-style writeFile to a promise that resolves with a status message.
  const saveCsv = (path, contents) => new Promise((resolve, reject) => {
    writeFile(path, contents, (err) => {
      if (err) return reject(err);
      resolve('Successfully saved.');
    });
  });

  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for (const team of teams) {
      // Both bounds are inclusive, so this visits YEARS + 1 seasons.
      for (let i = 2017; i >= 2017 - YEARS; i--) {
        const url = `${BASE}/${team}/${i}/gamelog/`;
        console.log(url);
        await page.goto(url);
        // Click the export button so the page fills the <pre> read below.
        await page.evaluate('document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()');
        // The element id carries the season, e.g. csv_gamelog2017.
        let data = await page.$eval('pre#csv_gamelog' + i, pre => pre.innerHTML);
        // Drop the leading marker; assumes the export always starts with it.
        data = data.slice('<!-- ALREADYCSV --> '.length);
        console.log(await saveCsv(`${__dirname}/csv/${team}${i}.csv`, data));
      }
    }
  } finally {
    // Close the browser even when a page fails to load or a selector is missing.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
const p = require('puppeteer');
const { writeFile } = require('fs');
const YEARS = 10;
// Full team list, kept for reference; the active list below is a one-off run.
//const teams = ['ana', 'ari', 'bos', 'buf', 'cgy', 'car', 'chi', 'col', 'cbj', 'dal', 'det', 'edm', 'fla', 'lak', 'min', 'mtl', 'nsh', 'njd', 'nyi', 'nyr', 'ott', 'phi', 'pit', 'sjs', 'stl', 'tbl', 'tor', 'van', 'wsh', 'wpg']
const teams = ['atl'];
/*
  EXCEPTIONS
  ARI (Arizona Coyotes)
    Renamed from PHX in 2014
  WPG (Winnipeg Jets)
    Renamed from ATL in 2011
*/
const BASE = `https://www.hockey-reference.com/teams`;

/*
  Downloads the games table of every team in `teams` as CSV, one file per
  season, saved as ./hockey_csv/<team><year>.csv. As configured this is the
  one-off run for the ATL seasons 2008-2011 (see the exceptions above).
*/
(async () => {
  // Adapts the callback-style writeFile to a promise that resolves with a status message.
  const saveCsv = (path, contents) => new Promise((resolve, reject) => {
    writeFile(path, contents, (err) => {
      if (err) return reject(err);
      resolve('Successfully saved.');
    });
  });

  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for (const team of teams) {
      // General form of the loop: for(let i = 2018; i >= 2018 - YEARS; i--)
      for (let i = 2011; i >= 2008; i--) {
        const url = `${BASE}/${team.toUpperCase()}/${i}_games.html`;
        console.log(url);
        await page.goto(url);
        // Click the export button so the page fills the <pre> read below.
        await page.evaluate('document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()');
        let data = await page.$eval('pre#csv_games', pre => pre.innerHTML);
        // Drop the leading marker; assumes the export always starts with it.
        data = data.slice('<!-- ALREADYCSV --> '.length);
        console.log(await saveCsv(`${__dirname}/hockey_csv/${team}${i}.csv`, data));
      }
    }
  } finally {
    // Close the browser even when a page fails to load or a selector is missing.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment