Last active
December 31, 2018 18:22
-
-
Save RuyiLi/4ad4a3e6e1c9cdd6f4ae7c570d5f2456 to your computer and use it in GitHub Desktop.
Scripts used in my Math IA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas | |
from scipy import stats | |
# Home/away win-loss independence test (chi-squared + phi coefficient).
#
# For every team code in `teams`, all files in ./csv whose name starts with
# that code are read, wins and losses are tallied by location, and a 2x2
# chi-squared test (home/away x win/loss) is run. One summary row per team is
# written to overall_with_phi.csv.

# 3-digit team code
# This code is the same regardless of sport, just change the teams
teams = ['ari', 'atl', 'bal', 'bos', 'chc', 'chw', 'cin', 'cle', 'col', 'det', 'hou', 'kcr', 'laa', 'lad', 'mia', 'mil', 'min', 'nym', 'nyy', 'oak', 'phi', 'pit', 'sd', 'sea', 'sf', 'stl', 'tb', 'tex', 'tor', 'wsn']
indices = [ 'Win', 'Loss', 'Total' ]
overall = []


def new_record():
    """Return an empty win/loss tally keyed by location ('home' / 'away')."""
    return {
        'home': { 'wins': 0, 'losses': 0 },
        'away': { 'wins': 0, 'losses': 0 },
    }


def tally_games(data, record):
    """Add the games of one schedule table to ``record`` (mutated in place).

    ``data`` is any mapping of column name to a sequence of cells, e.g. the
    DataFrame returned by ``pandas.read_csv``. Two columns are read:

    * ``'Unnamed: 4'`` -- the cell ``'@'`` marks an away game; any other
      value is counted as a home game.
    * ``'W/L'`` -- a game is a win when the cell contains an upper-case
      ``'W'``; every other string is a loss.

    Rows whose ``W/L`` cell is not a string (e.g. NaN for a game with no
    result) are skipped instead of raising ``TypeError``.

    Returns ``record`` for convenience.
    """
    for where, outcome in zip(data['Unnamed: 4'], data['W/L']):
        if not isinstance(outcome, str):
            continue
        location = 'away' if where == '@' else 'home'
        result = 'wins' if 'W' in outcome else 'losses'
        record[location][result] += 1
    return record


def expected_counts(home_wins, home_losses, away_wins, away_losses):
    """Expected cell counts of the 2x2 table under independence.

    Each expected value is ``row total * column total / grand total``. The
    result is ordered like the observed vector used by the script:
    ``[home wins, home losses, away wins, away losses]``.

    Raises ``ZeroDivisionError`` when all four counts are zero.
    """
    total_home = home_wins + home_losses
    total_away = away_wins + away_losses
    total_wins = home_wins + away_wins
    total_losses = home_losses + away_losses
    total = total_home + total_away
    return [
        total_wins * total_home / total,
        total_losses * total_home / total,
        total_wins * total_away / total,
        total_losses * total_away / total,
    ]


def phi_coefficient(home_wins, home_losses, away_wins, away_losses):
    """Phi coefficient of the 2x2 table (wins/losses by home/away).

    Returns NaN when any row or column total is zero, because the
    coefficient is undefined there (the denominator would be zero).
    """
    total_home = home_wins + home_losses
    total_away = away_wins + away_losses
    total_wins = home_wins + away_wins
    total_losses = home_losses + away_losses
    denominator = (total_wins * total_losses * total_away * total_home) ** 0.5
    if denominator == 0:
        return float('nan')
    return (home_wins * away_losses - away_wins * home_losses) / denominator


def main():
    """Run the analysis for every team and write overall_with_phi.csv."""
    for team in teams:
        print(f'{"=" * 9}\nTEAM: {team.upper()}\n{"=" * 9}')

        # Counts are summed over every file in ./csv that starts with the
        # team code.
        record = new_record()
        for file in os.listdir('csv'):
            if not file.startswith(team):
                continue
            tally_games(pandas.read_csv(os.path.join('csv', file)), record)

        # 2x2 table layout:      Home  Away
        #                 Wins     a     b
        #                 Losses   c     d
        a, c = record['home']['wins'], record['home']['losses']
        b, d = record['away']['wins'], record['away']['losses']
        vals = [ a, c, b, d ]
        t_h = a + c
        t_a = b + d
        t_w = a + b
        t_l = c + d
        t = t_l + t_w

        if t == 0:
            # No data for this team: the test is undefined, so skip it
            # rather than dividing by zero.
            print('No games found, skipping.')
            print()
            continue

        e_v = expected_counts(a, c, b, d)

        observed = pandas.DataFrame({
            'Home' : pandas.Series([ a, c ], index=['OWin', 'OLoss']),
            'Away' : pandas.Series([ b, d ], index=['OWin', 'OLoss']),
            'Total' : pandas.Series([ t_w, t_l ], index=['OWin', 'OLoss']),
        })
        expected = pandas.DataFrame({
            'Home' : pandas.Series([ e_v[0], e_v[1], t_h ], index=['EWin', 'ELoss', 'Totals']),
            'Away' : pandas.Series([ e_v[2], e_v[3], t_a ], index=['EWin', 'ELoss', 'Totals']),
            'Total' : pandas.Series([ t_w, t_l, t ], index=['EWin', 'ELoss', 'Totals']),
        })

        # SciPy computes sum((observed - expected) ** 2 / expected) for us.
        # Degrees of freedom: 1 = 4 - 1 - ddof
        results = stats.chisquare(vals, e_v, ddof=2)
        print()
        print('Chi-Squared Statistic:', results.statistic)
        # pvalue is a probability in [0, 1], not a percentage.
        print(f'Probability (DOF = 1): {results.pvalue}')
        print()

        phi = phi_coefficient(a, c, b, d)

        # The summary row reuses the Home/Away/Total column names so it can
        # be concatenated with the observed/expected tables below:
        # Home = chi-squared statistic, Away = p-value, Total = team code.
        chi_data = pandas.DataFrame({
            'Home': pandas.Series([ results.statistic ], index=['ChiData']),
            'Away': pandas.Series([ results.pvalue ], index=['ChiData']),
            'Total': pandas.Series([ team ], index=['ChiData']),
            'Phi': pandas.Series([ phi ], index=['ChiData']),
        })
        merged = pandas.concat([ observed, expected, chi_data ], sort=False)
        overall.append(chi_data) # we only care about chi data
        print(merged)
        print()

    # pandas.concat raises on an empty list, so only write when at least one
    # team produced a result.
    if overall:
        alldata = pandas.concat(overall)
        alldata.to_csv('overall_with_phi.csv')


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const p = require('puppeteer'); | |
const { writeFile } = require('fs'); | |
// Downloads the "schedule & scores" table of each team/season from
// baseball-reference.com as CSV and saves it next to this script as
// `<team><year>.csv`.

// How many seasons to go back from LAST_SEASON. The loop bounds below are
// inclusive, so YEARS + 1 seasons are fetched per team.
const YEARS = 10;
const LAST_SEASON = 2018;
// Full team list for a complete run:
//const teams = ['ari', 'atl', 'bal', 'bos', 'chc', 'chw', 'cin', 'cle', 'col', 'det', 'hou', 'kcr', 'laa', 'lad', 'mil', 'min', 'nym', 'nyy', 'oak', 'phi', 'pit', 'sd', 'sea', 'sf', 'stl', 'tb', 'tex', 'tor', 'wsn']
// The Miami Marlins renamed themselves; according to the teams page the old
// code was FLA. Before 2011 they were listed as FLA, so the files saved by
// this run have to be renamed by hand afterwards.
const teams = ['fla'];
const BASE = `https://www.baseball-reference.com/teams`; //2017-schedule-scores.shtml
// Prefix whose length is cut off the front of the exported <pre> contents.
const CSV_MARKER = '<!-- ALREADYCSV --> ';
// Runs in the page: clicks the site's CSV export button so that the CSV
// <pre> element read below exists.
const CLICK_EXPORT = 'document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()';

/**
 * Promise wrapper around the callback-style `writeFile`.
 * @param {string} path - Destination file.
 * @param {string} contents - Text to write.
 * @returns {Promise<string>} Resolves with a status message, rejects with the write error.
 */
const saveFile = (path, contents) => new Promise((res, rej) => {
  writeFile(path, contents, (err) => {
    if(err) return rej(err);
    res('Successfully saved.');
  });
});

(async () => {
  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for(const team of teams){
      for(let i = LAST_SEASON; i >= LAST_SEASON - YEARS; i--){
        const url = `${BASE}/${team.toUpperCase()}/${i}-schedule-scores.shtml`;
        console.log(url);
        await page.goto(url);
        await page.evaluate(CLICK_EXPORT);
        let data = await page.$eval('pre#csv_team_schedule', pre => pre.innerHTML);
        data = data.slice(CSV_MARKER.length);
        console.log(await saveFile(`${__dirname}/${team}${i}.csv`, data));
      }
    }
  } finally {
    // Always shut the browser down, even when a page fails to load or the
    // export button is missing; otherwise the Chromium process is leaked.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving a floating rejection.
  console.error(err);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const p = require('puppeteer'); | |
const { writeFile } = require('fs'); | |
// Downloads the games table of each team/season from
// basketball-reference.com as CSV and saves it under ./csv.

// Span of a full run (see the commented-out loop bounds below); the current
// one-off configuration does not use it.
const YEARS = 10;
// Full team list for a complete run:
//const teams = ["ATL", "BOS", "NJN", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW", "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NOH", "NYK", "OKC", "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "TOR", "UTA", "WAS"]
const teams = ["SEA"];
/*
Exceptions
NJN 2008-2012, BRK 2013-2018
CHA 2008-2014, CHO 2015-2018
NOH 2008-2013, NOP 2014-2018
SEA 2008, OKC 2009-2018
*/
// Current configuration is a one-off fix-up for the last exception above:
// only the 2008 season is fetched (from the SEA page) and the file is saved
// under the OKC prefix.
// Full run: FIRST_SEASON = 2018, LAST_SEASON = 2018 - YEARS, SAVE_AS = team.
const FIRST_SEASON = 2008;
const LAST_SEASON = 2008;
const SAVE_AS = 'OKC';
const BASE = `https://www.basketball-reference.com/teams`;
// Prefix whose length is cut off the front of the exported <pre> contents.
const CSV_MARKER = '<!-- ALREADYCSV --> ';
// Runs in the page: clicks the site's CSV export button so that the CSV
// <pre> element read below exists.
const CLICK_EXPORT = 'document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()';

/**
 * Promise wrapper around the callback-style `writeFile`.
 * @param {string} path - Destination file.
 * @param {string} contents - Text to write.
 * @returns {Promise<string>} Resolves with a status message, rejects with the write error.
 */
const saveFile = (path, contents) => new Promise((res, rej) => {
  writeFile(path, contents, (err) => {
    if(err) return rej(err);
    res('Successfully saved.');
  });
});

(async () => {
  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for(const team of teams){
      for(let i = FIRST_SEASON; i >= LAST_SEASON; i--) {
        const url = `${BASE}/${team}/${i}_games.html`;
        console.log(url);
        await page.goto(url);
        await page.evaluate(CLICK_EXPORT);
        let data = await page.$eval('pre#csv_games', pre => pre.innerHTML);
        data = data.slice(CSV_MARKER.length);
        console.log(await saveFile(`${__dirname}/csv/${SAVE_AS}${i}.csv`, data));
      }
    }
  } finally {
    // Always shut the browser down, even when a page fails to load or the
    // export button is missing; otherwise the Chromium process is leaked.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving a floating rejection.
  console.error(err);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const p = require('puppeteer'); | |
const { writeFile } = require('fs'); | |
// Downloads the game log of each team/season from
// pro-football-reference.com as CSV and saves it as `csv/<team><year>.csv`.

// How many seasons to go back from LAST_SEASON. The loop bounds below are
// inclusive, so YEARS + 1 seasons are fetched per team.
const YEARS = 10;
const LAST_SEASON = 2017;
const teams = ["crd", "atl", "rav", "buf", "car", "chi", "cin", "cle", "dal", "den", "det", "gnb", "htx", "clt", "jax", "kan", "sdg", "ram", "mia", "min", "nwe", "nor", "nyg", "nyj", "rai", "phi", "pit", "sfo", "sea", "tam", "oti", "was"];
const BASE = `https://www.pro-football-reference.com/teams`;
// Prefix whose length is cut off the front of the exported <pre> contents.
const CSV_MARKER = '<!-- ALREADYCSV --> ';
// Runs in the page: clicks the site's CSV export button so that the CSV
// <pre> element read below exists.
const CLICK_EXPORT = 'document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()';

/**
 * Promise wrapper around the callback-style `writeFile`.
 * @param {string} path - Destination file.
 * @param {string} contents - Text to write.
 * @returns {Promise<string>} Resolves with a status message, rejects with the write error.
 */
const saveFile = (path, contents) => new Promise((res, rej) => {
  writeFile(path, contents, (err) => {
    if(err) return rej(err);
    res('Successfully saved.');
  });
});

(async () => {
  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for(const team of teams){
      for(let i = LAST_SEASON; i >= LAST_SEASON - YEARS; i--){
        const url = `${BASE}/${team}/${i}/gamelog/`;
        console.log(url);
        await page.goto(url);
        await page.evaluate(CLICK_EXPORT);
        // The id of the CSV <pre> on this site carries the season year.
        let data = await page.$eval(`pre#csv_gamelog${i}`, pre => pre.innerHTML);
        data = data.slice(CSV_MARKER.length);
        console.log(await saveFile(`${__dirname}/csv/${team}${i}.csv`, data));
      }
    }
  } finally {
    // Always shut the browser down, even when a page fails to load or the
    // export button is missing; otherwise the Chromium process is leaked.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving a floating rejection.
  console.error(err);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const p = require('puppeteer'); | |
const { writeFile } = require('fs'); | |
// Downloads the games table of each team/season from hockey-reference.com
// as CSV and saves it as `hockey_csv/<team><year>.csv`.

// Span of a full run (see the note on the season bounds below); the current
// one-off configuration does not use it.
const YEARS = 10;
// Full team list for a complete run:
//const teams = ['ana', 'ari', 'bos', 'buf', 'cgy', 'car', 'chi', 'col', 'cbj', 'dal', 'det', 'edm', 'fla', 'lak', 'min', 'mtl', 'nsh', 'njd', 'nyi', 'nyr', 'ott', 'phi', 'pit', 'sjs', 'stl', 'tbl', 'tor', 'van', 'wsh', 'wpg']
const teams = ['atl'];
/*
EXCEPTIONS
ARI (Arizona Coyotes)
Renamed from PHX in 2014
WPG (Winnipeg Jets)
Renamed from ATL in 2011
*/
// Current configuration is a one-off fix-up for the WPG exception above:
// seasons 2011 down to 2008 are fetched from the ATL pages.
// Full run: FIRST_SEASON = 2018, LAST_SEASON = 2018 - YEARS.
const FIRST_SEASON = 2011;
const LAST_SEASON = 2008;
const BASE = `https://www.hockey-reference.com/teams`;
// Prefix whose length is cut off the front of the exported <pre> contents.
const CSV_MARKER = '<!-- ALREADYCSV --> ';
// Runs in the page: clicks the site's CSV export button so that the CSV
// <pre> element read below exists.
const CLICK_EXPORT = 'document.querySelector(`button[tip="Export table as <br>suitable for use with excel"]`).click()';

/**
 * Promise wrapper around the callback-style `writeFile`.
 * @param {string} path - Destination file.
 * @param {string} contents - Text to write.
 * @returns {Promise<string>} Resolves with a status message, rejects with the write error.
 */
const saveFile = (path, contents) => new Promise((res, rej) => {
  writeFile(path, contents, (err) => {
    if(err) return rej(err);
    res('Successfully saved.');
  });
});

(async () => {
  const browser = await p.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for(const team of teams){
      for(let i = FIRST_SEASON; i >= LAST_SEASON; i--){
        const url = `${BASE}/${team.toUpperCase()}/${i}_games.html`;
        console.log(url);
        await page.goto(url);
        await page.evaluate(CLICK_EXPORT);
        let data = await page.$eval('pre#csv_games', pre => pre.innerHTML);
        data = data.slice(CSV_MARKER.length);
        console.log(await saveFile(`${__dirname}/hockey_csv/${team}${i}.csv`, data));
      }
    }
  } finally {
    // Always shut the browser down, even when a page fails to load or the
    // export button is missing; otherwise the Chromium process is leaked.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving a floating rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment