Skip to content

Instantly share code, notes, and snippets.

View bayesball's full-sized avatar

Jim Albert bayesball

View GitHub Profile
@bayesball
bayesball / mathplacement.R
Last active August 29, 2015 14:08
Math Placement data from a 35-question exam administered to 500 students
datamatrix <- matrix(c(
1,1,1,0,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,
1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,1,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,0,1,1,0,0,0,0,
0,1,1,0,1,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,
1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,1,1,0,0,0,1,1,0,1,0,0,0,0,0,
1,0,1,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,
0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,
1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,
@bayesball
bayesball / plot.prob.home.R
Last active January 28, 2019 03:43
R function to compute and plot the probability a MLB home team wins a game at the end of each inning given a particular lead.
plot.prob.home <- function(year, plot=TRUE){
require(arm)
require(ggplot2)
load.gamelog <- function(season, headers){
download.file(
url=paste("http://www.retrosheet.org/gamelogs/gl", season, ".zip"
, sep="")
, destfile=paste("gl", season, ".zip", sep="")
)
unzip(paste("gl", season, ".zip", sep=""))
@bayesball
bayesball / plot.batting.average.R
Created January 3, 2015 14:10
R function to plot the components of a batting average
plot.batting.average <- function(name, season){
require(Lahman)
require(dplyr)
Name <- unlist(strsplit(name, split=" "))
id <- subset(Master, nameFirst==Name[1] &
nameLast==Name[2])$playerID[1]
data <- subset(Batting, playerID==id & yearID==season)
data <- summarize(data, H=sum(H), AB=sum(AB),
SO=sum(SO), HR=sum(HR))
so.rate <- with(data, SO / AB)
@bayesball
bayesball / graph.game.R
Created January 23, 2015 20:03
R function to plot win probabilities for a specific baseball game
graph.game <- function(d, game.id){
require(ggplot2)
d1 <- subset(d, substr(HALF.INNING, 1, 12) == game.id)
d1$Play <- 1:dim(d1)[1]
yr <- substr(d1[1, "HALF.INNING"], 4, 7)
mo <- substr(d1[1, "HALF.INNING"], 8, 9)
day <- substr(d1[1, "HALF.INNING"], 10, 11)
print(ggplot(d1, aes(Play, P.NEW)) + geom_line() +
ylim(0, 1) + geom_hline(yintercept=0.5, color="red") +
ylab("Probability Home Team Wins") +
@bayesball
bayesball / R markdown file
Last active April 2, 2019 09:53
R functions for computing win probabilities
---
title: "WPA work"
author: "Jim Albert"
date: "January 23, 2015"
output: html_document
---
First source three functions:
* prob.win.game2 computes the win probabilities after each inning
@bayesball
bayesball / batter.matchup.ggplot.R
Last active December 16, 2016 20:23
Finds estimates of true batting averages for all batters against a specific pitcher.
# loading in Retrosheet data for the seasons 1960 through 2013 from my website
load(url("http://bayes.bgsu.edu/baseball/pbp.1960.1979.Rdata"))
load(url("http://bayes.bgsu.edu/baseball/pbp.1980.1999.Rdata"))
load(url("http://bayes.bgsu.edu/baseball/pbp.2000.2013.Rdata"))
batter.matchup.ggplot <- function(Name, graph=TRUE, retroid=FALSE){
# this function assumes the data frames pbp.60.79, pbp.80.99, and pbp.00.13 are
# in the workspace
fit.model <- function(y, n){
require(LearnBayes)
@bayesball
bayesball / helpers.R
Last active August 29, 2015 14:21
Shiny application to plot a trajectory of a home run hitter
plot.trajectory <- function(player){
  # Plot the home run rate (HR/AB) against Age for a single hitter.
  #
  # player: value compared with the Player column of sluggerdata to pick
  #         the rows to plot; it is also used as the plot title.
  #
  # sluggerdata is not created here, so it must already be visible to this
  # function (e.g. as a global), and the ggplot2 functions must be attached.
  # The finished plot is printed as a side effect.
  hitter <- subset(sluggerdata, Player==player)

  # Build the plot one layer at a time: red points for the observed rates,
  # then a blue smoothing curve, then the title.
  trajectory <- ggplot(hitter, aes(Age, HR/AB))
  trajectory <- trajectory + geom_point(size=4, color="red")
  trajectory <- trajectory + geom_smooth(size=2, color="blue")
  trajectory <- trajectory + labs(title = player)

  print(trajectory)
}
@bayesball
bayesball / Platoon.R
Created June 4, 2015 18:47
R code to construct graphs of handedness and platoon advantage
# Assuming that the retrosheet play-by-play files are available for a particular
# season
# Function righthand computes the proportion of right-handed
# pitchers and right-handed batters for each inning for a
# particular season
# inputs are the season and the retrosheet data for that season
righthand <- function(season, d){
require(dplyr)
@bayesball
bayesball / platoon.R
Last active April 14, 2019 10:32
R code to estimate platoon ability distribution using random effects model
# code for estimating platoon effects for 2014 season
# one needs Retrosheet play-by-play for this season
# also uses the Master file in the Lahman database to get the player batting sides
# last, one needs a file from fangraphs to compute the weights in the wOBA formula
library(dplyr)
# retrieved wOBA weights from http://www.fangraphs.com/guts.aspx?type=cn
# stored in file fangraphs.csv
fangraphs <- read.csv("~/Dropbox/2014 WORK/situational/fangraphs.csv")
@bayesball
bayesball / attendance.R
Last active August 29, 2015 14:24
R code to look at attendance drops for each team
# load relevant packages
library(dplyr)
library(ggplot2)
# function will download retrosheet game log data for a particular
# season
load.gamelog <- function(season, headers){
download.file(