Skip to content

Instantly share code, notes, and snippets.

@MonkmanMH
Last active December 12, 2015 05:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MonkmanMH/4720641 to your computer and use it in GitHub Desktop.
Save MonkmanMH/4720641 to your computer and use it in GitHub Desktop.
MLB runs per game per team
# THE HISTORICAL RECORD - RUNS PER GAME BY TEAM
#
# public Gist of this code can be found at
# https://gist.github.com/MonkmanMH/4720641
#
# data source: Lahman Database
# http://www.seanlahman.com/baseball-archive/statistics/
# 2012 version (1871-2012)
# data table "Teams.csv"
#
# note: requires that code found at
# https://gist.github.com/4702915
# has already been run, creating the data frame "Teams.merge"
#
# select the team you want from the franchID variable in the Teams.merge data frame
# note the use of double "=" (comparison), not a single "=" (assignment)!
Team1 <- as.data.frame(subset(Teams.merge, franchID == "SEA"))
#
# stop right away if the franchise code matched nothing; otherwise the
# failure only shows up later as an obscure plotting or loess error
if (nrow(Team1) == 0) {
  stop("no rows in Teams.merge match the selected franchID", call. = FALSE)
}
#
# put the seasons in chronological order so that the first and last rows
# really are the first and last years, and so that lines are drawn left to right
Team1 <- Team1[order(Team1$yearID), ]
#
# then pull first and last years of the analysis and team name fields
firstyear <- Team1$yearID[1]
lastyear <- tail(Team1$yearID, 1)
#
# use the name from the most recent season
# (a franchise can have more than one name over its history)
team.name <- tail(Team1$name, 1)
# now put the team name, years, and the rest of the title text into a single string
# (paste0 is used so that no stray spaces end up around the "-" and before the ":")
title.text <- paste0(team.name, " ", firstyear, "-", lastyear,
                     ": Runs scored relative to the league average")
#
# BASIC PLOT
# run index by season, drawn as a simple line with default axes and labels
plot(x = Team1$yearID, y = Team1$R_index,
     type = "l",
     main = title.text)
#
#
# TREND LINE -- using loess modeling
# references:
# http://princeofslides.blogspot.ca/2011/05/sab-r-metrics-basics-of-loess.html
# http://research.stowers-institute.org/efg/R/Statistics/loess.htm
#
# fit a loess model of the run index on year (all loess settings left at
# their defaults) and keep the fitted values, one per season in Team1
RunIndex.LO <- loess(formula = Team1$R_index ~ Team1$yearID)
RunIndex.LO.predict <- predict(object = RunIndex.LO)
#
# scatter plot of the yearly index on a fixed vertical scale
ylim <- c(60, 140)
plot(x = Team1$yearID, y = Team1$R_index,
     main = title.text,
     xlab = "year", ylab = "index (league average = 100)",
     ylim = ylim)
# background grid first, so the trend line is drawn on top of it
grid()
# overlay the loess fitted values as a solid red line
lines(x = Team1$yearID, y = RunIndex.LO.predict,
      col = "red", lwd = 2, lty = "solid")
#
# MODEL VERSION 2 (with more nuance/zig-zag to the trend line)
# create new objects RunIndex.LO.25 and RunIndex.LO.5 for loess models
# fitted with span=0.25 and span=0.5, and keep their fitted values
RunIndex.LO.25 <- loess(Team1$R_index ~ Team1$yearID, span = 0.25)
RunIndex.LO.25.predict <- predict(RunIndex.LO.25)
#
RunIndex.LO.5 <- loess(Team1$R_index ~ Team1$yearID, span = 0.5)
RunIndex.LO.5.predict <- predict(RunIndex.LO.5)
#
# plot the data on a fixed vertical scale
ylim <- c(60, 140)
plot(Team1$R_index ~ Team1$yearID,
     ylim = ylim,
     main = title.text,
     xlab = "Year", ylab = "Index (league average = 100)")
# grid lines and the league-average reference line at 100 go down first,
# so that they sit behind the trend lines and the legend instead of on top
grid()
abline(h = 100, lty = "dotdash")
# loess predicted value lines: the default fit (RunIndex.LO.predict, created
# in the TREND LINE section above) plus the two narrower spans
lines(Team1$yearID, RunIndex.LO.predict, lty = "solid", col = "black", lwd = 1.5)
lines(Team1$yearID, RunIndex.LO.25.predict, lty = "dashed", col = "red", lwd = 2)
lines(Team1$yearID, RunIndex.LO.5.predict, lty = "dotdash", col = "blue", lwd = 1.5)
# chart legend, anchored to the top of the plotting range
# (ylim[2] rather than a repeated literal, so it follows any change to ylim)
legend(firstyear + 2, ylim[2],
       c("default", "span=0.25", "span=0.50"),
       lty = c("solid", "dashed", "dotdash"),
       col = c("black", "red", "blue"),
       lwd = c(1.5, 2, 1.5))
#
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment