Skip to content

Instantly share code, notes, and snippets.

@graywh
Created December 20, 2010 16:43
Show Gist options
  • Save graywh/748609 to your computer and use it in GitHub Desktop.
Save graywh/748609 to your computer and use it in GitHub Desktop.
Fantasy Football League Stats
Team 1 Team 2 Team 3 Team 4 Team 5 Team 6 Team 7 Team 8 Team 9 Team 10 Team 11 Team 12 Team 13 Team 14 Team 15 Team 16
2 2 2 1 1 1 2 2 1 1 2 2 2 1 1 1
probWins <- function(probs, wins=length(probs)) {
if (length(probs) == 0)
return(1)
p <- 0
if (wins > 0)
p <- probs[1] * probWins(probs[-1], wins - 1)
if (wins < length(probs))
p <- p + (1 - probs[1]) * probWins(probs[-1], wins)
names(p) <- NULL
p
}
winsDistribution <- function(points) {
w <- nrow(points)
probs <- expectedWins(points)
dist <- 1
for (i in 1:w) {
x <- probs[,i]
dist <- cbind((1-x) * dist, 0) + cbind(0, x * dist)
}
rownames(dist) <- colnames(points)
colnames(dist) <- as.character(0:w)
dist
}
expectedWins <- function(points) {
round((apply(points, 1, rank) - 1) / (ncol(points) - 1), 4)
}
gamesData <- function(points, schedule, divisions=NULL) {
l <- ncol(points)
w <- nrow(points)
week <- rep(1:w, l)
team <- factor(rep(colnames(points), each=w))
pts <- c(points)
rank <- c(t(apply(-points, 1, rank)))
opp <- c(schedule[1:w,])
map <- (opp - 1) * w + week
opp.team <- team[map]
opp.pts <- pts[map]
opp.rank <- rank[map]
diff.pts <- pts - opp.pts
ratio <- round(pts / opp.pts, 2)
#diff.rank <- opp.rank - rank
worl <- factor(c("Win","Loss","Tie")[2 - (diff.pts > 0) + (diff.pts == 0)])
if (!is.null(divisions)) {
div <- rep(divisions, each=w)
divg <- div == div[map]
data.frame(week,team,pts,rank,opp.team,opp.pts,opp.rank,diff.pts,ratio,worl,divg)
} else {
data.frame(week,team,pts,rank,opp.team,opp.pts,opp.rank,diff.pts,ratio,worl)
}
}
streak <- function(x) {
x <- x[!is.na(x)]
x <- rle(as.character(x))
n <- length(x$lengths)
paste(substr(x$values[n], 1, 1), x$lengths[n], sep="-")
}
order.streak <- function(x) {
i <- (1:length(x))*2
x <- unlist(strsplit(as.character(x), ' '))
n <- as.numeric(x[i])
l <- x[i-1]
t <- c(W = -1, L = 1)
order(t[l] * n)
}
standings <- function(x, divisions=NULL, bye.points=FALSE) {
l <- length(levels(x$team))
if (!bye.points)
x <- x[!is.na(x$opp.team),]
n <- nrow(x)
if (!is.null(divisions)) {
colnames <- c("W","L","T","DW","DL","DT","streak","pts","opp.pts","e.wins","luck")#,"a.rank","a.opp.rank")
} else {
colnames <- c("W","L","T","streak","pts","opp.pts","e.wins","luck")#,"a.rank","a.opp.rank")
}
process <- function(t) {
W <- sum(t$worl == "Win" , na.rm=TRUE)
L <- sum(t$worl == "Loss", na.rm=TRUE)
T <- sum(t$worl == "Tie" , na.rm=TRUE)
if (!is.null(divisions)) {
DW <- sum(t$worl == "Win" & t$divg, na.rm=TRUE)
DL <- sum(t$worl == "Loss" & t$divg, na.rm=TRUE)
DT <- sum(t$worl == "Tie" & t$divg, na.rm=TRUE)
} else {
DW <- DL <- DT <- NULL
}
strk <- streak(t$worl)
pts <- sum(t$pts)
opp.pts <- sum(t$opp.pts, na.rm=TRUE)
e.wins <- (n - sum(t$rank)) / (l - 1)
luck <- W+T/2 - e.wins + (nrow(t)-W-L-T)/2
#a.rank <- mean(t$rank)
#a.opp.rank <- mean(t$opp.rank, na.rm=TRUE)
structure(as.data.frame(c(c(W,L,T, DW,DL,DT), list(strk), round(c(pts,opp.pts,e.wins,luck), 3))), names=colnames) # ,a.rank,a.opp.rank
}
result <- do.call(rbind, lapply(split(x, x$team), process))
rownames(result) <- levels(x$team)
if (!is.null(divisions)) {
dw <- do.call(rbind, structure(by(result, divisions[rownames(result)], function(x) { order.standings(x, division=TRUE)[1,] }), dimnames=NULL))
other <- result[!(rownames(result) %in% rownames(dw)),]
x <- rbind(order.standings(dw), order.standings(other))
} else {
x <- order.standings(result)
}
format.standings(x)
}
order.standings <- function(x, division=FALSE) {
if (division) {
as.data.frame(order2(x, c("W","L","DW","DL","pts"), decreasing=c(TRUE,FALSE,TRUE,FALSE,TRUE)))
} else {
as.data.frame(order2(x, c("W","L","pts"), decreasing=c(TRUE,FALSE,TRUE)))
}
}
format.standings <- function(x) {
x[,c("W","L","T")] <- list(do.call(paste, c(x[,c("W","L","T")], sep="-")), NULL, NULL)
colnames(x)[colnames(x) == "W"] <- "W-L-T"
if (all(c("DW","DL","DT") %in% colnames(x))) {
x[,c("DW","DL","DT")] <- list(do.call(paste, c(x[,c("DW","DL","DT")], sep="-")), NULL, NULL)
colnames(x)[colnames(x) == "DW"] <- "Div"
}
x
}
unprojected <- function(x, p) {
wins <- x$worl == "Win"
loss <- x$worl == "Loss"
ties <- x$worl == "Tie"
wins <- summary(x[wins,][p[wins,]$worl != "Win",]$team)
loss <- summary(x[loss,][p[loss,]$worl != "Loss",]$team)
ties <- summary(x[ties,][p[ties,]$worl != "Tie",]$team)
perf <- table(x$team, x$opp.pts < p$opp.pts)
colnames(perf) <- c("opp.up", "opp.dn")
cbind(luck2=wins-loss, W.U=wins, L.U=loss, T.U=ties, perf)
}
order2 <- function(x, indices, na.last=TRUE, decreasing=FALSE) {
len <- length(indices)
len2 <- length(na.last)
len3 <- length(decreasing)
if (len < len3)
decreasing <- decreasing[1:len]
if (len > len3)
decreasing[(len3 + 1) : len] <- FALSE
if (len < len2)
na.last <- na.last[1:len]
if (len > len2)
na.last[(len2 + 1) : len] <- TRUE
for (i in len:1) {
x <- x[order(x[,indices[i]], na.last=na.last[i], decreasing=decreasing[i]),]
}
x
}
plotExpectedWins <- function(points) {
l <- ncol(points)
w <- nrow(points)
plot(c(0,w), c(0,w), type="n", xlab="Week", ylab="E[Number of Wins]", main="Expected Number of Wins", ylim=c(0,w), xlim=c(0,w), las=1)
colors <- 1:6
style <- rep(1:6, each=length(colors))
matlines(0:w, rbind(0, apply(expectedWins(points), 1, cumsum)), type='b', col=colors, lty=style, lwd=1, pch='°')
legend("topleft", colnames(points), col=colors, lty=style)
}
plotWinsDistribution <- function(points) {
dist <- winsDistribution(points)
l <- ncol(points)
w <- nrow(points)
mx <- max(dist)
par(mfrow=c(1,1))
nr <- ceiling(sqrt(l))
par(mfrow=c(nr, ceiling(l / nr)))
for (i in 1:l) {
barplot(dist[i,], xlim=c(0,w+2), ylim=c(0,mx), main=colnames(points)[i], xlab="E[Wins] Distribution", space=0, xpd=FALSE)
}
par(mfrow=c(1,1))
invisible(dist)
}
schedulePermutations <- function(points, schedule, reps=1000) {
teams <- colnames(points)
l <- length(teams)
wins <- matrix(0, reps, l)
colnames(wins) <- teams
for (i in 1:reps) {
wins[i,] <- with(standings(gamesData(points[,sample(l)], schedule))[teams,], W + T / 2)
}
wins
}
plotWinsDistribution2 <- function(wins) {
l <- ncol(wins)
mx <- max(wins)
par(mfrow=c(1,1))
nr <- ceiling(sqrt(l))
par(mfrow=c(nr, ceiling(l / nr)))
for (i in 1:l) {
hist(wins[,i], xlim=c(0,mx+2), ylim=c(0,1), main=colnames(wins)[i], xlab="Random Win Distribution")
}
par(mfrow=c(1,1))
}
rrt <- function(x, team) {
w <- max(x$week) - min(x$week) + 1
l <- nrow(x) / w
teams <- as.character(x$team[seq(1, l * w, w)])
subset <- x[x$team == team,]
setsub <- x[x$team != team,]
plot(range(subset$pts), range(gd$pts), type='n', xlab="My Scores", ylab="Their Scores", main=team)
abline(0, 1, lty=2, col=8)
points(rep(subset$pts, l-1), setsub$pts, pch=1, cex=0.8)
points(subset$pts, subset$pts, pch=19, cex=0.8, col=heat.colors(l)[subset$rank])
points(subset$pts, subset$opp.pts, pch=1, cex=0.8, col=heat.colors(l)[subset$opp.rank])
}
rrt2 <- function(x, team) {
w1 <- min(x$week)
wN <- max(x$week)
w <- wN - w1 + 1
l <- nrow(x) / w
teams <- as.character(x$team[seq(1, l * w, w)])
subset <- x[x$team == team,]
setsub <- x[x$team != team,]
diffs <- setsub$pts - rep(subset$pts, l-1)
#opp <- apply(outer(subset$opp.team, teams, "=="), 1, which)
plot(c(w1,wN), range(diffs), type='n', xlab="Weeks", ylab="Diffs", main=team)
abline(h=0,lty=2, col=8)
points(rep(w1:wN, l-1), diffs, pch=1, cex=0.8)
points(w1:wN, rep(0, w), pch=18, cex=0.8, col=heat.colors(l)[subset$rank])
points(w1:wN, -subset$diff.pts, pch=1, cex=0.8, col=heat.colors(l)[subset$opp.rank])
}
Team 1 Team 2 Team 3 Team 4 Team 5 Team 6 Team 7 Team 8 Team 9 Team 10 Team 11 Team 12 Team 13 Team 14 Team 15 Team 16
47.16 59.29 62.65 68.22 100.79 66.75 54.84 110.06 43.28 69.56 70.45 74.6 67.83 79.76 70.79 33.2
58.53 64.74 91.5 48.15 102.09 72.09 77.78 114 40.65 96.25 49.71 110.24 78.77 53.87 52.53 47.08
66.95 59.67 73.17 102.35 72.07 53.52 58.25 91.74 51.25 80.57 67.15 65.85 73.71 90.4 82.28 76.57
93.82 74.04 44.82 61.95 134.09 57.37 70.7 69.89 59.2 78.02 77.16 61.22 62.76 92.95 48.65 37.53
78.37 67.55 74.82 52.77 45.27 68.1 62.88 93.38 71.49 114.86 57.8 81.68 73.91 61.72 81.92 81.93
49.63 64.15 55.61 90.36 91 42.08 86.96 73.89 80.77 71.34 54.94 31.35 59.27 74.87 69.9 49.58
47.12 83.52 41.45 69.75 67.97 59.56 84.02 68.33 53.47 107.25 56.69 89.29 53.34 130.86 73.03 66.74
40.45 67.98 39.37 31.05 107.97 41.78 83.95 61.46 67.85 72.7 27.7 94.12 71.41 87.84 62.76 75.64
37.8 85.11 44.17 41.87 92.84 63.15 77.36 73.43 61.9 101.48 27.3 63.56 66.75 98.77 61.58 46
10.4 110.39 83.83 59.4 75.38 108.42 84.92 60 61.26 67.41 58.09 84.89 68.19 101.53 65.69 104.31
58.26 53.77 70.35 51.67 105.88 60.59 103.89 127.87 63.68 96.52 83.4 89.95 89.7 73.66 41.08 87.61
34.3 91.91 39.67 48.13 93.51 100.34 96.99 100.72 62.5 91.23 79.87 59.55 105.39 71.81 62.04 63.01
44.75 63.03 37.73 69.27 88.54 39.19 81.06 81.75 55.91 81.01 72.21 31.08 63.98 89.57 58.47 63.24
\documentclass[]{article}
\usepackage{setspace,relsize,comment,Sweave}
\setlength{\oddsidemargin}{0.35cm}
\setlength{\evensidemargin}{0.35cm}
\setlength{\textwidth}{16.2cm}
\setlength{\topmargin}{-1cm}
\setlength{\headsep}{0cm}
\setlength{\textheight}{23.4cm}
\SweaveOpts{prefix.string=graphics/}
\title{Biostat -- Fantasy Football 2010}
\author{Tatsuki Koyama and Will Gray}
\begin{document}
\maketitle
<<echo=f>>=
library(VICCBiostat)
source('func.r')
how.many.to.display <- 20 # for miscellaneous informtion.
## Data ##
sco <- as.matrix(read.csv('points.csv'))
sch <- as.matrix(read.csv('schedule.csv'))
div <- as.matrix(read.csv('divisions.csv'))[1,]
ord <- order(colnames(sco))
gd <- gamesData(sco, sch, div)
st <- standings(gd, div)
gd <- gd[,!(colnames(gd) %in% 'divg')]
sco <- sco[,ord]
sch <- sch[,ord]
@
\section*{Week 1 to Week 13}
\subsection*{Standings}
<<echo=f>>=
st
@
\newpage
\subsection*{By Points}
<<echo=f>>=
order2(st, c('pts'), decreasing=c(TRUE))
@
\subsection*{By Expected Wins}
<<echo=f>>=
order2(st, c('e.wins','pts'), decreasing=c(TRUE,TRUE))
@
\newpage
\subsection*{By Luck}
<<echo=f>>=
order2(st, c('luck','pts'), decreasing=c(TRUE,TRUE))
@
\subsection*{By Imaginary Defense}
<<echo=f>>=
order2(st, c('opp.pts','pts'), decreasing=c(FALSE,TRUE))
@
<<echo=f>>=
h <- how.many.to.display
wins <- gd[gd$worl == 'Win',]
loss <- gd[gd$worl == 'Loss',]
@
\newpage
\section*{Miscellaneous Information}
\subsection*{The highest points}
<<echo=f>>=
data.frame(gd[with(gd, order(pts, decreasing=TRUE)),][1:h, ], row.names=1:h)
@
\subsection*{The lowest points}
<<echo=f>>=
data.frame(gd[with(gd, order(pts, decreasing=FALSE)),][1:h, ], row.names=1:h)
@
\newpage
\subsection*{The lowest points to win a game}
<<echo=f>>=
data.frame(wins[with(wins, order(pts, decreasing=FALSE)),][1:h, ], row.names=1:h)
@
\subsection*{The highest points to lose a game}
<<echo=f>>=
data.frame(loss[with(loss, order(pts, decreasing=TRUE)),][1:h, ], row.names=1:h)
@
\newpage
\subsection*{The lowest rank to win a game}
<<echo=f>>=
data.frame(wins[with(wins, order(rank, decreasing=TRUE)),][1:h, ], row.names=1:h)
@
\subsection*{The highest rank to lose a game}
<<echo=f>>=
data.frame(loss[with(loss, order(rank, decreasing=FALSE)),][1:h, ], row.names=1:h)
@
\newpage
\subsection*{The most lopsided games by points}
<<echo=f>>=
data.frame(wins[with(wins, order(diff.pts, decreasing=TRUE)),][1:h, ], row.names=1:h)
@
\subsection*{The closest games by points}
<<echo=f>>=
data.frame(wins[with(wins, order(diff.pts, decreasing=FALSE)),][1:h, ], row.names=1:h)
@
\newpage
\subsection*{The most lopsided games by ratio}
<<echo=f>>=
data.frame(wins[with(wins, order(-ratio, decreasing=FALSE)),][1:h, ], row.names=1:h)
@
\subsection*{The closest games by ratio}
<<echo=f>>=
data.frame(wins[with(wins, order(-ratio, decreasing=TRUE)),][1:h, ], row.names=1:h)
@
\begin{center}
<<echo=f, fig=t, height=12, width=8>>=
plotExpectedWins(sco)
@
\subsection*{}
<<echo=f, fig=t, height=12, width=10>>=
plotWinsDistribution(sco)
@
\subsection*{}
<<echo=f, fig=t, height=12, width=8>>=
tplot(gd$pts ~ gd$team, type="db", jit=.2, dist=1.2, col=c(Loss=2,Tie=4,Win=1)[as.character(gd$worl)], las=2, pch=20, cex=.8, panel.first=quote({
abline(h=median(gd$pts, na.rm=TRUE), lty=3, col=gray(.75))
abline(h=quantile(gd$pts, .25, na.rm=TRUE), lty=3, col=gray(.75))
abline(h=quantile(gd$pts, .75, na.rm=TRUE), lty=3, col=gray(.75))
}), main="Points Scored")
@
\subsection*{}
<<echo=f, fig=t, height=12, width=8>>=
tplot(gd$opp.pts ~ gd$team, type="db", jit=.2, dist=1.2, col=c(Loss=2,Tie=4,Win=1)[as.character(gd$worl)], las=2, pch=20, cex=.8, panel.first=quote({
abline(h=median(gd$opp.pts, na.rm=TRUE), lty=3, col=gray(.75))
abline(h=quantile(gd$opp.pts, .25, na.rm=TRUE), lty=3, col=gray(.75))
abline(h=quantile(gd$opp.pts, .75, na.rm=TRUE), lty=3, col=gray(.75))
}), main="Points Against")
@
\subsection*{}
<<echo=f, results=tex>>=
for (team in colnames(sco)) {
postscript(paste("graphics/-",team,".eps",sep=""), width=8, height=8, paper="special", horizontal=FALSE)
rrt2(gd, team)
dev.off()
pdf(paste("graphics/-",team,".pdf",sep=""), width=8, height=8, version="1.1", encoding="default")
rrt2(gd, team)
dev.off()
cat("\\includegraphics{graphics/-",team,"}\n", sep="")
}
@
\end{center}
<<echo=f, fig=f>>=
save(sco, sch, div, gd, st, wins, loss, file=paste('data.Rdata', sep=''))
@
\end{document}
Team 1 Team 2 Team 3 Team 4 Team 5 Team 6 Team 7 Team 8 Team 9 Team 10 Team 11 Team 12 Team 13 Team 14 Team 15 Team 16
4 6 15 1 12 2 9 14 7 11 10 5 16 8 3 13
14 15 10 11 8 7 6 5 13 3 4 16 9 1 2 12
5 10 4 3 1 13 15 16 12 2 14 9 6 11 7 8
16 4 14 2 11 12 10 9 8 7 5 6 15 3 13 1
11 3 2 10 14 15 13 12 16 4 1 8 7 5 6 9
12 13 7 5 4 16 3 11 15 14 8 1 2 10 9 6
8 7 13 14 10 9 2 1 6 5 12 11 3 4 16 15
7 8 12 9 15 14 1 2 4 16 13 3 11 6 5 10
13 12 11 16 6 5 8 7 14 15 3 2 1 9 10 4
3 11 1 15 9 10 12 13 5 6 2 7 8 16 4 14
2 1 8 6 16 4 11 3 10 9 7 13 12 15 14 5
9 14 5 7 3 8 4 6 1 13 16 15 10 2 12 11
6 5 16 13 2 1 14 15 11 12 9 10 4 7 8 3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment