Created
October 17, 2016 14:29
-
-
Save CSJCampbell/6823d9e3f1a7ea132c2160d89cd58e03 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################### | |
# | |
# Rate My Captain WTC 2015 | |
# csjcampbell | |
# 2016-10-17 | |
# | |
# Copyright 2016 CSJCampbell | |
# This document may be reproduced in whole or part provided | |
# that this notice is retained. | |
# | |
########################################################### | |
library(WTCTools) | |
library(dplyr) | |
setwd("C:/Users/ccampbell/Dropbox/Dev/Blog") | |
# wtc pairing | |
pairings <- read.csv(file = "wtc2015-pairings2.csv", stringsAsFactors = FALSE) | |
bf0 <- "Benjamin Futzi" | |
bf1 <- "Benjamin Fützi" | |
bf2 <- "Benjamin Fützi" | |
# wtc 2015 fix team names | |
wtc15f0 <- mutate(filter(wtc, year == 2015), | |
team1 = gsub(pattern = "^Team ", replacement = "", x = team1), | |
team2 = gsub(pattern = "^Team ", replacement = "", x = team2)) | |
wtc15f0$player1[wtc15f0$player1 == bf2] <- bf0 | |
wtc15f0$player2[wtc15f0$player2 == bf2] <- bf0 | |
# games where pairing recorded | |
wtc15f <- filter(wtc15f0, | |
(round == 1 & (team1 == "Ireland Craic" | team2 == "Ireland Craic")) | | |
(round == 2 & (team1 == "England Lions" | team2 == "England Lions")) | | |
(round == 3 & (team1 == "USA Stripes" | team2 == "USA Stripes")) | | |
(round == 4 & (team1 == "Canada Goose" | team2 == "Canada Goose")) | | |
((round == 5 | round == 6) & (team1 == "Finland Blue" | team2 == "Finland Blue"))) | |
nrow(pairings) | |
nrow(wtc15f) | |
wtc15f$teamb <- pairings$TeamB | |
# number of games won by team B | |
bwon <- summarise(group_by(wtc15f, round), | |
wins = sum(TP[team1 == teamb], | |
as.numeric(!as.logical(TP[team2 == teamb])))) | |
# Source: local data frame [6 x 2] | |
# | |
# round wins | |
# (int) (dbl) | |
#1 1 1 | |
#2 2 3 | |
#3 3 3 | |
#4 4 1 | |
#5 5 3 | |
#6 6 3 | |
# ratings | |
load("rating2015.RData") | |
head(rating2015$ratings, n = 2) | |
# pairings | |
load("pairLookup15.RData") | |
getMatrixVal(x = pairLookup15, | |
list1 = "Asphyxious2", list2 = c("Xerxis", "Rasheth")) | |
# @title Get Values | |
# @description Extract probability values from data frame | |
# @param x vector of levels in data[xname] | |
# @param y vector of levels in data[yname] | |
# @param data data.frame with columns xname, yname and valname | |
# @param valname character vector naming of column containing values to extract | |
# @param xname single character name of column specifying row | |
# @param yname single character name of column specifying row | |
# @return vector of length x or y | |
# @examples | |
# dat1 <- data.frame(x = letters[1:3], y = letters[3:1], val1 = 2:4, val2 = 3:5) | |
# getVals(x = "b", y = "b", data = dat1) | |
## [1] 3 | |
# getVals(x = "b", y = "b", data = dat1, valname = "val2") | |
## [1] 4 | |
# getVals(x = "b", y = c("b", "c"), data = dat1, valname = c("val1", "val2")) | |
## [1] 3 0 | |
# getVals(x = c("b", "a"), y = c("b", "c"), data = dat1, valname = c("val1", "val2")) | |
## [1] 3 3 | |
getVals <- function(x, y, | |
data, valname = 3, | |
xname = 1, yname = 2) { | |
if (missing(x)) { stop("x is missing") } | |
if (missing(y)) { stop("y is missing") } | |
if (length(valname) < 1) { stop("valname must be length 1 or more") } | |
maxlen <- max(length(x), length(y), length(valname)) | |
if (length(x) != length(y) && length(x) != 1L && length(y) != 1L) { | |
stop("x and y must be length y or length 1") | |
} | |
if (length(x) != maxlen) { | |
x <- rep(x, times = maxlen) | |
} | |
if (length(y) != maxlen) { | |
y <- rep(y, times = maxlen) | |
} | |
if (length(valname) != maxlen) { | |
valname <- rep(valname, times = maxlen) | |
} | |
res <- logical(maxlen) | |
for (ind in seq_len(maxlen)) { | |
ans <- data[data[[xname]] == x[ind] & data[[yname]] == y[ind], valname[ind]] | |
if (length(ans) != 1L) { warning(length(ans), " records identified in data: ", | |
paste(ans, collapse = ", ")) } else { | |
res[ind] <- ans | |
} | |
} | |
res | |
} | |
# register players | |
playerUnique <- unique(c(pairings$PlayerA, pairings$PlayerB)) | |
playerUnique[!playerUnique %in% rating2015$ratings$Player] | |
#if (!"Benjamin Futzi" %in% playerUnique) { bf <- NULL } | |
#bf2 %in% rating2015$ratings$Player | |
rating2015$ratings$Player[rating2015$ratings$Player == bf2] <- bf0 | |
ratingSub <- rating2015$ratings[rating2015$ratings$Player %in% c(playerUnique, bf2), ] | |
rownames(ratingSub) <- ratingSub$Player | |
pairings$RatingA <- ratingSub[pairings$PlayerA, "Rating"] | |
pairings$RatingB <- ratingSub[pairings$PlayerB, "Rating"] | |
library(tidyr) | |
# for permutations | |
library(gtools) | |
res <- vector(mode = "list", length = 6) | |
nsim <- 130 | |
if (!file.exists("res_sim_pairings.RData")) { | |
system.time( | |
for (i in 1:6) { | |
# games for round | |
teamsi <- select(filter(wtc15f, round == i), -round, -game_id) | |
# team A | |
team1isA <- teamsi$teamb != teamsi$team1 | |
teams <- data.frame( | |
teama = 0, | |
playera = 0, | |
winsa = 0, | |
teamb = teamsi$teamb, | |
playerb = 0, | |
winsb = 0, | |
stringsAsFactors = FALSE) | |
teams$teama[team1isA] <- teamsi$team1[team1isA] | |
teams$teama[!team1isA] <- teamsi$team2[!team1isA] | |
# player a | |
teams$playera[team1isA] <- teamsi$player1[team1isA] | |
teams$playera[!team1isA] <- teamsi$player2[!team1isA] | |
# player b | |
teams$playerb[!team1isA] <- teamsi$player1[!team1isA] | |
teams$playerb[team1isA] <- teamsi$player2[team1isA] | |
# wins a | |
teams$winsa[team1isA] <- teamsi$TP[team1isA] | |
teams$winsa[!team1isA] <- as.numeric(!as.logical(teamsi$TP[!team1isA])) | |
# wins b | |
teams$winsb[!team1isA] <- teamsi$TP[!team1isA] | |
teams$winsb[team1isA] <- as.numeric(!as.logical(teamsi$TP[team1isA])) | |
# a: get possible lists for each player for each round | |
castersaTemp <- gather( | |
select( | |
filter(wtc15f0, player1 %in% teams$playera | player2 %in% teams$playera), | |
player1, player2, list1, list2), | |
key = role, value = player, player1:player2) | |
castersaTemp <- filter(castersaTemp, | |
player %in% teams$playera) | |
castersaTemp$list1[castersaTemp$role == "player2"] <- NA | |
castersaTemp$list2[castersaTemp$role == "player1"] <- NA | |
castersaTemp <- na.omit(gather(castersaTemp, | |
key = playernum, value = lista, list1:list2)) | |
lista <- tapply( | |
X = castersaTemp$lista, | |
INDEX = castersaTemp$player, | |
FUN = unique) | |
# b: get possible lists for each player for each round | |
castersbTemp <- gather( | |
select( | |
filter(wtc15f0, player1 %in% teams$playerb | player2 %in% teams$playerb), | |
player1, player2, list1, list2), | |
key = role, value = player, player1:player2) | |
castersbTemp <- filter(castersbTemp, | |
player %in% teams$playerb) | |
castersbTemp$list1[castersbTemp$role == "player2"] <- NA | |
castersbTemp$list2[castersbTemp$role == "player1"] <- NA | |
castersbTemp <- na.omit(gather(castersbTemp, | |
key = playernum, value = listb, list1:list2)) | |
listb <- tapply( | |
X = castersbTemp$listb, | |
INDEX = castersbTemp$player, | |
FUN = unique) | |
########################################################################### | |
# calculate outcome probability for each player combination | |
# for each (120) list combination | |
combs <- expand.grid(teams[c("playera", "playerb")]) | |
combs$scorea <- ratingSub[levels(combs$playera)[as.numeric(combs$playera)], "Rating"] | |
combs$scoreb <- ratingSub[levels(combs$playerb)[as.numeric(combs$playerb)], "Rating"] | |
combs[, c("lista1", "lista2")] <- do.call("rbind", lista[combs$playera]) | |
combs[, c("listb1", "listb2")] <- do.call("rbind", listb[combs$playerb]) | |
combs[, "homeadva11"] <- getMatrixVal(x = pairLookup15, | |
list1 = combs[, "lista1"], list2 = combs[, "listb1"]) | |
combs[, "homeadva12"] <- getMatrixVal(x = pairLookup15, | |
list1 = combs[, "lista1"], list2 = combs[, "listb2"]) | |
combs[, "homeadva21"] <- getMatrixVal(x = pairLookup15, | |
list1 = combs[, "lista2"], list2 = combs[, "listb1"]) | |
combs[, "homeadva22"] <- getMatrixVal(x = pairLookup15, | |
list1 = combs[, "lista2"], list2 = combs[, "listb2"]) | |
combs[, "proba11"] <- predict(object = rating2015, | |
newdata = cbind(round = i, combs[c("playera", "playerb")]), | |
tng = 0, gamma = combs$homeadva11) | |
combs[, "proba12"] <- predict(object = rating2015, | |
newdata = cbind(round = i, combs[c("playera", "playerb")]), | |
tng = 0, gamma = combs$homeadva12) | |
combs[, "proba21"] <- predict(object = rating2015, | |
newdata = cbind(round = i, combs[c("playera", "playerb")]), | |
tng = 0, gamma = combs$homeadva21) | |
combs[, "proba22"] <- predict(object = rating2015, | |
newdata = cbind(round = i, combs[c("playera", "playerb")]), | |
tng = 0, gamma = combs$homeadva22) | |
########################################################################### | |
# print which team was advantaged | |
teamATot <- unlist(summarise(filter(combs, !duplicated(playera)), sum(scorea))) | |
teamBTot <- unlist(summarise(filter(combs, !duplicated(playerb)), sum(scoreb))) | |
pc <- 100 * unname((teamATot - teamBTot) / teamATot) | |
if (pc < -5) { | |
descr <- ", has lower skill than Team B, " | |
} else { | |
if (pc < 5) { | |
descr <- ", has similar skill to Team B, " | |
} else { | |
descr <- ", has greater skill than Team B, " | |
} | |
} | |
cat(paste0("Team A, ", teams$teama[1], | |
descr, teams$teamb[1], | |
", (", round(pc), "%)\n")) | |
########################################################################### | |
# | |
########################################################################### | |
# state combinations | |
permsa <- teams[, "playera"] | |
# matrix, 1 row per combination | |
permsb <- permutations(n = 5, r = 5, v = teams[, "playerb"], | |
set = TRUE, repeats.allowed = FALSE) | |
# combine probabilities | |
probname <- c("proba11", "proba12", "proba21", "proba22") | |
pn <- c(0, 0.1, 0.5, 0.9, 1) | |
probs <- matrix(NA_real_, nrow = nrow(permsb), ncol = length(pn), | |
dimnames = list(NULL, pn)) | |
# randomly pair lists nsim times, then find intervals of probabilities of outcomes | |
for (pp in seq_len(nrow(permsb))) { | |
outcome <- numeric(nsim) | |
for (pcomb in seq_len(nsim)) { | |
# extract probability values from data frame | |
prb <- matrix(getVals(x = permsa, y = permsb[pp, ], | |
data = combs, valname = sample(x = probname, size = 5 * nsim, replace = TRUE), | |
xname = "playera", yname = "playerb"), | |
nrow = nsim, ncol = 5, byrow = TRUE) | |
sim <- cbind(rbinom(n = nsim, size = 1, prob = prb[, 1]), | |
rbinom(n = nsim, size = 1, prob = prb[, 2]), | |
rbinom(n = nsim, size = 1, prob = prb[, 3]), | |
rbinom(n = nsim, size = 1, prob = prb[, 4]), | |
rbinom(n = nsim, size = 1, prob = prb[, 5])) | |
outcome[pcomb] <- sum(apply(X = sim, | |
MARGIN = 1, FUN = function(x) { sum(x) >= 3 })) / nsim | |
} | |
probs[pp, ] <- quantile(outcome, probs = pn) | |
} | |
########################################################################### | |
# aggregate | |
########################################################################### | |
res[[i]] <- list(teama = teams$teama[1], | |
teamb = teams$teamb[1], | |
selecta = teams$playera, | |
selectb = teams$playerb, | |
ratinga = ratingSub[teams$playera, "Rating"], | |
ratingb = ratingSub[teams$playerb, "Rating"], | |
ranka = 6 - rank(ratingSub[teams$playera, "Rating"]), | |
rankb = 6 - rank(ratingSub[teams$playerb, "Rating"]), | |
permsa = permsa, | |
permsb = permsb, | |
probs = probs, | |
combs = combs) | |
} | |
) | |
############################################################################### | |
save(res, file = "res_sim_pairings.RData") | |
} else { | |
load("res_sim_pairings.RData") | |
} | |
# which combination was selected | |
# @examples | |
# zp <- letters[c(1:3, 5:4)] | |
# pp <- permutations(n = 5, r = 5, v = zp, | |
# set = TRUE, repeats.allowed = FALSE) | |
# whichCombSelected(singleround = list(selectb = zp, permsb = pp)) | |
whichCombSelected <- function(singleround) { | |
selecta <- singleround$selecta | |
selectb <- singleround$selectb | |
permsa <- singleround$permsa | |
permsb <- singleround$permsb | |
if (!identical(permsa, selecta)) { | |
warning("expecting selecta and permsa to be identical") } | |
which(apply(X = permsb, MARGIN = 1, | |
FUN = function(x, y) { all(x == y) }, | |
y = selectb)) | |
} | |
#' @title Remove Plot Elements | |
#' @descrition Sets non-plot area graphical elements to | |
#' \code{element_blank}. | |
#' @param gg a ggplot object | |
#' @return a ggplot object | |
#' @examples | |
#' gg1 <- ggplot(data = mtcars, aes(x = disp, y = mpg)) + geom_point() | |
#' ggwipe(gg = gg1) | |
ggwipe <- function(gg) { | |
gg + #scale_x_continuous(expand = c(0, 0)) + | |
#scale_y_continuous(expand = c(0, 0)) + | |
xlab(label = NULL) + | |
ylab(label = NULL) + | |
theme(axis.line = element_blank(), | |
#axis.text.x = element_blank(), | |
#axis.text.y = element_blank(), | |
#axis.ticks = element_blank(), | |
legend.position = "none", | |
panel.grid.minor = element_blank(), | |
panel.grid.major = element_blank(), | |
panel.background = element_blank(), | |
plot.background = element_blank()) | |
} | |
########################################################### | |
# combinations probability density | |
library(tidyr) | |
library(dplyr) | |
library(ggplot2) | |
library(grid) | |
library(viridis) | |
doOrd <- TRUE | |
for (i in seq_along(res)) { | |
# team a | |
ca <- res[[i]]$permsa | |
# team b | |
cb <- res[[i]]$permsb[1, ] | |
# permutations of team b that a could face | |
cbdat <- as.data.frame(res[[i]]$permsb) | |
nn <- length(ca) | |
# coerce to factor class for plotting | |
for (cc in seq_along(cbdat)) { | |
cbdat[[cc]] <- factor(cbdat[[cc]], | |
levels = res[[i]]$selectb[res[[i]]$rankb]) | |
} | |
# estimated outcomes | |
pdat <- cbind(index = seq_len(nrow(res[[i]]$probs)), | |
as.data.frame(res[[i]]$probs)) | |
colnames(pdat)[-1] <- paste0("q", colnames(pdat)[-1]) | |
# reshape permutations for plotting | |
cbdatm <- mutate( | |
gather(cbind(cbdat, index = seq_len(nrow(cbdat))), | |
key = pick, value = who, -index), | |
pick = as.integer(factor(pick)), | |
who = factor(who)) | |
# order permutations from worst for a | |
if (doOrd) { | |
ord <- order(pdat$q0.5) | |
pdat$q0.1 <- pdat$q0.1[ord] | |
pdat$q0.5 <- pdat$q0.5[ord] | |
pdat$q0.9 <- pdat$q0.9[ord] | |
keyrow <- filter(cbdatm, pick == 1) | |
keyrow$pick <- 0 | |
for (mi in seq_along(ca)) { | |
cbdatm[seq.int(from = (mi - 1) * nrow(cbdat) + 1, to = mi * nrow(cbdat)), "who"] <- | |
cbdatm[seq.int(from = (mi - 1) * nrow(cbdat) + 1, to = mi * nrow(cbdat)), "who"][ord] | |
} | |
cbdatm <- rbind(keyrow, cbdatm) | |
nn <- nn + 1 | |
} | |
# create tiles with probability trace | |
gp <- ggplot(data = pdat, | |
aes(x = index, y = q0.5)) + | |
geom_tile(data = cbdatm, | |
aes(x = index, | |
y = (pick + 0.5) / nn, fill = who)) + | |
geom_line(lwd = 2, color = I("gray")) + | |
geom_line(aes(y = q0.1), lwd = 1.1, color = I("gray")) + | |
geom_line(aes(y = q0.9), lwd = 1.1, color = I("gray")) + | |
geom_line(lwd = 1.2) + | |
geom_line(aes(y = q0.1), lwd = 0.6) + | |
geom_line(aes(y = q0.9), lwd = 0.6) + | |
ggtitle(paste0("Round ", i, ": Probabilty ", | |
res[[i]]$teama, " beats ", res[[i]]$teamb)) | |
# tidy up appearance | |
gp2 <- ggwipe(gp) + | |
scale_x_continuous(breaks = 1:5 * 24 - 12, | |
labels = gsub(pattern = " ", replacement = "\n", x = cb)) + | |
scale_fill_manual(values = viridis(5, end = 0.9)) + | |
theme(plot.margin = unit(c(1, 3, 1, 1), "lines"), | |
axis.text = element_text(colour = "black")) | |
# add secondary axis labels | |
for (ai in seq_along(ca)) { | |
gp2 <- gp2 + annotation_custom( | |
grob = textGrob(label = gsub(pattern = " ", replacement = "\n", x = ca)[ai], | |
hjust = 0, gp = gpar(cex = 0.8)), | |
ymin = (((nn - length(ca) + 0.5):nn) / nn)[ai], # Vertical position of the textGrob | |
ymax = (((nn - length(ca) + 0.5):nn) / nn)[ai], | |
xmin = 123, # Note: The grobs are positioned outside the plot area | |
xmax = 123) | |
} | |
gt <- ggplot_gtable(ggplot_build(gp2)) | |
gt$layout$clip[gt$layout$name == "panel"] <- "off" | |
# create file | |
png(file = paste0("probs_round", i, "_", | |
res[[i]]$teama, "_", res[[i]]$teamb, | |
ifelse(doOrd, "_ordered", ""), ".png"), | |
height = 600, width = 600, res = 100) | |
grid.draw(gt) | |
dev.off() | |
} | |
########################################################### | |
# captain performance | |
labs <- matrix(NA, nrow = length(res), ncol = 2) | |
out <- matrix(NA, nrow = length(res), ncol = 7, | |
dimnames = list(NULL, c("teama", "teamb", "whichv", "whichq", "min", "max", "med"))) | |
# collate results | |
for (ii in seq_along(res)) { | |
out[ii, c("min", "max")] <- range(res[[ii]]$probs[, "0.5"]) | |
out[ii, "med"] <- median(res[[ii]]$probs[, "0.5"]) | |
wc <- whichCombSelected(res[[ii]]) | |
out[ii, "whichv"] <- res[[ii]]$probs[wc, "0.5"] | |
out[ii, "whichq"] <- which(order(res[[ii]]$probs[, "0.5"]) == wc) / 120 | |
labs[ii, ] <- c(res[[ii]]$teama, res[[ii]]$teamb) | |
} | |
png(file = paste0("probs_team_a_wins_and_actual.png"), | |
height = 600, width = 700, res = 110, pointsize = 14) | |
par(mar = c(4.5, 7, 3.2, 7), cex.main = 0.9, cex = 0.9) | |
# new plotting area | |
plot(0:5 * 20, 1:6, type = "n", ylim = c(0.5, 6.5), | |
xlab = "Quantile of Predicted Outcomes (%)", ylab = "", | |
main = paste0("10th, 50th & 90th Percentiles of Probability that Team A beats Team B\n", | |
"for all Combinations of Pairings, and Selected Pairing")) | |
# each round | |
abline(h = 1:6) | |
points(x = rep(c(10, 50, 90), times = 6), | |
y = rep(1:6, each = 3), cex = 7, pch = 21, bg = "white") | |
text(x = rep(c(10, 50, 90), times = 6), | |
y = rep(1:6, each = 3), | |
labels = sprintf(fmt = "%.2f", c(t(out[, c("min", "med", "max")])))) | |
points(x = 100 * out[, "whichq"], | |
y = 1:6, cex = 7, pch = 21, bg = "#44015422") | |
par(xpd = TRUE) | |
# tidy labels | |
lb <- gsub(pattern = " ", replacement = "\n", x = c(t(labs))) | |
lb[6] <- "Germany\nDichter & Denker" | |
text(x = rep(c(-31, 126), times = 6), | |
y = rep(1:6, each = 2), | |
labels = lb) | |
par("xpd" = FALSE) | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment