Skip to content

Instantly share code, notes, and snippets.

@CSJCampbell
Created October 17, 2016 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CSJCampbell/6823d9e3f1a7ea132c2160d89cd58e03 to your computer and use it in GitHub Desktop.
Save CSJCampbell/6823d9e3f1a7ea132c2160d89cd58e03 to your computer and use it in GitHub Desktop.
###########################################################
#
# Rate My Captain WTC 2015
# csjcampbell
# 2016-10-17
#
# Copyright 2016 CSJCampbell
# This document may be reproduced in whole or part provided
# that this notice is retained.
#
###########################################################
library(WTCTools)
library(dplyr)
setwd("C:/Users/ccampbell/Dropbox/Dev/Blog")
# wtc pairing
pairings <- read.csv(file = "wtc2015-pairings2.csv", stringsAsFactors = FALSE)
bf0 <- "Benjamin Futzi"
bf1 <- "Benjamin Fützi"
bf2 <- "Benjamin Fützi"
# wtc 2015 fix team names
wtc15f0 <- mutate(filter(wtc, year == 2015),
team1 = gsub(pattern = "^Team ", replacement = "", x = team1),
team2 = gsub(pattern = "^Team ", replacement = "", x = team2))
wtc15f0$player1[wtc15f0$player1 == bf2] <- bf0
wtc15f0$player2[wtc15f0$player2 == bf2] <- bf0
# games where pairing recorded
wtc15f <- filter(wtc15f0,
(round == 1 & (team1 == "Ireland Craic" | team2 == "Ireland Craic")) |
(round == 2 & (team1 == "England Lions" | team2 == "England Lions")) |
(round == 3 & (team1 == "USA Stripes" | team2 == "USA Stripes")) |
(round == 4 & (team1 == "Canada Goose" | team2 == "Canada Goose")) |
((round == 5 | round == 6) & (team1 == "Finland Blue" | team2 == "Finland Blue")))
nrow(pairings)
nrow(wtc15f)
wtc15f$teamb <- pairings$TeamB
# number of games won by team B
bwon <- summarise(group_by(wtc15f, round),
wins = sum(TP[team1 == teamb],
as.numeric(!as.logical(TP[team2 == teamb]))))
# Source: local data frame [6 x 2]
#
# round wins
# (int) (dbl)
#1 1 1
#2 2 3
#3 3 3
#4 4 1
#5 5 3
#6 6 3
# ratings
load("rating2015.RData")
head(rating2015$ratings, n = 2)
# pairings
load("pairLookup15.RData")
getMatrixVal(x = pairLookup15,
list1 = "Asphyxious2", list2 = c("Xerxis", "Rasheth"))
# @title Get Values
# @description Extract probability values from data frame
# @param x vector of levels in data[xname]
# @param y vector of levels in data[yname]
# @param data data.frame with columns xname, yname and valname
# @param valname character vector naming of column containing values to extract
# @param xname single character name of column specifying row
# @param yname single character name of column specifying row
# @return vector of length x or y
# @examples
# dat1 <- data.frame(x = letters[1:3], y = letters[3:1], val1 = 2:4, val2 = 3:5)
# getVals(x = "b", y = "b", data = dat1)
## [1] 3
# getVals(x = "b", y = "b", data = dat1, valname = "val2")
## [1] 4
# getVals(x = "b", y = c("b", "c"), data = dat1, valname = c("val1", "val2"))
## [1] 3 0
# getVals(x = c("b", "a"), y = c("b", "c"), data = dat1, valname = c("val1", "val2"))
## [1] 3 3
getVals <- function(x, y,
data, valname = 3,
xname = 1, yname = 2) {
if (missing(x)) { stop("x is missing") }
if (missing(y)) { stop("y is missing") }
if (length(valname) < 1) { stop("valname must be length 1 or more") }
maxlen <- max(length(x), length(y), length(valname))
if (length(x) != length(y) && length(x) != 1L && length(y) != 1L) {
stop("x and y must be length y or length 1")
}
if (length(x) != maxlen) {
x <- rep(x, times = maxlen)
}
if (length(y) != maxlen) {
y <- rep(y, times = maxlen)
}
if (length(valname) != maxlen) {
valname <- rep(valname, times = maxlen)
}
res <- logical(maxlen)
for (ind in seq_len(maxlen)) {
ans <- data[data[[xname]] == x[ind] & data[[yname]] == y[ind], valname[ind]]
if (length(ans) != 1L) { warning(length(ans), " records identified in data: ",
paste(ans, collapse = ", ")) } else {
res[ind] <- ans
}
}
res
}
# register players
playerUnique <- unique(c(pairings$PlayerA, pairings$PlayerB))
playerUnique[!playerUnique %in% rating2015$ratings$Player]
#if (!"Benjamin Futzi" %in% playerUnique) { bf <- NULL }
#bf2 %in% rating2015$ratings$Player
rating2015$ratings$Player[rating2015$ratings$Player == bf2] <- bf0
ratingSub <- rating2015$ratings[rating2015$ratings$Player %in% c(playerUnique, bf2), ]
rownames(ratingSub) <- ratingSub$Player
pairings$RatingA <- ratingSub[pairings$PlayerA, "Rating"]
pairings$RatingB <- ratingSub[pairings$PlayerB, "Rating"]
library(tidyr)
# for permutations
library(gtools)
res <- vector(mode = "list", length = 6)
nsim <- 130
if (!file.exists("res_sim_pairings.RData")) {
system.time(
for (i in 1:6) {
# games for round
teamsi <- select(filter(wtc15f, round == i), -round, -game_id)
# team A
team1isA <- teamsi$teamb != teamsi$team1
teams <- data.frame(
teama = 0,
playera = 0,
winsa = 0,
teamb = teamsi$teamb,
playerb = 0,
winsb = 0,
stringsAsFactors = FALSE)
teams$teama[team1isA] <- teamsi$team1[team1isA]
teams$teama[!team1isA] <- teamsi$team2[!team1isA]
# player a
teams$playera[team1isA] <- teamsi$player1[team1isA]
teams$playera[!team1isA] <- teamsi$player2[!team1isA]
# player b
teams$playerb[!team1isA] <- teamsi$player1[!team1isA]
teams$playerb[team1isA] <- teamsi$player2[team1isA]
# wins a
teams$winsa[team1isA] <- teamsi$TP[team1isA]
teams$winsa[!team1isA] <- as.numeric(!as.logical(teamsi$TP[!team1isA]))
# wins b
teams$winsb[!team1isA] <- teamsi$TP[!team1isA]
teams$winsb[team1isA] <- as.numeric(!as.logical(teamsi$TP[team1isA]))
# a: get possible lists for each player for each round
castersaTemp <- gather(
select(
filter(wtc15f0, player1 %in% teams$playera | player2 %in% teams$playera),
player1, player2, list1, list2),
key = role, value = player, player1:player2)
castersaTemp <- filter(castersaTemp,
player %in% teams$playera)
castersaTemp$list1[castersaTemp$role == "player2"] <- NA
castersaTemp$list2[castersaTemp$role == "player1"] <- NA
castersaTemp <- na.omit(gather(castersaTemp,
key = playernum, value = lista, list1:list2))
lista <- tapply(
X = castersaTemp$lista,
INDEX = castersaTemp$player,
FUN = unique)
# b: get possible lists for each player for each round
castersbTemp <- gather(
select(
filter(wtc15f0, player1 %in% teams$playerb | player2 %in% teams$playerb),
player1, player2, list1, list2),
key = role, value = player, player1:player2)
castersbTemp <- filter(castersbTemp,
player %in% teams$playerb)
castersbTemp$list1[castersbTemp$role == "player2"] <- NA
castersbTemp$list2[castersbTemp$role == "player1"] <- NA
castersbTemp <- na.omit(gather(castersbTemp,
key = playernum, value = listb, list1:list2))
listb <- tapply(
X = castersbTemp$listb,
INDEX = castersbTemp$player,
FUN = unique)
###########################################################################
# calculate outcome probability for each player combination
# for each (120) list combination
combs <- expand.grid(teams[c("playera", "playerb")])
combs$scorea <- ratingSub[levels(combs$playera)[as.numeric(combs$playera)], "Rating"]
combs$scoreb <- ratingSub[levels(combs$playerb)[as.numeric(combs$playerb)], "Rating"]
combs[, c("lista1", "lista2")] <- do.call("rbind", lista[combs$playera])
combs[, c("listb1", "listb2")] <- do.call("rbind", listb[combs$playerb])
combs[, "homeadva11"] <- getMatrixVal(x = pairLookup15,
list1 = combs[, "lista1"], list2 = combs[, "listb1"])
combs[, "homeadva12"] <- getMatrixVal(x = pairLookup15,
list1 = combs[, "lista1"], list2 = combs[, "listb2"])
combs[, "homeadva21"] <- getMatrixVal(x = pairLookup15,
list1 = combs[, "lista2"], list2 = combs[, "listb1"])
combs[, "homeadva22"] <- getMatrixVal(x = pairLookup15,
list1 = combs[, "lista2"], list2 = combs[, "listb2"])
combs[, "proba11"] <- predict(object = rating2015,
newdata = cbind(round = i, combs[c("playera", "playerb")]),
tng = 0, gamma = combs$homeadva11)
combs[, "proba12"] <- predict(object = rating2015,
newdata = cbind(round = i, combs[c("playera", "playerb")]),
tng = 0, gamma = combs$homeadva12)
combs[, "proba21"] <- predict(object = rating2015,
newdata = cbind(round = i, combs[c("playera", "playerb")]),
tng = 0, gamma = combs$homeadva21)
combs[, "proba22"] <- predict(object = rating2015,
newdata = cbind(round = i, combs[c("playera", "playerb")]),
tng = 0, gamma = combs$homeadva22)
###########################################################################
# print which team was advantaged
teamATot <- unlist(summarise(filter(combs, !duplicated(playera)), sum(scorea)))
teamBTot <- unlist(summarise(filter(combs, !duplicated(playerb)), sum(scoreb)))
pc <- 100 * unname((teamATot - teamBTot) / teamATot)
if (pc < -5) {
descr <- ", has lower skill than Team B, "
} else {
if (pc < 5) {
descr <- ", has similar skill to Team B, "
} else {
descr <- ", has greater skill than Team B, "
}
}
cat(paste0("Team A, ", teams$teama[1],
descr, teams$teamb[1],
", (", round(pc), "%)\n"))
###########################################################################
#
###########################################################################
# state combinations
permsa <- teams[, "playera"]
# matrix, 1 row per combination
permsb <- permutations(n = 5, r = 5, v = teams[, "playerb"],
set = TRUE, repeats.allowed = FALSE)
# combine probabilities
probname <- c("proba11", "proba12", "proba21", "proba22")
pn <- c(0, 0.1, 0.5, 0.9, 1)
probs <- matrix(NA_real_, nrow = nrow(permsb), ncol = length(pn),
dimnames = list(NULL, pn))
# randomly pair lists nsim times, then find intervals of probabilities of outcomes
for (pp in seq_len(nrow(permsb))) {
outcome <- numeric(nsim)
for (pcomb in seq_len(nsim)) {
# extract probability values from data frame
prb <- matrix(getVals(x = permsa, y = permsb[pp, ],
data = combs, valname = sample(x = probname, size = 5 * nsim, replace = TRUE),
xname = "playera", yname = "playerb"),
nrow = nsim, ncol = 5, byrow = TRUE)
sim <- cbind(rbinom(n = nsim, size = 1, prob = prb[, 1]),
rbinom(n = nsim, size = 1, prob = prb[, 2]),
rbinom(n = nsim, size = 1, prob = prb[, 3]),
rbinom(n = nsim, size = 1, prob = prb[, 4]),
rbinom(n = nsim, size = 1, prob = prb[, 5]))
outcome[pcomb] <- sum(apply(X = sim,
MARGIN = 1, FUN = function(x) { sum(x) >= 3 })) / nsim
}
probs[pp, ] <- quantile(outcome, probs = pn)
}
###########################################################################
# aggregate
###########################################################################
res[[i]] <- list(teama = teams$teama[1],
teamb = teams$teamb[1],
selecta = teams$playera,
selectb = teams$playerb,
ratinga = ratingSub[teams$playera, "Rating"],
ratingb = ratingSub[teams$playerb, "Rating"],
ranka = 6 - rank(ratingSub[teams$playera, "Rating"]),
rankb = 6 - rank(ratingSub[teams$playerb, "Rating"]),
permsa = permsa,
permsb = permsb,
probs = probs,
combs = combs)
}
)
###############################################################################
save(res, file = "res_sim_pairings.RData")
} else {
load("res_sim_pairings.RData")
}
# which combination was selected
# @examples
# zp <- letters[c(1:3, 5:4)]
# pp <- permutations(n = 5, r = 5, v = zp,
# set = TRUE, repeats.allowed = FALSE)
# whichCombSelected(singleround = list(selectb = zp, permsb = pp))
whichCombSelected <- function(singleround) {
selecta <- singleround$selecta
selectb <- singleround$selectb
permsa <- singleround$permsa
permsb <- singleround$permsb
if (!identical(permsa, selecta)) {
warning("expecting selecta and permsa to be identical") }
which(apply(X = permsb, MARGIN = 1,
FUN = function(x, y) { all(x == y) },
y = selectb))
}
#' @title Remove Plot Elements
#' @descrition Sets non-plot area graphical elements to
#' \code{element_blank}.
#' @param gg a ggplot object
#' @return a ggplot object
#' @examples
#' gg1 <- ggplot(data = mtcars, aes(x = disp, y = mpg)) + geom_point()
#' ggwipe(gg = gg1)
ggwipe <- function(gg) {
gg + #scale_x_continuous(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0)) +
xlab(label = NULL) +
ylab(label = NULL) +
theme(axis.line = element_blank(),
#axis.text.x = element_blank(),
#axis.text.y = element_blank(),
#axis.ticks = element_blank(),
legend.position = "none",
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.background = element_blank())
}
###########################################################
# combinations probability density
library(tidyr)
library(dplyr)
library(ggplot2)
library(grid)
library(viridis)
doOrd <- TRUE
for (i in seq_along(res)) {
# team a
ca <- res[[i]]$permsa
# team b
cb <- res[[i]]$permsb[1, ]
# permutations of team b that a could face
cbdat <- as.data.frame(res[[i]]$permsb)
nn <- length(ca)
# coerce to factor class for plotting
for (cc in seq_along(cbdat)) {
cbdat[[cc]] <- factor(cbdat[[cc]],
levels = res[[i]]$selectb[res[[i]]$rankb])
}
# estimated outcomes
pdat <- cbind(index = seq_len(nrow(res[[i]]$probs)),
as.data.frame(res[[i]]$probs))
colnames(pdat)[-1] <- paste0("q", colnames(pdat)[-1])
# reshape permutations for plotting
cbdatm <- mutate(
gather(cbind(cbdat, index = seq_len(nrow(cbdat))),
key = pick, value = who, -index),
pick = as.integer(factor(pick)),
who = factor(who))
# order permutations from worst for a
if (doOrd) {
ord <- order(pdat$q0.5)
pdat$q0.1 <- pdat$q0.1[ord]
pdat$q0.5 <- pdat$q0.5[ord]
pdat$q0.9 <- pdat$q0.9[ord]
keyrow <- filter(cbdatm, pick == 1)
keyrow$pick <- 0
for (mi in seq_along(ca)) {
cbdatm[seq.int(from = (mi - 1) * nrow(cbdat) + 1, to = mi * nrow(cbdat)), "who"] <-
cbdatm[seq.int(from = (mi - 1) * nrow(cbdat) + 1, to = mi * nrow(cbdat)), "who"][ord]
}
cbdatm <- rbind(keyrow, cbdatm)
nn <- nn + 1
}
# create tiles with probability trace
gp <- ggplot(data = pdat,
aes(x = index, y = q0.5)) +
geom_tile(data = cbdatm,
aes(x = index,
y = (pick + 0.5) / nn, fill = who)) +
geom_line(lwd = 2, color = I("gray")) +
geom_line(aes(y = q0.1), lwd = 1.1, color = I("gray")) +
geom_line(aes(y = q0.9), lwd = 1.1, color = I("gray")) +
geom_line(lwd = 1.2) +
geom_line(aes(y = q0.1), lwd = 0.6) +
geom_line(aes(y = q0.9), lwd = 0.6) +
ggtitle(paste0("Round ", i, ": Probabilty ",
res[[i]]$teama, " beats ", res[[i]]$teamb))
# tidy up appearance
gp2 <- ggwipe(gp) +
scale_x_continuous(breaks = 1:5 * 24 - 12,
labels = gsub(pattern = " ", replacement = "\n", x = cb)) +
scale_fill_manual(values = viridis(5, end = 0.9)) +
theme(plot.margin = unit(c(1, 3, 1, 1), "lines"),
axis.text = element_text(colour = "black"))
# add secondary axis labels
for (ai in seq_along(ca)) {
gp2 <- gp2 + annotation_custom(
grob = textGrob(label = gsub(pattern = " ", replacement = "\n", x = ca)[ai],
hjust = 0, gp = gpar(cex = 0.8)),
ymin = (((nn - length(ca) + 0.5):nn) / nn)[ai], # Vertical position of the textGrob
ymax = (((nn - length(ca) + 0.5):nn) / nn)[ai],
xmin = 123, # Note: The grobs are positioned outside the plot area
xmax = 123)
}
gt <- ggplot_gtable(ggplot_build(gp2))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
# create file
png(file = paste0("probs_round", i, "_",
res[[i]]$teama, "_", res[[i]]$teamb,
ifelse(doOrd, "_ordered", ""), ".png"),
height = 600, width = 600, res = 100)
grid.draw(gt)
dev.off()
}
###########################################################
# captain performance
labs <- matrix(NA, nrow = length(res), ncol = 2)
out <- matrix(NA, nrow = length(res), ncol = 7,
dimnames = list(NULL, c("teama", "teamb", "whichv", "whichq", "min", "max", "med")))
# collate results
for (ii in seq_along(res)) {
out[ii, c("min", "max")] <- range(res[[ii]]$probs[, "0.5"])
out[ii, "med"] <- median(res[[ii]]$probs[, "0.5"])
wc <- whichCombSelected(res[[ii]])
out[ii, "whichv"] <- res[[ii]]$probs[wc, "0.5"]
out[ii, "whichq"] <- which(order(res[[ii]]$probs[, "0.5"]) == wc) / 120
labs[ii, ] <- c(res[[ii]]$teama, res[[ii]]$teamb)
}
png(file = paste0("probs_team_a_wins_and_actual.png"),
height = 600, width = 700, res = 110, pointsize = 14)
par(mar = c(4.5, 7, 3.2, 7), cex.main = 0.9, cex = 0.9)
# new plotting area
plot(0:5 * 20, 1:6, type = "n", ylim = c(0.5, 6.5),
xlab = "Quantile of Predicted Outcomes (%)", ylab = "",
main = paste0("10th, 50th & 90th Percentiles of Probability that Team A beats Team B\n",
"for all Combinations of Pairings, and Selected Pairing"))
# each round
abline(h = 1:6)
points(x = rep(c(10, 50, 90), times = 6),
y = rep(1:6, each = 3), cex = 7, pch = 21, bg = "white")
text(x = rep(c(10, 50, 90), times = 6),
y = rep(1:6, each = 3),
labels = sprintf(fmt = "%.2f", c(t(out[, c("min", "med", "max")]))))
points(x = 100 * out[, "whichq"],
y = 1:6, cex = 7, pch = 21, bg = "#44015422")
par(xpd = TRUE)
# tidy labels
lb <- gsub(pattern = " ", replacement = "\n", x = c(t(labs)))
lb[6] <- "Germany\nDichter & Denker"
text(x = rep(c(-31, 126), times = 6),
y = rep(1:6, each = 2),
labels = lb)
par("xpd" = FALSE)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment