Instantly share code, notes, and snippets.

Embed
What would you like to do?
Foosball model
## Analyse table football
FILE_MODEL <- "bradley_terry_model/models.stan"
FILE_FUNCTIONS <- "bradley_terry_model/functions.R"
FILE_SETTINGS <- "settings.R"
FILE_SCORES <- "Scores/TV scores.xlsx"
FILE_OUTPUT <- "Scores/TV Ranking %s (after %i games).jpg"
source(FILE_SETTINGS)
source(FILE_FUNCTIONS)
show_scores_info <- function(FILE_SCORES) {
data_scores <- read_data(FILE_SCORES)
n_games <- max(data_scores$game_id)
last_games <- data_scores[game_id >= (n_games - 3)] %>%
dcast(game_id ~ team + member, value.var = "player")
cat(
"\r\n",
sprintf("Last modified datetime: %s\r\n", file.info(FILE_SCORES)$mtime),
sprintf("Total number of games: %i\r\n", max(data_scores$game_id)),
sprintf("Last 3 games:\r\n")
)
print(last_games)
return(data_scores)
}
get_scores <- function(data_scores, file_model, SETTINGS) {
lambda <- run_model(data_scores, file_model, SETTINGS)
sigma <- attr(lambda, "sigma")
score <- create_report(data_scores, lambda)
lambda <- lambda[, intersect(names(lambda), score$player), with = F]
output <- list(lambda = lambda, sigma = sigma, score = score)
return(output)
}
write_output <- function(data_scores, result, SETTINGS) {
timestamp <- strftime(Sys.time(), "%Y-%m-%d")
n_games <- max(data_scores$game_id)
filename <- sprintf(FILE_OUTPUT, timestamp, n_games)
ignore <- SETTINGS$IGNORE(result$score)
jpeg(filename = filename, width = 2200, height = 1100, quality = 72, res = 180)
p <- plot_model(result$lambda, result$score, ignore, print = FALSE)
grid.arrange(p[[1]], p[[2]], ncol = 2)
dev.off()
}
main <- function() {
## read data
data_scores <- show_scores_info(FILE_SCORES)
## create model
result <- get_scores(data_scores, FILE_MODEL, SETTINGS)
## write outcomes
write_output(data_scores, result, SETTINGS)
## post to slack
if(SETTINGS$SLACK_POST) post_slack(filename, "tafelvoetbal", SETTINGS)
}
suppressPackageStartupMessages({
library(data.table)
library(readxl)
library(magrittr)
library(stringr)
library(rstan)
library(ggplot2)
library(scales)
library(gridExtra)
library(RColorBrewer)
library(slackr)
})
# READ DATA
#read_data <- function () {
# data_scores <- fread("scores.csv", sep=",")
# data_scores <- as.data.frame(lapply(data_scores, as.character), stringsAsFactors = F)
# ## to avoid "-" being interpreted as zero in the formula
# data_scores <- replace(data_scores, data_scores == "Jan-Willem", "JW")
# return(data_scores)
#}
read_data <- function(file_data) {
data_orig <- data.table(read_excel(file_data, "Results Seizoen 2"))
data_orig <- data_orig[, c("team1_1", "team1_2", "team2_1", "team2_2",
"score_team_1", "score_team_2")]
data <- data_orig
data_orig[, game_id := .I]
data[, winst := (score_team_2 > score_team_1) + 1]
data[, diff := (score_team_1 - score_team_2)]
data[, score_team_1 := NULL]
data[, score_team_2 := NULL]
data <- melt(data, id.vars = c("game_id", "winst", "diff"),
measure.vars = c("team1_1", "team1_2", "team2_1", "team2_2"),
variable.factor = FALSE,
value.name = "player")
players <- unique(data$player) %>% sort
data[, member := str_extract(variable, "[1-2]$")]
data[, team := str_extract(variable, "^team[1-2]") %>% str_replace("team", "")]
data[, variable := NULL]
data[, player := factor(data$player, labels = players)]
data <- data[!is.na(winst)]
return(data)
}
# MODEL
run_model <- function(data, file_model, SETTINGS) {
players <- unique(data$player)
stan_data <- list(
X = matrix(as.integer(data$player), ncol = 4),
y = rep(1, max(data$game_id)),
p = data[member == 1 & team == 1, diff],
M = length(levels(players)),
N = max(data$game_id),
beta_prior = 5
)
file_model_rds <- gsub("\\.stan$", ".rds", file_model)
if (file.exists(file_model_rds)) {
mod <- readRDS(file_model_rds)
} else {
mod <- stan_model(file_model, model_name = "tv", auto_write = TRUE)
}
model <- stan(file_model, fit = mod, model_name = "tv",
chains = SETTINGS$SAMPLING$N_CHAIN,
thin = SETTINGS$SAMPLING$THIN,
iter = SETTINGS$SAMPLING$ITER,
verbose = FALSE,
data = stan_data)
output <- as.matrix(model)[, 1:stan_data$M]
sigma <- as.matrix(model)[, stan_data$M + 1]
lambda <- data.table(output)
names(lambda) <- sort(levels(data$player))
attr(lambda, "sigma") <- sigma
return(lambda)
}
# ANALYZE RESULT
create_report <- function(data, lambda) {
score <- data[, .(.N, winst = mean(2 - as.integer(team))), by = player][order(player)]
output <- as.matrix(lambda)
output <- output[, colnames(output) %in% score$player]
score[, score := apply(output, 2, function(x) median(x))]
score[, score_sd := apply(output, 2, function(x) sd(x))]
#score[, score_corrected := score * N / (N + 5)]
score[N > 30, score_corrected := score]
score[N <= 30, score_corrected := score * N / (15 + N^2/60)]
score <- score[order(-score_corrected)]
score[N > 5][order(-score), .(rank = .I, player, n_games = N, won = winst, score, score_sd)]
return(score)
}
## DENSITY PLOT
plot_model <- function(lambda, score, ignore = c("David", "Anna", "Winter"), print = TRUE) {
data <- melt(lambda, measure.vars = colnames(lambda),
variable.name = "player", value.name = "score")
players <- data[, .(score = median(score), sd = sd(score)), by = player]
players <- players[order(score)]
data$player <- factor(data$player, levels = rev(as.character(score$player)))
players$player <- factor(players$player, levels = rev(as.character(score$player)))
data_adj <- merge(data, players[, .(player, median = score)], by = "player")
data_adj <- data_adj[!player %in% ignore]
p <- data_adj[player %in% players[sd < .2, player]] %>% ggplot +
geom_violin(aes(x = player, y = score, fill = median), alpha = 1) +
scale_fill_gradient(low = brewer.pal(3,"Dark2")[1],
high = brewer.pal(3,"Dark2")[2]) +
#theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_reverse() +
coord_flip() +
guides(fill = FALSE)
tbl <- score[
!player %in% ignore & score_sd < .2
, .(
player,
N,
winst = sprintf("%4.2f %%", winst * 100),
score = sprintf("%4.2f %%", score * 100),
sd = sprintf("%4.2f %%", score_sd * 100),
score_norm = sprintf("%4.2f %%", score_corrected * 100)
)
]
if(print == TRUE) {
dev.new(width=20, height=11)
grid.arrange(
p,
tableGrob(tbl),
ncol = 2
)
} else {
return(list(p, tableGrob(tbl)))
}
invisible()
}
## PREDICTIONS
predict_model <- function(lambda, data) {
if (class(data) == "character") {
data <- data.table(
game_id = 0,
winst = rep(1, 4),
player = data,
member = c(1, 2, 1, 2),
team = c(1, 1, 2, 2)
)
}
data_wide <- dcast(data, game_id ~ team + member, value.var = "player")
data_pred <- data_wide[, .(dlambda =
(lambda[[`1_1`]] + lambda[[`1_2`]]) -
((lambda[[`2_1`]] + lambda[[`2_2`]]))
), by = "game_id"]
data_pred[, dlambda := dlambda * attr(lambda, "sigma")]
data_pred[, prob := (1 + exp(-dlambda))^-1]
return(data_pred)
}
check_sampling <- function(lambda, ignore = c("David", "Anna")) {
data_lambda <- copy(lambda)
data_lambda <- melt(data_lambda[, id := .I],
id.vars = "id",
variable.name = "player",
value.name = "score")
data_lambda <- merge(data_lambda[, .(id, player, score)],
data_lambda[, .(id = id - 1, player, score_prev = score)],
by = c("id", "player"))
data_lambda <- data_lambda[!player %in% ignore]
p <- list()
p[[1]] <- ggplot(data_lambda) +
geom_line(aes(x = id, y = score)) +
facet_wrap(~ player) +
ggtitle("Traceplot")
p[[2]] <- ggplot(data_lambda) +
geom_jitter(aes(x = score_prev, y = score), alpha = 0.7) +
facet_wrap(~ player) +
geom_abline(intercept = 0, slope = 1, color = "gray") +
coord_equal() +
ggtitle("Autocorrelation plot")
invisible(p)
}
plot_biplot_chances <- function(lambda, ignore = c("David", "Anna")) {
data <- melt(lambda, measure.vars = colnames(lambda),
value.name = "score", variable.name = "player")
data[, ID := seq_along(score), by = player]
data <- data[!player %in% ignore]
ranking <- data[, .(score = median(score)), by = player]
ranking <- ranking[order(score)]
ranking_upside <- ranking[order(-score)]
data_pairwise <- merge(
data[, .(ID, player1 = player, score_x = score)],
data[, .(ID, player2 = player, score_y = score)],
allow.cartesian = TRUE,
by = "ID"
)
data_matrix <- data_pairwise[,
.( prob = mean((score_x - score_y) > 0)),
by = c("player1", "player2")]
data_matrix[, player1 := factor(player1, levels = ranking$player)]
data_matrix[, player2 := factor(player2, levels = ranking_upside$player)]
data_matrix[, label := sprintf("%5.1f%%", prob * 100)]
data_matrix[player1 == player2, label := ""]
data_matrix[player1 == player2, prob := NA]
ggplot(data_matrix) +
geom_tile(aes(x = player2, y = player1, fill=prob)) +
scale_fill_gradient(low = brewer.pal(3,"Dark2")[1],
high = brewer.pal(3,"Dark2")[2],
labels = percent) +
geom_text(aes(x = player2, y = player1, label = label), size=4) +
ggtitle("Chances that Player1 is better than Player2") +
guides(fill=guide_legend("Probability"))
}
post_slack <- function(filename, channel = "tafelvoetbal", SETTINGS) {
slackrSetup(channel = paste0("#", channel),
incoming_webhook_url = SETTINGS$SLACK_WEBHOOK)
Sys.setenv(SLACK_API_TOKEN = SETTINGS$SLACK_TOKEN)
slackrUpload(filename, channels = channel)
}
data {
int<lower=0> M;
int<lower=0> N;
int X[N,4];
int y[N];
}
parameters {
real<lower=0,upper=1> lambda[M];
real<lower=0> sigma;
}
model {
real p[N];
for(i in 1:N) {
p[i] = lambda[X[i,1]] + lambda[X[i,2]] - lambda[X[i,3]] - lambda[X[i,4]];
y[i] ~ bernoulli_logit(p[i] * sigma);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment