Created
December 19, 2018 15:20
-
-
Save vftools/6417d85a681b2ee21d4c012d00c8fb2c to your computer and use it in GitHub Desktop.
Foosball model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Analyse table football | |
FILE_MODEL <- "bradley_terry_model/models.stan" | |
FILE_FUNCTIONS <- "bradley_terry_model/functions.R" | |
FILE_SETTINGS <- "settings.R" | |
FILE_SCORES <- "Scores/TV scores.xlsx" | |
FILE_OUTPUT <- "Scores/TV Ranking %s (after %i games).jpg" | |
source(FILE_SETTINGS) | |
source(FILE_FUNCTIONS) | |
show_scores_info <- function(FILE_SCORES) { | |
data_scores <- read_data(FILE_SCORES) | |
n_games <- max(data_scores$game_id) | |
last_games <- data_scores[game_id >= (n_games - 3)] %>% | |
dcast(game_id ~ team + member, value.var = "player") | |
cat( | |
"\r\n", | |
sprintf("Last modified datetime: %s\r\n", file.info(FILE_SCORES)$mtime), | |
sprintf("Total number of games: %i\r\n", max(data_scores$game_id)), | |
sprintf("Last 3 games:\r\n") | |
) | |
print(last_games) | |
return(data_scores) | |
} | |
get_scores <- function(data_scores, file_model, SETTINGS) { | |
lambda <- run_model(data_scores, file_model, SETTINGS) | |
sigma <- attr(lambda, "sigma") | |
score <- create_report(data_scores, lambda) | |
lambda <- lambda[, intersect(names(lambda), score$player), with = F] | |
output <- list(lambda = lambda, sigma = sigma, score = score) | |
return(output) | |
} | |
write_output <- function(data_scores, result, SETTINGS) { | |
timestamp <- strftime(Sys.time(), "%Y-%m-%d") | |
n_games <- max(data_scores$game_id) | |
filename <- sprintf(FILE_OUTPUT, timestamp, n_games) | |
ignore <- SETTINGS$IGNORE(result$score) | |
jpeg(filename = filename, width = 2200, height = 1100, quality = 72, res = 180) | |
p <- plot_model(result$lambda, result$score, ignore, print = FALSE) | |
grid.arrange(p[[1]], p[[2]], ncol = 2) | |
dev.off() | |
} | |
main <- function() { | |
## read data | |
data_scores <- show_scores_info(FILE_SCORES) | |
## create model | |
result <- get_scores(data_scores, FILE_MODEL, SETTINGS) | |
## write outcomes | |
write_output(data_scores, result, SETTINGS) | |
## post to slack | |
if(SETTINGS$SLACK_POST) post_slack(filename, "tafelvoetbal", SETTINGS) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
suppressPackageStartupMessages({ | |
library(data.table) | |
library(readxl) | |
library(magrittr) | |
library(stringr) | |
library(rstan) | |
library(ggplot2) | |
library(scales) | |
library(gridExtra) | |
library(RColorBrewer) | |
library(slackr) | |
}) | |
# READ DATA | |
#read_data <- function () { | |
# data_scores <- fread("scores.csv", sep=",") | |
# data_scores <- as.data.frame(lapply(data_scores, as.character), stringsAsFactors = F) | |
# ## to avoid "-" being interpreted as zero in the formula | |
# data_scores <- replace(data_scores, data_scores == "Jan-Willem", "JW") | |
# return(data_scores) | |
#} | |
read_data <- function(file_data) { | |
data_orig <- data.table(read_excel(file_data, "Results Seizoen 2")) | |
data_orig <- data_orig[, c("team1_1", "team1_2", "team2_1", "team2_2", | |
"score_team_1", "score_team_2")] | |
data <- data_orig | |
data_orig[, game_id := .I] | |
data[, winst := (score_team_2 > score_team_1) + 1] | |
data[, diff := (score_team_1 - score_team_2)] | |
data[, score_team_1 := NULL] | |
data[, score_team_2 := NULL] | |
data <- melt(data, id.vars = c("game_id", "winst", "diff"), | |
measure.vars = c("team1_1", "team1_2", "team2_1", "team2_2"), | |
variable.factor = FALSE, | |
value.name = "player") | |
players <- unique(data$player) %>% sort | |
data[, member := str_extract(variable, "[1-2]$")] | |
data[, team := str_extract(variable, "^team[1-2]") %>% str_replace("team", "")] | |
data[, variable := NULL] | |
data[, player := factor(data$player, labels = players)] | |
data <- data[!is.na(winst)] | |
return(data) | |
} | |
# MODEL | |
run_model <- function(data, file_model, SETTINGS) { | |
players <- unique(data$player) | |
stan_data <- list( | |
X = matrix(as.integer(data$player), ncol = 4), | |
y = rep(1, max(data$game_id)), | |
p = data[member == 1 & team == 1, diff], | |
M = length(levels(players)), | |
N = max(data$game_id), | |
beta_prior = 5 | |
) | |
file_model_rds <- gsub("\\.stan$", ".rds", file_model) | |
if (file.exists(file_model_rds)) { | |
mod <- readRDS(file_model_rds) | |
} else { | |
mod <- stan_model(file_model, model_name = "tv", auto_write = TRUE) | |
} | |
model <- stan(file_model, fit = mod, model_name = "tv", | |
chains = SETTINGS$SAMPLING$N_CHAIN, | |
thin = SETTINGS$SAMPLING$THIN, | |
iter = SETTINGS$SAMPLING$ITER, | |
verbose = FALSE, | |
data = stan_data) | |
output <- as.matrix(model)[, 1:stan_data$M] | |
sigma <- as.matrix(model)[, stan_data$M + 1] | |
lambda <- data.table(output) | |
names(lambda) <- sort(levels(data$player)) | |
attr(lambda, "sigma") <- sigma | |
return(lambda) | |
} | |
# ANALYZE RESULT | |
create_report <- function(data, lambda) { | |
score <- data[, .(.N, winst = mean(2 - as.integer(team))), by = player][order(player)] | |
output <- as.matrix(lambda) | |
output <- output[, colnames(output) %in% score$player] | |
score[, score := apply(output, 2, function(x) median(x))] | |
score[, score_sd := apply(output, 2, function(x) sd(x))] | |
#score[, score_corrected := score * N / (N + 5)] | |
score[N > 30, score_corrected := score] | |
score[N <= 30, score_corrected := score * N / (15 + N^2/60)] | |
score <- score[order(-score_corrected)] | |
score[N > 5][order(-score), .(rank = .I, player, n_games = N, won = winst, score, score_sd)] | |
return(score) | |
} | |
## DENSITY PLOT | |
plot_model <- function(lambda, score, ignore = c("David", "Anna", "Winter"), print = TRUE) { | |
data <- melt(lambda, measure.vars = colnames(lambda), | |
variable.name = "player", value.name = "score") | |
players <- data[, .(score = median(score), sd = sd(score)), by = player] | |
players <- players[order(score)] | |
data$player <- factor(data$player, levels = rev(as.character(score$player))) | |
players$player <- factor(players$player, levels = rev(as.character(score$player))) | |
data_adj <- merge(data, players[, .(player, median = score)], by = "player") | |
data_adj <- data_adj[!player %in% ignore] | |
p <- data_adj[player %in% players[sd < .2, player]] %>% ggplot + | |
geom_violin(aes(x = player, y = score, fill = median), alpha = 1) + | |
scale_fill_gradient(low = brewer.pal(3,"Dark2")[1], | |
high = brewer.pal(3,"Dark2")[2]) + | |
#theme(axis.text.x = element_text(angle = 45, hjust = 1)) + | |
scale_y_reverse() + | |
coord_flip() + | |
guides(fill = FALSE) | |
tbl <- score[ | |
!player %in% ignore & score_sd < .2 | |
, .( | |
player, | |
N, | |
winst = sprintf("%4.2f %%", winst * 100), | |
score = sprintf("%4.2f %%", score * 100), | |
sd = sprintf("%4.2f %%", score_sd * 100), | |
score_norm = sprintf("%4.2f %%", score_corrected * 100) | |
) | |
] | |
if(print == TRUE) { | |
dev.new(width=20, height=11) | |
grid.arrange( | |
p, | |
tableGrob(tbl), | |
ncol = 2 | |
) | |
} else { | |
return(list(p, tableGrob(tbl))) | |
} | |
invisible() | |
} | |
## PREDICTIONS | |
predict_model <- function(lambda, data) { | |
if (class(data) == "character") { | |
data <- data.table( | |
game_id = 0, | |
winst = rep(1, 4), | |
player = data, | |
member = c(1, 2, 1, 2), | |
team = c(1, 1, 2, 2) | |
) | |
} | |
data_wide <- dcast(data, game_id ~ team + member, value.var = "player") | |
data_pred <- data_wide[, .(dlambda = | |
(lambda[[`1_1`]] + lambda[[`1_2`]]) - | |
((lambda[[`2_1`]] + lambda[[`2_2`]])) | |
), by = "game_id"] | |
data_pred[, dlambda := dlambda * attr(lambda, "sigma")] | |
data_pred[, prob := (1 + exp(-dlambda))^-1] | |
return(data_pred) | |
} | |
check_sampling <- function(lambda, ignore = c("David", "Anna")) { | |
data_lambda <- copy(lambda) | |
data_lambda <- melt(data_lambda[, id := .I], | |
id.vars = "id", | |
variable.name = "player", | |
value.name = "score") | |
data_lambda <- merge(data_lambda[, .(id, player, score)], | |
data_lambda[, .(id = id - 1, player, score_prev = score)], | |
by = c("id", "player")) | |
data_lambda <- data_lambda[!player %in% ignore] | |
p <- list() | |
p[[1]] <- ggplot(data_lambda) + | |
geom_line(aes(x = id, y = score)) + | |
facet_wrap(~ player) + | |
ggtitle("Traceplot") | |
p[[2]] <- ggplot(data_lambda) + | |
geom_jitter(aes(x = score_prev, y = score), alpha = 0.7) + | |
facet_wrap(~ player) + | |
geom_abline(intercept = 0, slope = 1, color = "gray") + | |
coord_equal() + | |
ggtitle("Autocorrelation plot") | |
invisible(p) | |
} | |
plot_biplot_chances <- function(lambda, ignore = c("David", "Anna")) { | |
data <- melt(lambda, measure.vars = colnames(lambda), | |
value.name = "score", variable.name = "player") | |
data[, ID := seq_along(score), by = player] | |
data <- data[!player %in% ignore] | |
ranking <- data[, .(score = median(score)), by = player] | |
ranking <- ranking[order(score)] | |
ranking_upside <- ranking[order(-score)] | |
data_pairwise <- merge( | |
data[, .(ID, player1 = player, score_x = score)], | |
data[, .(ID, player2 = player, score_y = score)], | |
allow.cartesian = TRUE, | |
by = "ID" | |
) | |
data_matrix <- data_pairwise[, | |
.( prob = mean((score_x - score_y) > 0)), | |
by = c("player1", "player2")] | |
data_matrix[, player1 := factor(player1, levels = ranking$player)] | |
data_matrix[, player2 := factor(player2, levels = ranking_upside$player)] | |
data_matrix[, label := sprintf("%5.1f%%", prob * 100)] | |
data_matrix[player1 == player2, label := ""] | |
data_matrix[player1 == player2, prob := NA] | |
ggplot(data_matrix) + | |
geom_tile(aes(x = player2, y = player1, fill=prob)) + | |
scale_fill_gradient(low = brewer.pal(3,"Dark2")[1], | |
high = brewer.pal(3,"Dark2")[2], | |
labels = percent) + | |
geom_text(aes(x = player2, y = player1, label = label), size=4) + | |
ggtitle("Chances that Player1 is better than Player2") + | |
guides(fill=guide_legend("Probability")) | |
} | |
post_slack <- function(filename, channel = "tafelvoetbal", SETTINGS) { | |
slackrSetup(channel = paste0("#", channel), | |
incoming_webhook_url = SETTINGS$SLACK_WEBHOOK) | |
Sys.setenv(SLACK_API_TOKEN = SETTINGS$SLACK_TOKEN) | |
slackrUpload(filename, channels = channel) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
int<lower=0> M; | |
int<lower=0> N; | |
int X[N,4]; | |
int y[N]; | |
} | |
parameters { | |
real<lower=0,upper=1> lambda[M]; | |
real<lower=0> sigma; | |
} | |
model { | |
real p[N]; | |
for(i in 1:N) { | |
p[i] = lambda[X[i,1]] + lambda[X[i,2]] - lambda[X[i,3]] - lambda[X[i,4]]; | |
y[i] ~ bernoulli_logit(p[i] * sigma); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment