Skip to content

Instantly share code, notes, and snippets.

@CSJCampbell
Last active September 26, 2017 22:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CSJCampbell/a956f8762aa4fc4c08afef9257a4498d to your computer and use it in GitHub Desktop.
Save CSJCampbell/a956f8762aa4fc4c08afef9257a4498d to your computer and use it in GitHub Desktop.
WTC 2017 Analysis
country Rating
Australia 2445.549203
England 2343.962608
USA 2342.762909
Sweden 2313.943643
Poland 2279.733775
Germany 2263.613631
Canada 2256.044501
Austria 2246.198598
NA 2216.573624
Finland 2213.208031
Italy 2199.732591
Denmark 2181.675218
Slovakia 2151.784601
Ireland 2146.959651
France 2140.708093
United.Nations 2131.291862
Scotland 2118.545808
Malta 2112.417012
Netherlands 2111.028661
Russia 2089.290038
Norway 2078.400923
Hungary 2042.690559
Wales 2039.363156
Greece 2022.818299
Belgium 2014.507336
Spain 2007.025194
Middle.East 2006.221647
Portugal 1995.792035
Latvia 1977.852884
Switzerland 1976.917115
Ukraine 1972.035878
Czech.Republic 1963.651928
China 1909.536554
Northern.Ireland 1904.77978
Slovenia 1800
UAE 1800
# master script to make predictions
library(dplyr)
library(jsonlite)
library(WTCTools)
# Read the tournament registration file and flatten it into one row per
# player: the team name followed by six player-level fields.
players2017 <- read_json("World_Team_Championship_2017.json")

field_names <- c("team", "player", "faction", "caster1", "caster2",
  "theme1", "theme2")

# Flatten one player record into
# c(name, faction, first entry of list 1, first entry of list 2, theme 1, theme 2).
# A list without a theme contributes an empty string.
player_fields <- function(p) {
  c(p$name, p$faction, p$list1$list[[1]], p$list2$list[[1]],
    ifelse(is.null(p$list1$theme), yes = "", no = p$list1$theme),
    ifelse(is.null(p$list2$theme), yes = "", no = p$list2$theme))
}

# a matrix is filled in place: five rows are reserved for every team
players17 <- matrix(NA, nrow = length(players2017) * 5,
  ncol = 7, dimnames = list(NULL, field_names))
for (k in seq_along(players2017)) {
  entry <- players2017[[k]]
  # vapply returns one column per player, so transpose to one row per player
  roster <- t(vapply(entry$players[1:5],
    FUN = player_fields,
    FUN.VALUE = character(6)))
  # team name (recycled over the five rows) in front of the player fields
  team_rows <- cbind(entry$name, roster)
  first_row <- ((k - 1) * 5) + 1
  players17[seq.int(first_row, k * 5), ] <- team_rows
}
# convert to data.frame, keeping every column as character
players17 <- as.data.frame(players17, stringsAsFactors = FALSE)
# bring both caster columns onto the naming used by harmonizeCasters()
players17$caster1 <- harmonizeCasters(players17$caster1)
players17$caster2 <- harmonizeCasters(players17$caster2)

# nicknames are written as a double-quoted section between two spaces
nickname_rx <- " \\\".+\\\" "
# show the affected names, then collapse the nickname to a single space
grep(nickname_rx, x = players17$player, value = TRUE)
players17$player <- gsub(nickname_rx, replacement = " ", x = players17$player)

# one-off spelling corrections
players17$player <- replace(players17$player,
  players17$player == "Anthony O Reilly ", "Anthony O'Reilly")
players17$player <- replace(players17$player,
  players17$player == "Dmtrii Tochenov", "Dmitrii Tochenov")

# show the names containing "Mader" for manual inspection
grep(".*Mader", x = players17$player, value = TRUE)

# round trip through CSV so later steps can restart from the saved file
write.csv(players17, file = "players2017.csv", row.names = FALSE)
players17 <- read.csv(file = "players2017.csv", stringsAsFactors = FALSE)
###############################################################################
# get ratings
###############################################################################
# the rating update itself moved to wtc_predictions_2017_player_reg.R
#source("wtc_predictions_2017_update_rating.R")
load("rating2015-reg16.rda")

# running round counter across years: six rounds per year, counted from 2013
wtc$allrounds <- (wtc$year - 2013) * 6 + wtc$round

###########################################################
# ratings based on pairings with selected caster pairings
# feed the 2016 games into steph(), starting from the stored 2015 ratings
games2016 <- wtc %>%
  filter(year == 2016) %>%
  select(allrounds, player1, player2, TP)
rating2016 <- steph(x = games2016, status = rating2015$ratings)
save(rating2016, file = "rating2016.rda")

# export the updated ratings table
rankings <- rating2016$ratings
write.csv(rankings, file = "rankings2016.csv", row.names = FALSE)
###########################################################
# register players
# Normalise the 2017 player names and map them onto the spellings already
# present in rating2015$ratings$Player.
# toPlain() comes from toPlain.R (not shown here) -- presumably it converts
# names to plain ASCII; confirm against that file.
source("toPlain.R")
players17$player <- toPlain(players17$player)
# drop a space followed by a double-quoted section (nickname)
players17$player <- gsub(pattern = " \\\".+\\\"",
x = players17$player, replacement = "")
# drop a single-quoted section at the end of the name
players17$player <- gsub(pattern = " '.+'$",
x = players17$player, replacement = "")
# collapse a single-quoted section in the middle of the name to one space
players17$player <- gsub(pattern = " '[A-Za-z ]+' ",
x = players17$player, replacement = " ")
# nickname opened with a double quote and closed by either two single quotes
# or a double quote plus space
players17$player <- gsub(pattern = "\\\"[A-Za-z ]+(''|\\\" )",
x = players17$player, replacement = " ")
# drop a trailing parenthesised suffix
players17$player <- gsub(pattern = " \\(.+\\)$",
x = players17$player, replacement = "")
# remove all caps
# names made only of capitals, spaces and hyphens are converted to title case
ind <- grepl("^[A-Z -]+$", x = players17$player)
players17$player[ind] <- tools::toTitleCase(casefold(players17$player[ind]))
# names that have no exact match in the existing ratings (printed for review)
newPlayers <- players17$player[!players17$player %in%
rating2015$ratings$Player]
newPlayers
# replace names in players17 with correct similar names
# showSimilarNames() is not defined in this file; it is used below as a
# two-column table of candidate name pairs -- TODO confirm its return shape
pNames <- showSimilarNames(c(newPlayers, rating2015$ratings$Player))
# keep matched name pairs
# NOTE(review): these row numbers were evidently picked by inspecting the
# output for one specific data set; they must be re-checked if the input changes
pNames <- pNames[c(10, 11, 13, 14, 15, 16, 17, 20, 21, 32, 33, 34, 35), ]
# align replacements
# swap a pair whenever its second entry is the already-rated name, so that
# column 1 always holds the spelling to keep
bool <- pNames[, 2] %in% rating2015$ratings$Player
for (i in seq_along(bool)) if (bool[i]) pNames[i, ] <- rev(pNames[i, ])
# "Georg Buscha" %in% rating2015$ratings$Player
# replace values in second column with values in first
for (i in seq_len(nrow(pNames))) {
players17$player[players17$player == pNames[i, 2]] <- pNames[i, 1]
}
# manual corrections not covered by the pairs above
players17$player[players17$player == "kerazoglou nikos"] <- "Kerazoglou Nikolaos"
players17$player[players17$player == "Victor Pons"] <- "Victor Pons Tec"
# persist the cleaned names and reload them from disk
write.csv(players17, file = "players2017.csv", row.names = FALSE)
players17 <- read.csv(file = "players2017.csv", stringsAsFactors = FALSE)
###########################################################
# check players in rating
# Split the 2017 players into those already rated and those not, then show,
# for one initial letter, the unmatched 2017 names next to the unmatched
# rated names (with their country) so near-duplicates can be spotted by eye.
p17 <- unique(players17$player)
is_rated <- p17 %in% rating2015$ratings$Player
knownp <- p17[is_rated]
length(knownp)
unknownp <- p17[!is_rated]
length(unknownp)

# index into LETTERS: which initial to inspect (3 = "C")
i <- 3
initial <- paste0("^", LETTERS[i])
pp17 <- grep(pattern = initial, x = unknownp,
  value = TRUE)
pp15 <- grep(pattern = initial,
  x = rating2015$ratings$Player[!rating2015$ratings$Player %in% knownp],
  value = TRUE)

# Columns: 2017 name, rated name, country of the rated name.
# The country is looked up with match() rather than through row names:
# assigning sort(pp15) as row names fails whenever there are more unmatched
# 2017 names than rated names, and name-indexing only fills the first of any
# duplicated rated name.
sorted17 <- sort(pp17)
sorted15 <- sort(pp15)
nm <- matrix(NA_character_,
  nrow = max(length(sorted17), length(sorted15)), ncol = 3)
nm[seq_along(sorted17), 1] <- sorted17
nm[seq_along(sorted15), 2] <- sorted15
nm[seq_along(sorted15), 3] <- as.character(
  rating2015$ratings$country[match(sorted15, rating2015$ratings$Player)])
nm
# Derive a country label from the team name.
country <- players17$team
# drop the "Team ", "Epic " and "Prime " qualifiers wherever they occur
for (qualifier in c("Team ", "Epic ", "Prime ")) {
  country <- gsub(pattern = qualifier, replacement = "", x = country)
}
# join two-word country names with a dot so they survive the split below
for (word in c("Northern", "United", "Middle")) {
  country <- gsub(pattern = paste0(word, " "),
    replacement = paste0(word, "."), x = country)
}
# any team name mentioning "Czech" becomes Czech.Republic
country <- gsub(pattern = "^.*Czech.*$", replacement = "Czech.Republic",
  x = country)
# keep only the first word; vapply guarantees a character vector even for
# empty input, where sapply would return a list
country <- vapply(strsplit(x = country, split = " "),
  function(parts) parts[1], FUN.VALUE = character(1))
country[country == "United.States"] <- "USA"
players17$country <- country
# list the resulting labels for a visual check
sort(unique(players17$country))
library(dplyr)
# Per-country fallback rating: the 35th percentile of the rated players of
# that country.
nationalityRating <- rating2015$ratings %>%
  group_by(country) %>%
  summarise(Rating = quantile(Rating, probs = 0.35)) %>%
  as.data.frame()
write.csv(nationalityRating,
  file = "nationalityRating.csv",
  quote = FALSE, row.names = FALSE)

# individual ratings, renamed so they line up with the players17 columns
playerRating <- transmute(rating2015$ratings,
  player = Player, Rating1 = Rating, country = country)

# Both joins rely on the shared column names: the first matches on country,
# the second on player and country.
players17 <- players17 %>%
  left_join(y = nationalityRating) %>%
  left_join(y = playerRating)
dim(players17)

# players without an individual rating take their country's fallback rating
unrated <- is.na(players17$Rating1)
players17$Rating1[unrated] <- players17$Rating[unrated]
# anyone still missing (no rating for the country either) starts at 1800
sum(is.na(players17$Rating1))
players17$Rating1[is.na(players17$Rating1)] <- 1800
#help(package = "data.table")
# complete ratings object
# Build a default record for every 2017 player who is not yet present in
# rating2015$ratings: the rating derived above, deviation 200 and no games.
new <- transmute(filter(players17,
    !players17$player %in% rating2015$ratings$Player),
  Player = player, Rating = Rating1, Deviation = 200, Games = 0,
  Win = 0, Draw = 0, Loss = 0, Lag = 0, country = country)
# keep the first record for each Player
new <- slice(group_by(new, Player), 1)
# add records to object
rating2015$ratings <- arrange(
  as.data.frame(rbind(rating2015$ratings, as.data.frame(new))),
  Player)
# One row per player and list: caster1/caster2 become rows, with the source
# column name in listno and the caster in List.
# gather() lives in tidyr, which this script does not attach, so the call is
# namespace-qualified.
players17 <- tidyr::gather(players17, key = listno, value = List,
  caster1, caster2)
players17 <- rename(players17, Team = team, Player = player)
# Disabled experiment: building a caster-pair lookup matrix by hand.
#uniCasters <- sort(unique(players17$List))
#pairlookup16 <- matrix(0, nrow = length(casters),
# ncol = length(casters), dimnames = list(casters, casters))
# pairlookup16 <- setMatrixVal(x = pairlookup16,
# list1 = rep("Wurmwood1", times = length(uniCasters[!uniCasters %in% "Irusk2"])), list2 = uniCasters[!uniCasters %in% "Irusk2"], val = 2)
# pairlookup16 <- setMatrixVal(x = pairlookup16,
# list1 = rep("Madrak2", times = length(uniCasters[!uniCasters %in% "Haley2"])), list2 = uniCasters[!uniCasters %in% "Haley2"], val = 2)
# sort by team, then player, so the row positions used below are stable
players17 <- arrange(players17, Team, Player)
#rating2015$ratings$Player[!rating2015$ratings$Player %in% players17$Player]
# print three teams for manual inspection
players17[players17$Team == "Austria Mozart", ]
players17[players17$Team == "Czech Eagles", ]
players17[players17$Team == "Germany Loreley", ]
# remove duplicates
# NOTE(review): the rows are dropped by hard-coded position in the arranged
# table; these positions are only valid for the exact data set they were read
# from and must be re-derived if the input changes
players17 <- slice(players17, -c(14, 337, 76, 399, 140, 463))
###############################################################################
#
# Simulation
#
###############################################################################
# Run nn independent simulations on a two-worker cluster and store, for each
# simulation, the team names ordered from best to worst.
load("rating2016.rda")
nteams <- length(unique(players17$Team))
nn <- 10000L
# NOTE(review): this pre-allocated matrix is overwritten by the list that
# foreach() returns below
out <- matrix(NA_character_, nrow = nteams, ncol = nn)
library(foreach)
library(doSNOW)
cl <- makeCluster(2L)
registerDoSNOW(cl)
# one seed per simulation, drawn reproducibly from a fixed master seed
set.seed(287934)
sseed <- as.integer(runif(n = nn, min = 1, max = nn) * 1000)
# redraw any repeated seeds from a higher, non-overlapping range
sseed[duplicated(sseed)] <- as.integer(
runif(n = sum(duplicated(sseed)),
min = nn, max = nn * 10) * 1000)
# confirm that no duplicates remain
sum(duplicated(sseed))
#[1] 0
system.time(
#out <- foreach(ii = seq_len(nn)) %do% {
out <- foreach(ii = seq_len(nn), .packages = "WTCTools") %dopar% {
#for (ii in seq_len(nn)) {
# make each simulation reproducible on its own
set.seed(sseed[ii])
# first round: no previous results; the round number is left to doRound's
# default, which is not visible in this file
thisRound <- doRound(data = players17, results = NULL,
ratings = rating2016,
pairlookup = mark3lookup)
# remaining rounds, each fed with the results accumulated so far
for (jj in 8:12) {
thisRound <- doRound(data = players17,
round = jj,
results = thisRound,
ratings = rating2016,
pairlookup = mark3lookup)
}
# row names of the result (presumably the team names -- they are compared
# with team names later) ordered by matches won, then total games won,
# both descending
tmp <- rownames(thisRound)
tmp[order(thisRound$Matches.Won,
thisRound$Total.Games.Won,
decreasing = TRUE)]
})
# NOTE(review): the cluster is only released if the loop above completes
# without error
stopCluster(cl)
save(out, file = "out21092017.RData")
###############################################################################
#
# Summarise Simulation Results
#
###############################################################################
# Turn the list of simulated finishing orders into `res`, a team-by-rank
# matrix holding the percentage of simulations in which each team finished at
# each rank.
load("out21092017.RData")
nn <- length(out)
# rows are finishing positions, columns are simulations
out <- t(do.call(what = "rbind", out))
# For each finishing position, count how often each team landed there.
# lapply() always yields a list with one table per position; apply() would
# simplify to a matrix or vector whenever every position happens to have the
# same number of distinct teams, breaking the tab[[i]] lookups below.
tab <- lapply(seq_len(nrow(out)), function(position) {
  x <- out[position, ]
  sort(table(x), decreasing = TRUE)
})
# the six most frequent winners, as a percentage of simulations
(tab[[1]] / (nn / 100))[1:6]
res <- matrix(NA, nrow = length(tab), ncol = length(tab))
# team names taken from the first simulation; every simulation ranks the same teams
teamNames <- out[, 1]
rownames(res) <- teamNames
for (i in seq_along(tab)) {
  counts <- tab[[i]]
  # percentage per team in row order; teams never seen at this rank get 0
  perc <- as.vector(counts[match(teamNames, names(counts))]) / (nn / 100)
  perc[is.na(perc)] <- 0
  res[, i] <- perc
}
###############################################################################
#
# Summarize Results
#
###############################################################################
# Position-weighted mean of a vector: each element is multiplied by its
# 1-based position and the products are averaged.
# x: numeric vector. Returns a single number (NaN for empty input).
moment <- function(x) {
  positions <- seq_along(x)
  weighted <- positions * x
  mean(weighted)
}
# score every team (row of res) with moment(), then sort ascending
mmax <- apply(X = res, MARGIN = 1L, FUN = moment)
ascending <- order(mmax)
reso <- res[ascending, ]
mmax <- mmax[ascending]
mmax
# ranking table: position, team and rounded score
dat <- data.frame(
  Rank = seq_along(mmax),
  Team = names(mmax),
  Score.Moment = round(mmax, 3)
)
rownames(dat) <- NULL
library(xtable)
# emit the table as HTML
print(xtable(dat), type = "html")
###############################################################################
#
# Expert Opinion Bootstrap
#
###############################################################################
# One row per pundit: the pundit's name followed by seven picked teams.
klaw <- matrix(c(
"Sascha Maisel", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Italy Michelangelo", "Poland Hussar", "Sweden Götaland",
"Chris Davies", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Norway Hugin", "Poland Hussar", "Sweden Norrland",
"Bubba Dalton", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Poland Hussar", "Sweden Götaland", "USA Texas",
"Tom Guan", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Italy Michelangelo", "Poland Hussar", "USA Texas",
"Ryan Evans", "Australia Drop Bear", "Canada Moose", "England Lions", "Germany Loreley", "Norway Hugin", "Poland Hussar", "Sweden Götaland",
"Konrad Sosnowski", "Australia Drop Bear", "Canada Moose", "Germany Loreley", "Sweden Götaland", "USA Texas", "Poland Hussar", "Poland Knights",
"Johan Dyrlind", "Australia Drop Bear", "Canada Beaver", "Germany Loreley", "Poland Hussar", "Sweden Götaland", "Sweden Norrland", "Sweden Svealand",
"Norbert Brunhuber", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Poland Hussar", "Sweden Götaland", "USA Texas"),
nrow = 8, byrow = TRUE)
klaw <- as.data.frame(klaw, stringsAsFactors = FALSE)
klaw <- rename(klaw, Player = V1)
# Long format: one row per pundit and pick. The source column (V2..V8) is the
# key. gather() lives in tidyr, which this script does not attach, so the call
# is namespace-qualified.
klaw <- tidyr::gather(klaw, key = Rank, value = Team, -Player)
# map the column labels V2..V8 onto ranks 1..7 through their factor codes
klaw <- mutate(klaw, Rank = (1:7)[factor(Rank)])
klaw <- arrange(klaw, Player, Rank)
head(klaw)
# picked teams that do not appear among the simulated team names
unique(klaw$Team[!klaw$Team %in% teamNames])
# showSimilarNames(c(klaw$Team, teamNames))
# borda count
# each pick scores 7 minus its rank; totals per team, highest first
klaw1 <- arrange(summarize(group_by(klaw, Team),
  Borda = sum(7 - Rank, na.rm = TRUE)), desc(Borda))
print(xtable(klaw1), type = "html")
###############################################################################
#
# Simulation Plot
#
###############################################################################
# Write a PNG showing how likely each finishing rank is for
# "Australia Drop Bear", using that team's row of res.
library(ggplot2)
png("Australia_Drop_Bear_Prob_Density_2017.png", height = 600, width = 800, res = 120)
# an extra point (0.9, 0) is prepended so the polygon starts on the baseline
rank_plot <- qplot(data = cbind.data.frame(Rank = c(0.9, seq_len(nrow(res))),
    `Probability Density` = c(0, res["Australia Drop Bear",])),
  x = Rank, y = `Probability Density`,
  geom = "polygon", fill = I("gold")) +
  ggtitle("Likelihood of Rank of Australia Drop Bear") +
  theme_bw()
# ggplot objects only draw when printed; top-level auto-printing does not
# happen under source(), which would leave the PNG empty
print(rank_plot)
dev.off()
# cleaning of wtc data and rating data
library(WTCTools)
library(tidyr)
# Derive a country label for each game record from the first team's name.
country <- wtc$team1
# drop the "Team ", "Epic " and "Prime " qualifiers wherever they occur
for (qualifier in c("Team ", "Epic ", "Prime ")) {
  country <- gsub(pattern = qualifier, replacement = "", x = country)
}
# join two-word country names with a dot so they survive the split below
for (word in c("Northern", "United", "Middle", "Czech")) {
  country <- gsub(pattern = paste0(word, " "),
    replacement = paste0(word, "."), x = country)
}
# keep only the first word; vapply guarantees a character vector even for
# empty input, where sapply would return a list
country <- vapply(strsplit(x = country, split = " "),
  function(parts) parts[1], FUN.VALUE = character(1))
country[country == "United.States"] <- "USA"
wtc$country <- country
# list the resulting labels for a visual check
sort(unique(wtc$country))
# country of each player, taken from the first record in which the player
# appears as player1
nationalities <- wtc[!duplicated(wtc$player1), c("player1", "country")]
# long format: one row per game and side, with the player's name in `name`
wtc1 <- gather(wtc, key = player, value = name, player1, player2)
# number of rows per name
# NOTE(review): summarize()/group_by() are dplyr functions, while this section
# only attaches WTCTools and tidyr -- confirm dplyr is attached when this runs
gameCounts <- summarize(group_by(wtc1, name), n())
# showSimilarNames() is not defined in this file; it is used below as a
# two-column table of candidate name pairs -- TODO confirm its return shape
pNames <- showSimilarNames(gameCounts$name)
# remove non-matched names
# NOTE(review): rows are dropped by hard-coded position, chosen for one
# specific data set
pNames <- pNames[-c(1, 2, 3, 8, 12, 23, 29, 30), ]
# align replacements
# swap the listed pairs so that column 1 holds the spelling to keep
index <- c(5, 7, 8, 9, 18, 19, 23)
for (i in index) pNames[i, ] <- rev(pNames[i, ])
pNames
# replace every column-2 spelling by its column-1 counterpart
for (i in seq_len(nrow(pNames))) {
wtc1$name[wtc1$name == pNames[i, 2]] <- pNames[i, 1]
}
# manually fix
# The left-hand strings are the mis-encoded spellings as they occur in the
# data; they must stay exactly as written to match. Each is replaced by a
# plain-ASCII spelling.
wtc1$name[wtc1$name == "PA¤r-Ola Nilsson"] <- "Per-Ola Nilsson"
wtc1$name[wtc1$name == "BenoA®t CA©rA©"] <- "Benoit Cere"
wtc1$name[wtc1$name == "HA¥vard Kongsrud"] <- "Haovard Kongsrud"
wtc1$name[wtc1$name == "AⱲic Grux"] <- "Eric Grux"
wtc1$name[wtc1$name == "KA©vin Maeder"] <- "Kevin Maeder"
wtc1$name[wtc1$name == "SA¸ren Kongsgaard"] <- "Soeren Kongsgaard"
wtc1$name[wtc1$name == "Matej TkA¡c"] <- "Matej Tkac"
wtc1$name[wtc1$name == "Mathias LA¼tzkendorf"] <- "Mathias Luetzkendorf"
wtc1$name[wtc1$name == "Torjus EftestA¸l"] <- "Torjus Eftestel"
wtc1$name[wtc1$name == "SA¸ren Husum"] <- "Soeren Husum"
# pattern-based fixes: any prefix before "ric Grux" / "s Leite"
wtc1$name[grep(wtc1$name, pattern = "^.+ric Grux$")] <- "Eric Grux"
wtc1$name[grep(wtc1$name, pattern = "^.+s Leite$")] <- "Luis Leite"
wtc1$name[wtc1$name == "SA¸ren Laust Jacobsen"] <- "Soeren Laust Jacobsen"
wtc1$name[wtc1$name == "Benjamin FA¼tzi"] <- "Benjamin Futzi"
wtc1$name[wtc1$name == "LorA¡nd SA¶vA©nyhA¡zi"] <- "Lorand Sovenyhazi"
wtc1$name[wtc1$name == "PA©ter MolnA¡r"] <- "Peter Molnar"
wtc1$name[wtc1$name == "AndrA¡s PorkolA¡b"] <- "Andras Porkolab"
wtc1$name[wtc1$name == "JA¡nos Kendi"] <- "Janos Kendi"
wtc1$name[wtc1$name == "KA©vin Aouragh"] <- "Kevin Aouragh"
wtc1$name[wtc1$name == "AurA©lien Guiramand"] <- "Aurelien Guiramand"
wtc1$name[wtc1$name == "LuboA¡ Hribik"] <- "Luboa Hribik"
# spelling variants of the same player
wtc1$name[wtc1$name == "Gerard O' Brien"] <- "Gerard O'Brien"
wtc1$name[wtc1$name == "Gerard O'brien"] <- "Gerard O'Brien"
wtc1$name[wtc1$name == "Kevin Aourragh"] <- "Kevin Aouragh"
# any separator between "Dyhre" and "Poulsen" becomes a hyphen
wtc1$name[grep(wtc1$name, pattern = "^Peter Cornelious Dyhre.*Poulsen$")] <- "Peter Cornelious Dyhre-Poulsen"
wtc1$name[wtc1$name == "Konrad KrA³licki"] <- "Konrad Krolicki"
# list names that still contain characters other than letters, space,
# apostrophe, dot or hyphen
sort(unique(wtc1$name[!grepl("^[A-Za-z '\\.-]+$", wtc1$name)]))
# back to wide format: one row per game with player1 and player2 columns
wtc2 <- spread(wtc1, key = player, value = name)
# Apply the same name clean-up to the stored ratings and save the result as
# rating2015-reg16.rda.
load("rating2015-reg.RData")
# check special character
z <- rating2015$ratings$Player
# toPlain() comes from toPlain.R (not shown here) -- presumably it converts
# names to plain ASCII; confirm against that file
source("toPlain.R")
z <- toPlain(z)
# strip annotations that were stored as part of the name
z[z == "(C) Daniel Bogdanoski"] <- "Daniel Bogdanoski"
z[z == "Dominik de Cassan: Axis+Iron Mother"] <- "Dominik de Cassan"
# The patterns below use .* in place of the accented letters so they match
# whatever the mis-encoded characters became; the exact-match lines rely on
# the mis-encoded literal and must stay exactly as written.
z[grep(z, pattern = "^P.*r-Ola Nilsson$")] <- "Per-Ola Nilsson"
z[grep(z, pattern = "^Beno.*t C.*r.*$")] <- "Benoit Cere"
z[grep(z, pattern = "^H.*vard Kongsrud$")] <- "Haovard Kongsrud"
z[z == "KA©vin Maeder"] <- "Kevin Maeder"
z[grep(z, pattern = "^S.*ren Kongsgaard$")] <- "Soeren Kongsgaard"
z[grep(z, pattern = "^Matej Tk.*$")] <- "Matej Tkac"
z[grep(z, pattern = "^Mathias L.*tzkendorf$")] <- "Mathias Luetzkendorf"
z[grep(z, pattern = "^Torjus Eftest.*l$")] <- "Torjus Eftestel"
z[grep(z, pattern = "^S.*ren Husum$")] <- "Soeren Husum"
z[grep(z, pattern = "^.+ric Grux$")] <- "Eric Grux"
z[grep(z, pattern = "^.+s Leite$")] <- "Luis Leite"
z[grep(z, pattern = "^S.*ren Laust Jacobsen$")] <- "Soeren Laust Jacobsen"
z[z == "Benjamin FA¼tzi"] <- "Benjamin Futzi"
z[z == "LorA¡nd SA¶vA©nyhA¡zi"] <- "Lorand Sovenyhazi"
z[grep(z, pattern = "^P.*ter Moln.*r$")] <- "Peter Molnar"
z[grep(z, pattern = "^Andr.*s Porkol.*b$")] <- "Andras Porkolab"
z[grep(z, pattern = "^J.*nos Kendi$")] <- "Janos Kendi"
z[z == "KA©vin Aouragh"] <- "Kevin Aouragh"
z[grep(z, pattern = "^Aur.*lien Guiramand$")] <- "Aurelien Guiramand"
z[grep(z, pattern = "^Lubo.* Hribik$")] <- "Luboa Hribik"
z[grep(z, pattern = "^Konrad Kr.*licki$")] <- "Konrad Krolicki"
z[grep(z, pattern = "^Peter Cornelious Dyhre.*Poulsen$")] <- "Peter Cornelious Dyhre-Poulsen"
#z[grep(z, pattern = "^M.*t.* T.*ri$")]
#z[z == "Gerard O' Brien"] <- "Gerard O'Brien"
#z[z == "Gerard O'brien"] <- "Gerard O'Brien"
#z[z == "Kevin Aourragh"] <- "Kevin Aouragh"
# names that still contain characters other than letters, space, apostrophe,
# dot or hyphen (printed for review)
z1 <- sort(z[!grepl("^[A-Za-z '\\.-]+$", z)])
z1
# write the cleaned names back and save
rating2015$ratings$Player <- z
save(rating2015, file = "rating2015-reg16.rda")
# merge duplicated names
# Manual, order-dependent clean-up of players who appear under more than one
# spelling in the ratings table. The bare filter()/showSimilarNames() calls
# only print rows for inspection.
dat15 <- rating2015$ratings
showSimilarNames(dat15$Player)
# remove nicknames
# rows whose name contains the nickname get the plain name
dat15$Player[grep("Snot", dat15$Player)] <- "Daniel Pawelka"
dat15$Player[grep("Raghnath", dat15$Player)] <- "Martin Florian"
# filter(dat15, grepl(pattern = "Kasd.*vasilis", x = Player))
# missed Dimitris
# drop the "Dimitris Kasdiovasilis" row, then rename the "Dimitrios
# Kasdovasilis" row to that spelling
dat15 <- filter(dat15, !grepl(pattern = "Dimitris Kasdiovasilis", x = Player))
dat15 <- mutate(dat15,
Player = replace(Player,
grepl(pattern = "Dimitrios Kasdovasilis", x = Player),
"Dimitris Kasdiovasilis"))
# Frederik Punt
# combine the rows matching "Fre.*k Punt": mean rating, summed games, wins
# and losses (draws are not summed)
filter(dat15, grepl(pattern = "Fre.*k Punt", x = Player))
row <- filter(dat15, grepl(pattern = "Fre.*k Punt", x = Player))
row$Rating <- mean(row$Rating)
row$Games <- sum(row$Games)
row$Win <- sum(row$Win)
row$Loss <- sum(row$Loss)
dat15 <- filter(dat15, !grepl(pattern = "Freek Punt", x = Player))
# NOTE(review): row[1, ] keeps whatever Player value the first matched row
# had; this assumes the "Frederik Punt" row comes first -- confirm
dat15[dat15$Player == "Frederik Punt", ] <- row[1, ]
# filter(dat15, grepl(pattern = "Pawelka", x = Player))
# drop the second "Daniel Pawelka" row
dat15 <- slice(dat15, -grep(pattern = "Daniel Pawelka", x = Player)[2])
filter(dat15, grepl(pattern = "Florian$", x = Player))
# NOTE(review): this drops the first remaining "Daniel Pawelka" row although
# the line above inspects the "Florian" rows; it looks like a copy-paste slip
# (the duplicate "Martin Florian" row was probably intended) -- confirm
# against the data before relying on it
dat15 <- slice(dat15, -grep(pattern = "Daniel Pawelka", x = Player)[1])
# "Jason Mcleod"
# drop the "Jason Mcleod" row, then rename the "Jay Mcleod" row to that name
filter(dat15, grepl(pattern = "Ja.* Mcleod", x = Player))
dat15 <- filter(dat15, !grepl(pattern = "Jason Mcleod", x = Player))
dat15 <- mutate(dat15,
Player = replace(Player,
grepl(pattern = "Jay Mcleod", x = Player),
"Jason Mcleod"))
# "Josh Bates"
# show every "Josh... Bates" variant, then drop the rows containing "Josh Bates"
filter(dat15, grepl(pattern = "Josh.* Bates", x = Player))
dat15 <- filter(dat15, !grepl(pattern = "Josh Bates", x = Player))
# "Michal Nakonieczny"
# combine the rows matching "Michal .+onieczny" under one name: summed games,
# wins and losses (the rating is left as in the first matched row)
row <- filter(dat15, grepl(pattern = "Michal .+onieczny", x = Player))
row$Player <- "Michal Nakonieczny"
row$Games <- sum(row$Games)
row$Win <- sum(row$Win)
row$Loss <- sum(row$Loss)
dat15 <- filter(dat15, !grepl(pattern = "Michal Konieczny", x = Player))
dat15[dat15$Player == "Michal Nakonieczny", ] <- row[1, ]
# "Nikolaos Kerazoglou"
# show both spellings, then drop the rows containing "Nikos Kerazoglou"
filter(dat15, grepl(pattern = "Niko.*s Kerazoglou", x = Player))
dat15 <- filter(dat15, !grepl(pattern = "Nikos Kerazoglou", x = Player))
# "Panagiotis Demiris"
# drop the "Panagiotis Demiris" row, then rename the "Panagiotis Ntemiris"
# row to that spelling
filter(dat15, grepl(pattern = "Panagiotis .+emiris", x = Player))
dat15 <- filter(dat15, !grepl(pattern = "Panagiotis Demiris", x = Player))
dat15 <- mutate(dat15,
Player = replace(Player,
grepl(pattern = "Panagiotis Ntemiris", x = Player),
"Panagiotis Demiris"))
# dat15$Player[grep(".*Mader", x = dat15$Player)]
# grep(" \\\".+\\\" ", x = dat15$Player, value = TRUE)
# re-check for remaining near-duplicates, then store the cleaned table
showSimilarNames(dat15$Player)
rating2015$ratings <- dat15
# cross-check against the game records and the 2017 registrations
showSimilarNames(c(rating2015$ratings$Player, wtc$player1, wtc$player2))
showSimilarNames(c(dat15$Player, players17$player))
save(rating2015, file = "rating2015-reg16.rda")
# rating2015$ratings <- merge(x = rating2015$ratings,
# y = nationalities, all.x = TRUE, by.x = "Player", by.y = "player1")
# number of rated players without a country
sum(is.na(rating2015$ratings$country))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment