Last active
September 26, 2017 22:15
-
-
Save CSJCampbell/a956f8762aa4fc4c08afef9257a4498d to your computer and use it in GitHub Desktop.
WTC 2017 Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
country | Rating | |
---|---|---|
Australia | 2445.549203 | |
England | 2343.962608 | |
USA | 2342.762909 | |
Sweden | 2313.943643 | |
Poland | 2279.733775 | |
Germany | 2263.613631 | |
Canada | 2256.044501 | |
Austria | 2246.198598 | |
NA | 2216.573624 | |
Finland | 2213.208031 | |
Italy | 2199.732591 | |
Denmark | 2181.675218 | |
Slovakia | 2151.784601 | |
Ireland | 2146.959651 | |
France | 2140.708093 | |
United.Nations | 2131.291862 | |
Scotland | 2118.545808 | |
Malta | 2112.417012 | |
Netherlands | 2111.028661 | |
Russia | 2089.290038 | |
Norway | 2078.400923 | |
Hungary | 2042.690559 | |
Wales | 2039.363156 | |
Greece | 2022.818299 | |
Belgium | 2014.507336 | |
Spain | 2007.025194 | |
Middle.East | 2006.221647 | |
Portugal | 1995.792035 | |
Latvia | 1977.852884 | |
Switzerland | 1976.917115 | |
Ukraine | 1972.035878 | |
Czech.Republic | 1963.651928 | |
China | 1909.536554 | |
Northern.Ireland | 1904.77978 | |
Slovenia | 1800 | |
UAE | 1800 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# master script to make predictions | |
library(dplyr) | |
library(jsonlite) | |
library(WTCTools) | |
# Read the WTC 2017 roster export; the result is a list with one entry per team.
players2017 <- read_json("World_Team_Championship_2017.json")

# Column layout of the roster matrix (one row per player).
roster_cols <- c("team", "player", "faction", "caster1", "caster2", "theme1", "theme2")
# Only the first five players of each team are read.
team_size <- 5L

# A list's theme may be absent from the JSON; map a missing theme to "".
# Plain if/else is used because the test is a single value, not a vector.
theme_or_blank <- function(army_list) {
  if (is.null(army_list$theme)) "" else army_list$theme
}

# matrix is efficient for data extraction
players17 <- matrix(NA_character_,
  nrow = length(players2017) * team_size,
  ncol = length(roster_cols),
  dimnames = list(NULL, roster_cols))
for (i in seq_along(players2017)) {
  dat <- players2017[[i]]
  team <- matrix(NA_character_, nrow = team_size, ncol = length(roster_cols))
  # team name is recycled down the first column
  team[, 1] <- dat$name
  # vapply() yields one column per player, so transpose to one row per player
  team[, -1] <- t(vapply(dat$players[seq_len(team_size)],
    FUN = function(x) {
      c(x$name, x$faction, x$list1$list[[1]], x$list2$list[[1]],
        theme_or_blank(x$list1), theme_or_blank(x$list2))
    },
    FUN.VALUE = character(6)))
  # write this team's rows into its slot of the full roster matrix
  players17[seq.int(((i - 1) * team_size) + 1, i * team_size), ] <- team
}
# convert the character matrix to a data.frame of plain strings
players17 <- as.data.frame(players17, stringsAsFactors = FALSE)
# pass both caster columns through harmonizeCasters()
players17$caster1 <- harmonizeCasters(players17$caster1)
players17$caster2 <- harmonizeCasters(players17$caster2)
# quoted nicknames embedded between first and last name
nickname_rx <- " \\\".+\\\" "
# show which names carry a nickname, then collapse it to a single space
grep(nickname_rx, x = players17$player, value = TRUE)
players17$player <- gsub(nickname_rx, replacement = " ", x = players17$player)
# hand-corrected spellings
players17$player[players17$player %in% "Anthony O Reilly "] <- "Anthony O'Reilly"
players17$player[players17$player %in% "Dmtrii Tochenov"] <- "Dmitrii Tochenov"
# inspect the names containing "Mader"
grep(".*Mader", x = players17$player, value = TRUE)
# round-trip through CSV
write.csv(players17, file = "players2017.csv", row.names = FALSE)
players17 <- read.csv(file = "players2017.csv", stringsAsFactors = FALSE)
############################################################################### | |
# get ratings | |
############################################################################### | |
# moved to wtc_predictions_2017_player_reg.R | |
#source("wtc_predictions_2017_update_rating.R") | |
load("rating2015-reg16.rda")
# running round index: the within-year round plus an offset of 6 per year after 2013
wtc$allrounds <- (wtc$year - 2013) * 6 + wtc$round
###########################################################
# ratings based on pairings with selected caster pairings
games2016 <- select(filter(wtc, year == 2016),
  allrounds, player1, player2, TP)
# update the 2015 ratings with the 2016 games
rating2016 <- steph(x = games2016, status = rating2015$ratings)
save(rating2016, file = "rating2016.rda")
# export the updated ratings table
rankings <- rating2016$ratings
write.csv(rankings, file = "rankings2016.csv", row.names = FALSE)
########################################################### | |
# register players | |
source("toPlain.R")
players17$player <- toPlain(players17$player)
# nickname / annotation patterns and their replacements, applied in this order
name_scrubs <- list(
  c(" \\\".+\\\"", ""),
  c(" '.+'$", ""),
  c(" '[A-Za-z ]+' ", " "),
  c("\\\"[A-Za-z ]+(''|\\\" )", " "),
  c(" \\(.+\\)$", "")
)
for (scrub in name_scrubs) {
  players17$player <- gsub(pattern = scrub[1], replacement = scrub[2],
    x = players17$player)
}
# remove all caps: names made only of capitals, spaces and hyphens become title case
shouting <- grepl("^[A-Z -]+$", x = players17$player)
players17$player[shouting] <- tools::toTitleCase(tolower(players17$player[shouting]))
# roster names with no row in the 2015 ratings
is_rated <- players17$player %in% rating2015$ratings$Player
newPlayers <- players17$player[!is_rated]
newPlayers
# replace names in players17 with correct similar names
pNames <- showSimilarNames(c(newPlayers, rating2015$ratings$Player))
# keep matched name pairs (rows picked by hand from the output above)
matched_rows <- c(10, 11, 13, 14, 15, 16, 17, 20, 21, 32, 33, 34, 35)
pNames <- pNames[matched_rows, ]
# align replacements: where the second name is the rated one, reverse the pair
# so the rated spelling ends up in column 1
bool <- pNames[, 2] %in% rating2015$ratings$Player
for (i in which(bool)) {
  pNames[i, ] <- rev(pNames[i, ])
}
# "Georg Buscha" %in% rating2015$ratings$Player
# replace values in second column with values in first
for (i in seq_len(nrow(pNames))) {
  misspelt <- players17$player == pNames[i, 2]
  players17$player[misspelt] <- pNames[i, 1]
}
# two fixes done by hand
players17$player[players17$player %in% "kerazoglou nikos"] <- "Kerazoglou Nikolaos"
players17$player[players17$player %in% "Victor Pons"] <- "Victor Pons Tec"
# round-trip through CSV
write.csv(players17, file = "players2017.csv", row.names = FALSE)
players17 <- read.csv(file = "players2017.csv", stringsAsFactors = FALSE)
########################################################### | |
# check players in rating | |
# split the distinct roster names into those with and without a 2015 rating
p17 <- unique(players17$player)
knownp <- p17[p17 %in% rating2015$ratings$Player]
length(knownp)
unknownp <- p17[!p17 %in% rating2015$ratings$Player]
length(unknownp)
# initial letter to inspect (index into LETTERS); change by hand to browse
i <- 3
# unrated roster names starting with that letter
pp17 <- grep(pattern = paste0("^", LETTERS[i]), x = unknownp,
  value = TRUE)
# rated names starting with that letter that were not matched to the roster
pp15 <- grep(pattern = paste0("^", LETTERS[i]),
  x = rating2015$ratings$Player[!rating2015$ratings$Player %in% knownp],
  value = TRUE)
# side-by-side table: unrated roster names | unmatched rated names | their country
nm <- matrix(NA, nrow = max(length(pp17), length(pp15)), ncol = 3)
nm[seq_along(pp17), 1] <- sort(pp17)
nm[seq_along(pp15), 2] <- sort(pp15)
# Look the country up by position with match(). Labelling the rows with
# sort(pp15) fails when pp17 is the longer vector, because the matrix then
# has more rows than there are names.
nm[seq_along(pp15), 3] <- rating2015$ratings$country[
  match(sort(pp15), rating2015$ratings$Player)]
nm
# Derive a country label from the team name.
# Each pair is (regex, replacement); they are applied in this order: strip
# team-name prefixes, then join multi-word countries with "." so the
# first-word split below keeps them whole.
country_rewrites <- list(
  c("Team ", ""),
  c("Epic ", ""),
  c("Prime ", ""),
  c("Northern ", "Northern."),
  c("United ", "United."),
  c("Middle ", "Middle."),
  c("^.*Czech.*$", "Czech.Republic")
)
players17$country <- players17$team
for (rewrite in country_rewrites) {
  players17$country <- gsub(pattern = rewrite[1], replacement = rewrite[2],
    x = players17$country)
}
# keep the first space-separated word; vapply() guarantees a character vector
# even for zero rows, where sapply() would return an empty list
players17$country <- vapply(strsplit(x = players17$country, split = " "),
  FUN = function(x) x[1], FUN.VALUE = character(1))
players17$country[players17$country == "United.States"] <- "USA"
# inspect the resulting labels
sort(unique(players17$country))
library(dplyr)
# per-country fallback rating: the 35th percentile of that country's rated players
nationalityRating <- as.data.frame(
  summarise(group_by(rating2015$ratings, country),
    Rating = quantile(Rating, probs = 0.35)))
write.csv(nationalityRating,
  file = "nationalityRating.csv",
  quote = FALSE, row.names = FALSE)
# attach the country fallback (column Rating); the join key is stated
# explicitly rather than inferred from shared column names
players17 <- left_join(x = players17,
  y = nationalityRating,
  by = "country")
# attach each player's own rating (column Rating1); a player matches only
# when both name and country agree, the same keys the implicit join used
players17 <- left_join(x = players17,
  y = transmute(rating2015$ratings,
    player = Player, Rating1 = Rating, country = country),
  by = c("player", "country"))
dim(players17)
# unrated players take their country's fallback rating
needs_rating <- is.na(players17$Rating1)
players17$Rating1[needs_rating] <- players17$Rating[needs_rating]
# how many are still unrated (country had no fallback either)
sum(is.na(players17$Rating1))
# those start at 1800
players17$Rating1[is.na(players17$Rating1)] <- 1800
#help(package = "data.table") | |
# complete ratings object: build a starting row for every roster player
# who has no row in the 2015 ratings
unrated <- filter(players17, !player %in% rating2015$ratings$Player)
new <- transmute(unrated,
  Player = player, Rating = Rating1, Deviation = 200, Games = 0,
  Win = 0, Draw = 0, Loss = 0, Lag = 0, country = country)
# one row per player
new <- slice(group_by(new, Player), 1)
# add records to object, sorted by player name
combined <- rbind(rating2015$ratings, as.data.frame(new))
rating2015$ratings <- arrange(as.data.frame(combined), Player)
# Stack the two caster columns into long form: one row per player per list.
# gather() is namespace-qualified because this script attaches dplyr,
# jsonlite and WTCTools but never tidyr.
# NOTE(review): WTCTools may attach tidyr itself - confirm.
players17 <- tidyr::gather(players17, key = listno, value = List,
  caster1, caster2)
players17 <- rename(players17, Team = team, Player = player)
#uniCasters <- sort(unique(players17$List))
#pairlookup16 <- matrix(0, nrow = length(casters),
# ncol = length(casters), dimnames = list(casters, casters))
# pairlookup16 <- setMatrixVal(x = pairlookup16,
# list1 = rep("Wurmwood1", times = length(uniCasters[!uniCasters %in% "Irusk2"])), list2 = uniCasters[!uniCasters %in% "Irusk2"], val = 2)
# pairlookup16 <- setMatrixVal(x = pairlookup16,
# list1 = rep("Madrak2", times = length(uniCasters[!uniCasters %in% "Haley2"])), list2 = uniCasters[!uniCasters %in% "Haley2"], val = 2)
players17 <- arrange(players17, Team, Player)
#rating2015$ratings$Player[!rating2015$ratings$Player %in% players17$Player]
# inspect the three teams whose rows are trimmed below
players17[players17$Team == "Austria Mozart", ]
players17[players17$Team == "Czech Eagles", ]
players17[players17$Team == "Germany Loreley", ]
# remove duplicates
# NOTE(review): these row numbers are hard-coded against the current sort
# order; re-check them whenever the roster or the cleaning steps change.
players17 <- slice(players17, -c(14, 337, 76, 399, 140, 463))
############################################################################### | |
# | |
# Simulation | |
# | |
############################################################################### | |
load("rating2016.rda")
# number of simulated tournaments and number of competing teams
nn <- 10000L
nteams <- length(unique(players17$Team))
out <- matrix(NA_character_, nrow = nteams, ncol = nn)
library(foreach)
library(doSNOW)
# two-worker cluster registered as the foreach backend
cl <- makeCluster(2L)
registerDoSNOW(cl)
# one integer seed per simulation, drawn reproducibly
set.seed(287934)
sseed <- as.integer(runif(n = nn, min = 1, max = nn) * 1000)
# redraw repeated seeds from a higher range
clash <- duplicated(sseed)
sseed[clash] <- as.integer(
  runif(n = sum(clash),
    min = nn, max = nn * 10) * 1000)
# check that no repeats remain
sum(duplicated(sseed))
#[1] 0
#[1] 0 | |
# Run the nn simulated tournaments in parallel; each returns the team names
# ordered by matches won, then total games won (best first).
# tryCatch(finally = ) shuts the cluster down even if a worker errors, so a
# failed run does not leave the worker processes behind.
tryCatch(
  system.time(
    #out <- foreach(ii = seq_len(nn)) %do% {
    out <- foreach(ii = seq_len(nn), .packages = "WTCTools") %dopar% {
      #for (ii in seq_len(nn)) {
      # per-simulation seed
      set.seed(sseed[ii])
      thisRound <- doRound(data = players17, results = NULL,
        ratings = rating2016,
        pairlookup = mark3lookup)
      # NOTE(review): the first call passes no round and the loop then
      # passes 8..12 - confirm these are the intended round numbers.
      for (jj in 8:12) {
        thisRound <- doRound(data = players17,
          round = jj,
          results = thisRound,
          ratings = rating2016,
          pairlookup = mark3lookup)
      }
      tmp <- rownames(thisRound)
      tmp[order(thisRound$Matches.Won,
        thisRound$Total.Games.Won,
        decreasing = TRUE)]
    }
  ),
  finally = stopCluster(cl)
)
save(out, file = "out21092017.RData")
############################################################################### | |
# | |
# Summarise Simulation Results | |
# | |
############################################################################### | |
load("out21092017.RData")
# number of simulations
nn <- length(out)
# bind the simulations as rows, then transpose: rows are finishing
# positions, columns are simulations
out <- t(do.call(what = "rbind", out))
# For each finishing position, count how often each team landed there.
# lapply() always returns a list. apply() would simplify to a matrix whenever
# every position has the same number of distinct teams, which would break
# tab[[i]] and length(tab) below.
tab <- lapply(seq_len(nrow(out)), function(pos) {
  sort(table(out[pos, ]), decreasing = TRUE)
})
# top six teams for first place, as percentages
(tab[[1]] / (nn / 100))[1:6]
# team names are taken from the first simulation
teamNames <- out[, 1]
# res[team, position] = percentage of simulations with the team in that position
res <- matrix(NA, nrow = length(tab), ncol = length(tab))
rownames(res) <- teamNames
for (i in seq_along(tab)) {
  # plain named vector, so teams absent from this position index as NA
  counts <- setNames(as.vector(tab[[i]]), names(tab[[i]]))
  perc <- counts[teamNames] / (nn / 100)
  # a team never seen in this position scores 0
  perc[is.na(perc)] <- 0
  res[teamNames, i] <- perc
}
############################################################################### | |
# | |
# Summarize Results | |
# | |
############################################################################### | |
# Mean of the elements of x, each weighted by its position (1, 2, ...).
moment <- function(x) {
  positions <- seq_along(x)
  weighted <- positions * x
  mean(weighted)
}
# score moment per team (one row of res per team)
mmax <- apply(X = res, MARGIN = 1L, FUN = moment)
# order teams by ascending moment
ascending <- order(mmax)
reso <- res[ascending, ]
mmax <- mmax[ascending]
mmax
# ranking table; row.names = NULL stops the team names becoming row names
dat <- data.frame(Rank = seq_along(mmax), Team = names(mmax),
  "Score Moment" = round(mmax, 3), row.names = NULL)
library(xtable)
# emit the table as HTML
print(xtable(dat), type = "html")
############################################################################### | |
# | |
# Expert Opinion Bootstrap | |
# | |
############################################################################### | |
# Expert picks: one row per expert, the expert's name followed by seven teams.
klaw <- matrix(c(
  "Sascha Maisel", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Italy Michelangelo", "Poland Hussar", "Sweden Götaland",
  "Chris Davies", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Norway Hugin", "Poland Hussar", "Sweden Norrland",
  "Bubba Dalton", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Poland Hussar", "Sweden Götaland", "USA Texas",
  "Tom Guan", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Italy Michelangelo", "Poland Hussar", "USA Texas",
  "Ryan Evans", "Australia Drop Bear", "Canada Moose", "England Lions", "Germany Loreley", "Norway Hugin", "Poland Hussar", "Sweden Götaland",
  "Konrad Sosnowski", "Australia Drop Bear", "Canada Moose", "Germany Loreley", "Sweden Götaland", "USA Texas", "Poland Hussar", "Poland Knights",
  "Johan Dyrlind", "Australia Drop Bear", "Canada Beaver", "Germany Loreley", "Poland Hussar", "Sweden Götaland", "Sweden Norrland", "Sweden Svealand",
  "Norbert Brunhuber", "Australia Drop Bear", "Canada Beaver", "England Lions", "Germany Loreley", "Poland Hussar", "Sweden Götaland", "USA Texas"),
  nrow = 8, byrow = TRUE)
# default column names are V1..V8; V1 holds the expert
klaw <- as.data.frame(klaw, stringsAsFactors = FALSE)
klaw <- rename(klaw, Player = V1)
# Long form: one row per expert per pick. gather() is namespace-qualified
# because this script never attaches tidyr.
# NOTE(review): WTCTools may attach tidyr itself - confirm.
klaw <- tidyr::gather(klaw, key = Rank, value = Team, -Player)
# turn the column label (V2..V8) into the pick position 1..7
klaw <- mutate(klaw, Rank = (1:7)[factor(Rank)])
klaw <- arrange(klaw, Player, Rank)
head(klaw)
# picks whose team name does not appear in the simulation
unique(klaw$Team[!klaw$Team %in% teamNames])
# showSimilarNames(c(klaw$Team, teamNames))
# borda count: a pick in position r scores 7 - r points
klaw1 <- arrange(summarize(group_by(klaw, Team),
  Borda = sum(7 - Rank, na.rm = TRUE)), desc(Borda))
print(xtable(klaw1), type = "html")
############################################################################### | |
# | |
# Simulation Plot | |
# | |
############################################################################### | |
library(ggplot2)
# Rank distribution of one team, with a zero-height vertex prepended at
# x = 0.9. check.names = FALSE keeps the space in the column name so it is
# used as the axis label.
rank_density <- data.frame(
  Rank = c(0.9, seq_len(nrow(res))),
  `Probability Density` = c(0, res["Australia Drop Bear", ]),
  check.names = FALSE)
png("Australia_Drop_Bear_Prob_Density_2017.png", height = 600, width = 800, res = 120)
# print() explicitly: a ggplot object is only drawn by top-level auto-print,
# so without it the PNG comes out blank when the script is run via source().
# ggplot() + geom_polygon() replaces the deprecated qplot().
print(
  ggplot(rank_density, aes(x = Rank, y = `Probability Density`)) +
    geom_polygon(fill = "gold") +
    ggtitle("Likelihood of Rank of Australia Drop Bear") +
    theme_bw()
)
dev.off()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cleaning of wtc data and rating data | |
library(WTCTools) | |
library(tidyr) | |
# Derive a country label for each game from the first team's name.
# Each pair is (regex, replacement); they are applied in this order: strip
# team-name prefixes, then join multi-word countries with "." so the
# first-word split below keeps them whole.
wtc_country_rewrites <- list(
  c("Team ", ""),
  c("Epic ", ""),
  c("Prime ", ""),
  c("Northern ", "Northern."),
  c("United ", "United."),
  c("Middle ", "Middle."),
  c("Czech ", "Czech.")
)
wtc$country <- wtc$team1
for (rewrite in wtc_country_rewrites) {
  wtc$country <- gsub(pattern = rewrite[1], replacement = rewrite[2],
    x = wtc$country)
}
# keep the first space-separated word; vapply() guarantees a character vector
# even for zero rows, where sapply() would return an empty list
wtc$country <- vapply(strsplit(x = wtc$country, split = " "),
  FUN = function(x) x[1], FUN.VALUE = character(1))
wtc$country[wtc$country == "United.States"] <- "USA"
# inspect the resulting labels
sort(unique(wtc$country))
# first country seen for each player1
first_seen <- !duplicated(wtc$player1)
nationalities <- wtc[first_seen, c("player1", "country")]
# long form: one row per game per side, with the player's name in `name`
wtc1 <- gather(wtc, key = player, value = name, player1, player2)
# row count per name
by_name <- group_by(wtc1, name)
gameCounts <- summarize(by_name, n())
pNames <- showSimilarNames(gameCounts$name)
# remove non-matched names (rows picked by hand from the output above)
unmatched_rows <- c(1, 2, 3, 8, 12, 23, 29, 30)
pNames <- pNames[-unmatched_rows, ]
# align replacements: reverse the hand-picked pairs so the spelling to keep
# is in column 1
index <- c(5, 7, 8, 9, 18, 19, 23)
for (i in index) {
  pNames[i, ] <- rev(pNames[i, ])
}
pNames
# replace each column-2 spelling with its column-1 counterpart
for (i in seq_len(nrow(pNames))) {
  misspelt <- wtc1$name == pNames[i, 2]
  wtc1$name[misspelt] <- pNames[i, 1]
}
# manually fix
# Exact-match corrections as (name as stored, plain spelling) pairs. The
# garbled spellings are kept verbatim because they must match the data.
exact_fixes <- matrix(c(
  "PA¤r-Ola Nilsson", "Per-Ola Nilsson",
  "BenoA®t CA©rA©", "Benoit Cere",
  "HA¥vard Kongsrud", "Haovard Kongsrud",
  "AⱲic Grux", "Eric Grux",
  "KA©vin Maeder", "Kevin Maeder",
  "SA¸ren Kongsgaard", "Soeren Kongsgaard",
  "Matej TkA¡c", "Matej Tkac",
  "Mathias LA¼tzkendorf", "Mathias Luetzkendorf",
  "Torjus EftestA¸l", "Torjus Eftestel",
  "SA¸ren Husum", "Soeren Husum",
  "SA¸ren Laust Jacobsen", "Soeren Laust Jacobsen",
  "Benjamin FA¼tzi", "Benjamin Futzi",
  "LorA¡nd SA¶vA©nyhA¡zi", "Lorand Sovenyhazi",
  "PA©ter MolnA¡r", "Peter Molnar",
  "AndrA¡s PorkolA¡b", "Andras Porkolab",
  "JA¡nos Kendi", "Janos Kendi",
  "KA©vin Aouragh", "Kevin Aouragh",
  "AurA©lien Guiramand", "Aurelien Guiramand",
  "LuboA¡ Hribik", "Luboa Hribik",
  "Gerard O' Brien", "Gerard O'Brien",
  "Gerard O'brien", "Gerard O'Brien",
  "Kevin Aourragh", "Kevin Aouragh",
  "Konrad KrA³licki", "Konrad Krolicki"),
  ncol = 2, byrow = TRUE)
for (k in seq_len(nrow(exact_fixes))) {
  wtc1$name[wtc1$name == exact_fixes[k, 1]] <- exact_fixes[k, 2]
}
# Pattern-based corrections. None of these patterns matches an exact-fix
# key above, so running them after the exact fixes gives the same result.
wtc1$name[grepl(pattern = "^.+ric Grux$", x = wtc1$name)] <- "Eric Grux"
wtc1$name[grepl(pattern = "^.+s Leite$", x = wtc1$name)] <- "Luis Leite"
wtc1$name[grepl(pattern = "^Peter Cornelious Dyhre.*Poulsen$", x = wtc1$name)] <- "Peter Cornelious Dyhre-Poulsen"
# list names that still contain characters outside letters, space, ', . and -
plain_name <- grepl("^[A-Za-z '\\.-]+$", wtc1$name)
sort(unique(wtc1$name[!plain_name]))
# back to one row per game with player1 / player2 columns
wtc2 <- spread(wtc1, key = player, value = name)
load("rating2015-reg.RData")
# check special character
z <- rating2015$ratings$Player
source("toPlain.R")
z <- toPlain(z)
# Ordered correction rules as (kind, target, replacement). "exact" compares
# the whole name; "regex" matches the target as a pattern. The garbled
# targets are kept verbatim because they must match the data.
player_rules <- matrix(c(
  "exact", "(C) Daniel Bogdanoski", "Daniel Bogdanoski",
  "exact", "Dominik de Cassan: Axis+Iron Mother", "Dominik de Cassan",
  "regex", "^P.*r-Ola Nilsson$", "Per-Ola Nilsson",
  "regex", "^Beno.*t C.*r.*$", "Benoit Cere",
  "regex", "^H.*vard Kongsrud$", "Haovard Kongsrud",
  "exact", "KA©vin Maeder", "Kevin Maeder",
  "regex", "^S.*ren Kongsgaard$", "Soeren Kongsgaard",
  "regex", "^Matej Tk.*$", "Matej Tkac",
  "regex", "^Mathias L.*tzkendorf$", "Mathias Luetzkendorf",
  "regex", "^Torjus Eftest.*l$", "Torjus Eftestel",
  "regex", "^S.*ren Husum$", "Soeren Husum",
  "regex", "^.+ric Grux$", "Eric Grux",
  "regex", "^.+s Leite$", "Luis Leite",
  "regex", "^S.*ren Laust Jacobsen$", "Soeren Laust Jacobsen",
  "exact", "Benjamin FA¼tzi", "Benjamin Futzi",
  "exact", "LorA¡nd SA¶vA©nyhA¡zi", "Lorand Sovenyhazi",
  "regex", "^P.*ter Moln.*r$", "Peter Molnar",
  "regex", "^Andr.*s Porkol.*b$", "Andras Porkolab",
  "regex", "^J.*nos Kendi$", "Janos Kendi",
  "exact", "KA©vin Aouragh", "Kevin Aouragh",
  "regex", "^Aur.*lien Guiramand$", "Aurelien Guiramand",
  "regex", "^Lubo.* Hribik$", "Luboa Hribik",
  "regex", "^Konrad Kr.*licki$", "Konrad Krolicki",
  "regex", "^Peter Cornelious Dyhre.*Poulsen$", "Peter Cornelious Dyhre-Poulsen"),
  ncol = 3, byrow = TRUE)
for (k in seq_len(nrow(player_rules))) {
  target <- player_rules[k, 2]
  hit <- if (player_rules[k, 1] == "exact") {
    z == target
  } else {
    grepl(pattern = target, x = z)
  }
  z[hit] <- player_rules[k, 3]
}
#z[grep(z, pattern = "^M.*t.* T.*ri$")]
#z[z == "Gerard O' Brien"] <- "Gerard O'Brien"
#z[z == "Gerard O'brien"] <- "Gerard O'Brien"
#z[z == "Kevin Aourragh"] <- "Kevin Aouragh"
# names that still contain characters outside letters, space, ', . and -
plain_name <- grepl("^[A-Za-z '\\.-]+$", z)
z1 <- sort(z[!plain_name])
z1
# store the cleaned names and save
rating2015$ratings$Player <- z
save(rating2015, file = "rating2015-reg16.rda")
# merge duplicated names
dat15 <- rating2015$ratings
showSimilarNames(dat15$Player)
# remove nicknames: any name containing the nickname becomes the plain name
dat15$Player[grepl("Snot", dat15$Player)] <- "Daniel Pawelka"
dat15$Player[grepl("Raghnath", dat15$Player)] <- "Martin Florian"
# filter(dat15, grepl(pattern = "Kasd.*vasilis", x = Player))
# missed Dimitris
# drop the rows already called "Dimitris Kasdiovasilis", then give that name
# to the "Dimitrios Kasdovasilis" rows
is_dimitris <- grepl(pattern = "Dimitris Kasdiovasilis", x = dat15$Player)
dat15 <- filter(dat15, !is_dimitris)
is_dimitrios <- grepl(pattern = "Dimitrios Kasdovasilis", x = dat15$Player)
dat15 <- mutate(dat15,
  Player = replace(Player, is_dimitrios, "Dimitris Kasdiovasilis"))
# Frederik Punt: merge the "Frederik" and "Freek" rows into one
filter(dat15, grepl(pattern = "Fre.*k Punt", x = Player))
row <- filter(dat15, grepl(pattern = "Fre.*k Punt", x = Player))
# set the name explicitly so the merged row is right whichever spelling
# happens to be first in `row`
row$Player <- "Frederik Punt"
row$Rating <- mean(row$Rating)
row$Games <- sum(row$Games)
row$Win <- sum(row$Win)
# Draw is summed too, so Games still equals Win + Draw + Loss
row$Draw <- sum(row$Draw)
row$Loss <- sum(row$Loss)
dat15 <- filter(dat15, !grepl(pattern = "Freek Punt", x = Player))
dat15[dat15$Player == "Frederik Punt", ] <- row[1, ]
# filter(dat15, grepl(pattern = "Pawelka", x = Player))
# drop the second "Daniel Pawelka" row
dat15 <- slice(dat15, -grep(pattern = "Daniel Pawelka", x = Player)[2])
filter(dat15, grepl(pattern = "Florian$", x = Player))
# NOTE(review): this drops the first remaining "Daniel Pawelka" row right
# after the second was dropped. The line above inspects the Florian rows,
# so the target may have been meant to be the duplicate "Martin Florian".
# Behaviour is left unchanged - confirm.
dat15 <- slice(dat15, -grep(pattern = "Daniel Pawelka", x = Player)[1])
# "Jason Mcleod": drop the existing "Jason" row, then rename "Jay" to "Jason"
filter(dat15, grepl(pattern = "Ja.* Mcleod", x = Player))
dat15 <- filter(dat15, !grepl(pattern = "Jason Mcleod", x = Player))
dat15 <- mutate(dat15,
  Player = replace(Player,
    grepl(pattern = "Jay Mcleod", x = Player),
    "Jason Mcleod"))
# "Josh Bates": drop the rows named exactly "Josh Bates"
filter(dat15, grepl(pattern = "Josh.* Bates", x = Player))
dat15 <- filter(dat15, !grepl(pattern = "Josh Bates", x = Player))
# "Michal Nakonieczny": merge the "Nakonieczny" and "Konieczny" rows
row <- filter(dat15, grepl(pattern = "Michal .+onieczny", x = Player))
row$Player <- "Michal Nakonieczny"
row$Games <- sum(row$Games)
row$Win <- sum(row$Win)
# Draw is summed too, so Games still equals Win + Draw + Loss
row$Draw <- sum(row$Draw)
row$Loss <- sum(row$Loss)
# NOTE(review): unlike the Punt merge, Rating is not averaged here; the
# merged row keeps the rating of whichever row is first - confirm.
dat15 <- filter(dat15, !grepl(pattern = "Michal Konieczny", x = Player))
dat15[dat15$Player == "Michal Nakonieczny", ] <- row[1, ]
# "Nikolaos Kerazoglou" | |
# inspect the Kerazoglou rows, then drop the "Nikos" spelling
filter(dat15, grepl(pattern = "Niko.*s Kerazoglou", x = Player))
is_nikos <- grepl(pattern = "Nikos Kerazoglou", x = dat15$Player)
dat15 <- filter(dat15, !is_nikos)
# "Panagiotis Demiris": drop the existing "Demiris" row, then rename
# "Ntemiris" to "Demiris"
filter(dat15, grepl(pattern = "Panagiotis .+emiris", x = Player))
is_demiris <- grepl(pattern = "Panagiotis Demiris", x = dat15$Player)
dat15 <- filter(dat15, !is_demiris)
is_ntemiris <- grepl(pattern = "Panagiotis Ntemiris", x = dat15$Player)
dat15 <- mutate(dat15,
  Player = replace(Player, is_ntemiris, "Panagiotis Demiris"))
# dat15$Player[grep(".*Mader", x = dat15$Player)]
# grep(" \\\".+\\\" ", x = dat15$Player, value = TRUE)
# re-check for near-duplicate names after the merges
showSimilarNames(dat15$Player)
# store the cleaned table and cross-check it against the game data and
# the 2017 roster
rating2015$ratings <- dat15
showSimilarNames(c(rating2015$ratings$Player, wtc$player1, wtc$player2))
showSimilarNames(c(dat15$Player, players17$player))
save(rating2015, file = "rating2015-reg16.rda")
# rating2015$ratings <- merge(x = rating2015$ratings,
# y = nationalities, all.x = TRUE, by.x = "Player", by.y = "player1")
# number of rated players without a country
sum(is.na(rating2015$ratings$country))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment