Created
June 30, 2022 14:28
-
-
Save cjtdevil/81595282bcddc780e3531f152c574ee7 to your computer and use it in GitHub Desktop.
Hockey Hall of Fame worthiness by JAWS for InfernalAccess article about Patrik Elias
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse);library(rvest);library(ggplot2); | |
library(ggbeeswarm);library(chart) | |
# Convert the weird characters | |
to.plain <- function(s) { | |
# 1 character substitutions | |
old1 <- "šžþàáâãäåçèéêëìíîïðñòóôõöùúûüý" | |
new1 <- "szyaaaaaaceeeeiiiidnooooouuuuy" | |
s1 <- chartr(old1, new1, s) | |
# 2 character substitutions | |
old2 <- c("œ", "ß", "æ", "ø") | |
new2 <- c("oe", "ss", "ae", "oe") | |
s2 <- s1 | |
for(i in seq_along(old2)) s2 <- gsub(old2[i], new2[i], s2, fixed = TRUE) | |
s2 | |
} | |
# Scrape Hall of Famer list | |
hof = read_html('https://www.hockey-reference.com/awards/hhof.html') %>% | |
html_table() %>% | |
data.frame %>% filter(Var.3 == 'Player') %>% | |
`[`(1:2) %>% `colnames<-`(c('Season','Player')) %>% filter(Season!='Year') %>% | |
mutate(Player = to.plain(Player), | |
Player = gsub('Raymond Bourque','Ray Bourque',Player)) | |
# List this year's inductees | |
thisyear = c('Daniel Alfredsson','Roberto Luongo','Daniel Sedin','Henrik Sedin') | |
# Get list of URLs | |
home = 'https://www.hockey-reference.com/leagues/' | |
urls = read_html(home) %>% html_nodes('th a') %>% html_attr('href') %>% | |
`[`(which(grepl('NHL',.))) | |
years = urls %>% | |
gsub(pattern = '/leagues/NHL_',replacement = '') %>% | |
gsub(pattern = '.html',replacement = '') %>% as.numeric() | |
urls = urls %>% gsub(pattern = '.html',replacement = '_skaters-misc.html') | |
# Main Player Scrape | |
df_list = list() | |
for(i in 1:length(urls)){ | |
Sys.sleep(5) | |
cat(100*round(i/length(urls),3),"%", "\r") | |
flush.console() | |
df <- read_html(paste0('https://www.hockey-reference.com',urls[i])) %>% | |
html_table() | |
df_list[[length(df_list)+1]] = as.data.frame(df) %>% mutate(Season = years[i]) | |
} | |
# Aggregate and tidy up data | |
temp <- bind_rows(df_list) %>% | |
`colnames<-`(c('Rk','Player','Age','Tm','Pos','GP','GC', | |
'Gpg','Apg','PTSpg','GCpg', | |
'PIM','S', | |
'Gadj','Aadj','PTSadj','GCadj', | |
'TGF','PGF','TGA','PGA', | |
'PM','xGF','xGA','EPM', | |
'OPS','DPS','PS', | |
'Att','Made','Miss','Pct','Season','na')) %>% | |
mutate_at(vars(GP:na),~as.numeric(.)) | |
# JAWS Calculation and HOF classification | |
final <- temp %>% | |
filter(!is.na(GP)) %>% | |
group_by(Player) %>% | |
arrange(Season) %>% | |
mutate(Player=to.plain(Player), | |
y0 = PS, | |
last = max(Season,na.rm = T)) %>% | |
mutate(y1 = lag(y0,1), | |
y2 = lag(y0,2), | |
y3 = lag(y0,3), | |
y4 = lag(y0,4), | |
y5 = lag(y0,5), | |
y6 = lag(y0,6), | |
Career = sum(y0,na.rm = T)) %>% | |
mutate_at(vars(y0:y6),~ifelse(is.na(.),0,.)) %>% | |
mutate(`7yr-Peak` = y0+y1+y2+y3+y4+y5+y6, | |
JAWS = (`7yr-Peak`+Career)/2, | |
HHOF = ifelse(Player %in% hof$Player, | |
ifelse(last>1967,'HOFer (Post-Exp)','HOFer (Pre-Exp)'), | |
ifelse(Player=='Patrik Elias',' Elias', | |
ifelse(Player %in% thisyear,'2022', | |
'Other'))), | |
Pos = ifelse(grepl('D',Pos),'Defencemen',ifelse(grepl('G',Pos),'Goalies','Forwards')), | |
Size = ifelse(HHOF == ' Elias',3, | |
ifelse(HHOF == '2022',2,1))) %>% | |
arrange(-JAWS) %>% | |
slice(1) %>% | |
select(Player,Position=Pos,HHOF,`7yr-Peak`,Career,JAWS,Size,last) | |
# Plot for article | |
ggplot(data=final %>% | |
filter(HHOF != 'Other')%>% | |
arrange(desc(HHOF)), | |
aes(x=JAWS,y=Position,color=HHOF,size=Size))+ | |
geom_quasirandom(groupOnX = FALSE) + | |
scale_size(range = c(2, 4),guide="none") + | |
scale_colour_manual(values = c("red", "cyan3", "black","grey",'white'))+ | |
ggtitle('Patrik Elias among Hockey Hall of Famers') + | |
theme(plot.title = element_text(hjust=0.5)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment