Skip to content

Instantly share code, notes, and snippets.

@cjtdevil
Created June 30, 2022 14:28
Show Gist options
  • Save cjtdevil/81595282bcddc780e3531f152c574ee7 to your computer and use it in GitHub Desktop.
Save cjtdevil/81595282bcddc780e3531f152c574ee7 to your computer and use it in GitHub Desktop.
Hockey Hall of Fame worthiness by JAWS for InfernalAccess article about Patrik Elias
library(tidyverse);library(rvest);library(ggplot2);
library(ggbeeswarm);library(chart)
# Convert the weird characters
to.plain <- function(s) {
# 1 character substitutions
old1 <- "šžþàáâãäåçèéêëìíîïðñòóôõöùúûüý"
new1 <- "szyaaaaaaceeeeiiiidnooooouuuuy"
s1 <- chartr(old1, new1, s)
# 2 character substitutions
old2 <- c("œ", "ß", "æ", "ø")
new2 <- c("oe", "ss", "ae", "oe")
s2 <- s1
for(i in seq_along(old2)) s2 <- gsub(old2[i], new2[i], s2, fixed = TRUE)
s2
}
# Scrape Hall of Famer list
hof = read_html('https://www.hockey-reference.com/awards/hhof.html') %>%
html_table() %>%
data.frame %>% filter(Var.3 == 'Player') %>%
`[`(1:2) %>% `colnames<-`(c('Season','Player')) %>% filter(Season!='Year') %>%
mutate(Player = to.plain(Player),
Player = gsub('Raymond Bourque','Ray Bourque',Player))
# List this year's inductees
thisyear = c('Daniel Alfredsson','Roberto Luongo','Daniel Sedin','Henrik Sedin')
# Get list of URLs
home = 'https://www.hockey-reference.com/leagues/'
urls = read_html(home) %>% html_nodes('th a') %>% html_attr('href') %>%
`[`(which(grepl('NHL',.)))
years = urls %>%
gsub(pattern = '/leagues/NHL_',replacement = '') %>%
gsub(pattern = '.html',replacement = '') %>% as.numeric()
urls = urls %>% gsub(pattern = '.html',replacement = '_skaters-misc.html')
# Main Player Scrape
df_list = list()
for(i in 1:length(urls)){
Sys.sleep(5)
cat(100*round(i/length(urls),3),"%", "\r")
flush.console()
df <- read_html(paste0('https://www.hockey-reference.com',urls[i])) %>%
html_table()
df_list[[length(df_list)+1]] = as.data.frame(df) %>% mutate(Season = years[i])
}
# Aggregate and tidy up data
temp <- bind_rows(df_list) %>%
`colnames<-`(c('Rk','Player','Age','Tm','Pos','GP','GC',
'Gpg','Apg','PTSpg','GCpg',
'PIM','S',
'Gadj','Aadj','PTSadj','GCadj',
'TGF','PGF','TGA','PGA',
'PM','xGF','xGA','EPM',
'OPS','DPS','PS',
'Att','Made','Miss','Pct','Season','na')) %>%
mutate_at(vars(GP:na),~as.numeric(.))
# JAWS Calculation and HOF classification
final <- temp %>%
filter(!is.na(GP)) %>%
group_by(Player) %>%
arrange(Season) %>%
mutate(Player=to.plain(Player),
y0 = PS,
last = max(Season,na.rm = T)) %>%
mutate(y1 = lag(y0,1),
y2 = lag(y0,2),
y3 = lag(y0,3),
y4 = lag(y0,4),
y5 = lag(y0,5),
y6 = lag(y0,6),
Career = sum(y0,na.rm = T)) %>%
mutate_at(vars(y0:y6),~ifelse(is.na(.),0,.)) %>%
mutate(`7yr-Peak` = y0+y1+y2+y3+y4+y5+y6,
JAWS = (`7yr-Peak`+Career)/2,
HHOF = ifelse(Player %in% hof$Player,
ifelse(last>1967,'HOFer (Post-Exp)','HOFer (Pre-Exp)'),
ifelse(Player=='Patrik Elias',' Elias',
ifelse(Player %in% thisyear,'2022',
'Other'))),
Pos = ifelse(grepl('D',Pos),'Defencemen',ifelse(grepl('G',Pos),'Goalies','Forwards')),
Size = ifelse(HHOF == ' Elias',3,
ifelse(HHOF == '2022',2,1))) %>%
arrange(-JAWS) %>%
slice(1) %>%
select(Player,Position=Pos,HHOF,`7yr-Peak`,Career,JAWS,Size,last)
# Plot for article
ggplot(data=final %>%
filter(HHOF != 'Other')%>%
arrange(desc(HHOF)),
aes(x=JAWS,y=Position,color=HHOF,size=Size))+
geom_quasirandom(groupOnX = FALSE) +
scale_size(range = c(2, 4),guide="none") +
scale_colour_manual(values = c("red", "cyan3", "black","grey",'white'))+
ggtitle('Patrik Elias among Hockey Hall of Famers') +
theme(plot.title = element_text(hjust=0.5))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment