Skip to content

Instantly share code, notes, and snippets.

@cjtdevil
Created April 21, 2021 02:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cjtdevil/0cac7f1874f7bb1f28f0afe39b968676 to your computer and use it in GitHub Desktop.
Save cjtdevil/0cac7f1874f7bb1f28f0afe39b968676 to your computer and use it in GitHub Desktop.
library(tidyverse);library(rvest)
#' Build the Natural Stat Trick player-table URL for one season.
#'
#' Only the season, the `sit=` value and the `stdoi=` value vary between the
#' requests made by `scrape_season()`; every other query parameter is fixed.
#'
#' @param season Season code as a string, e.g. "20202021".
#' @param sit Value for the `sit=` query parameter ("pp" or "ev" here;
#'   presumably power play and even strength -- confirm against the site).
#' @param stdoi Value for the `stdoi=` query parameter ("oi" or "std" here).
#' @return A length-one character vector holding the request URL.
nst_player_url <- function(season, sit, stdoi) {
  paste0(
    "http://naturalstattrick.com/playerteams.php?fromseason=", season,
    "&thruseason=", season,
    "&stype=2&sit=", sit,
    "&score=all&stdoi=", stdoi,
    "&rate=n&team=ALL&pos=S&loc=B&toi=0&gpfilt=none&fd=&td=&tgp=410",
    "&lines=multi&draftteam=ALL"
  )
}

#' Download a player page and return the first HTML table found on it.
#'
#' @inheritParams nst_player_url
#' @return The first table of the page, as parsed by `rvest::html_table()`.
nst_player_table <- function(season, sit, stdoi) {
  page_url <- nst_player_url(season, sit, stdoi)
  tables <- html_table(read_html(page_url))
  if (length(tables) == 0L) {
    # Fail with context instead of an opaque "subscript out of bounds".
    stop("No HTML table found at ", page_url, call. = FALSE)
  }
  tables[[1L]]
}

#' Scrape one season of skater data and compute teammate-adjusted points.
#'
#' Downloads four tables (`sit` = "pp"/"ev" crossed with `stdoi` = "oi"/"std"),
#' joins them per player/team/position, and derives how many of a player's
#' points are attributable to the difference between goals and scaled expected
#' goals produced by others while the player was on the ice.
#'
#' @param year A single whole number: the calendar year in which the season
#'   ends (2021 requests season code "20202021").
#' @return A data frame sorted by descending `PTS_adj` with columns `Player`,
#'   `Position`, `Team`, `EV_PTS`, `PP_PTS`, `PTS`, `tm.ppPTS_added`,
#'   `tm.evPTS_added`, `tm.PTS_added`, `ppPTS_adj`, `evPTS_adj`, `PTS_adj`
#'   and `Year` (the season code as a string).
scrape_season <- function(year) {
  # Validate before any network traffic so bad input fails fast and clearly.
  if (!is.numeric(year) || length(year) != 1L || is.na(year) ||
      year %% 1 != 0) {
    stop("`year` must be a single whole number, e.g. 2021.", call. = FALSE)
  }
  season <- paste0(year - 1, year)

  # "oi" tables supply GF/CF/xGF; "std" tables supply the player's own
  # Goals/iCF/ixG plus assist and point totals.
  ppo <- nst_player_table(season, "pp", "oi") %>%
    select(Player, Team, Position, TOI, GF, CF, xGF)
  evo <- nst_player_table(season, "ev", "oi") %>%
    select(Player, Team, Position, TOI, GF, CF, xGF)
  ppi <- nst_player_table(season, "pp", "std") %>%
    select(Player, Team, Position, TOI, Goals, iCF, ixG,
           As = `Total Assists`, Pts = `Total Points`)
  evi <- nst_player_table(season, "ev", "std") %>%
    select(Player, Team, Position, TOI, Goals, iCF, ixG,
           As = `Total Assists`, Pts = `Total Points`)

  ppo %>%
    left_join(ppi, by = c("Player", "Team", "Position", "TOI")) %>%
    # Prefix every non-key "pp" column so the "ev" columns joined next can
    # keep their bare names.
    rename_with(
      function(nm) paste0("PP_", nm),
      .cols = -c(Player, Team, Position)
    ) %>%
    left_join(evo, by = c("Player", "Team", "Position")) %>%
    left_join(evi, by = c("Player", "Team", "Position", "TOI")) %>%
    # Rows without a match in the "ev" tables come back as NA; count them as 0.
    replace(is.na(.), 0) %>%
    mutate(
      PTS = Pts + PP_Pts,
      IPP = As / GF,
      PP_IPP = PP_As / PP_GF,
      # Goals / expected goals while on the ice that are not the player's own.
      # Expected goals are rescaled by the season-wide GF/xGF ratio so the two
      # quantities are on the same total.
      tm.PP_GF = PP_GF - PP_Goals,
      tm.PP_xGF = (sum(PP_GF, na.rm = TRUE) / sum(PP_xGF, na.rm = TRUE)) *
        (PP_xGF - PP_ixG),
      tm.GF = GF - Goals,
      tm.xGF = (sum(GF, na.rm = TRUE) / sum(xGF, na.rm = TRUE)) *
        (xGF - ixG),
      tm.dEVG = tm.GF - tm.xGF,
      tm.dPPG = tm.PP_GF - tm.PP_xGF,
      tm.dG = tm.dEVG + tm.dPPG,
      # IPP is NaN when the denominator and numerator are both 0; treat the
      # resulting product as no points added.
      tm.ppPTS_added = ifelse(is.na(PP_IPP * tm.dPPG), 0, PP_IPP * tm.dPPG),
      tm.evPTS_added = ifelse(is.na(IPP * tm.dEVG), 0, IPP * tm.dEVG),
      tm.PTS_added = tm.ppPTS_added + tm.evPTS_added,
      ppPTS_adj = PP_Pts - tm.ppPTS_added,
      evPTS_adj = Pts - tm.evPTS_added,
      PTS_adj = PTS - tm.PTS_added
    ) %>%
    # is.na() is TRUE for NaN, so 0/0 ratios become 0 and survive the filter
    # below; infinite or >1 ratios are dropped by it.
    replace(is.na(.), 0) %>%
    filter(PP_IPP <= 1, IPP <= 1) %>%
    select(
      Player, Position, Team,
      EV_PTS = Pts, PP_PTS = PP_Pts, PTS,
      tm.ppPTS_added, tm.evPTS_added, tm.PTS_added,
      ppPTS_adj, evPTS_adj, PTS_adj
    ) %>%
    arrange(desc(PTS_adj)) %>%
    mutate(Year = season)
}
# Scrape the seasons ending in 2017 through 2021 and stack them into one table.
# lapply() builds the per-season list in a single pass instead of growing it
# inside a loop, and keeps the loop temporaries out of the global environment.
dlist <- lapply(2017:2021, scrape_season)
df_final <- bind_rows(dlist)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment