Skip to content

Instantly share code, notes, and snippets.

@jalapic
Created September 30, 2015 16:53
Show Gist options
  • Save jalapic/47b0be9cff2732267949 to your computer and use it in GitHub Desktop.
Save jalapic/47b0be9cff2732267949 to your computer and use it in GitHub Desktop.
NHL Small Multiple
# Making an NHL Small Multiple
# this is only minimally annotated - sorry
# Libraries we'll use
library(dplyr)
library(magrittr)
library(XML)
library(ggthemes)
library(ggplot2)
# On launch, get 2014/15 data
# I wrote this a few months ago prior to my rvest conversion - XML still works ok here
# but you might want to switch to rvest
doc <- readHTMLTable("http://www.hockey-reference.com/leagues/NHL_2015_games.html")
x <- doc[[1]]
colnames(x)<-c("date", "visitor", "vgoal" ,"home", "hgoal", "soot", "notes")
x$hgoal <- as.numeric(as.character(x$hgoal))
x$vgoal <- as.numeric(as.character(x$vgoal))
## Make a nice dataframe where we clean up some formatting and we make our points columns
x2014 <- x %>%
filter(complete.cases(.)==T) %>%
mutate(soot = gsub("^$|^ $", NA, .$soot)) %>%
mutate(hpoints = ifelse(is.na(soot)==T, ifelse(hgoal>vgoal, 2, 0), ifelse(soot=="OT" | soot=="SO", ifelse(hgoal>vgoal, 2, 1)))) %>%
mutate(vpoints = ifelse(is.na(soot)==T, ifelse(vgoal>hgoal, 2, 0), ifelse(soot=="OT" | soot=="SO", ifelse(vgoal>hgoal, 2, 1)))) %>%
select(date, team=home, opp=visitor, hgoal, vgoal, hpoints, vpoints) %>%
arrange(date) %>%
mutate(event = dense_rank(date)) %>%
ungroup()
## make it team vs opponent (ie double dataframe and switch home/away)
x2014 <- rbind(x2014 %>% select(date, team, opp, pts=hpoints),
x2014 %>% select(date, team=opp, opp=team, pts=vpoints))
x2014$team <- as.character(x2014$team)
x2014$opp <- as.character(x2014$opp)
#arrange by date and then calculate cumulative points and points percentage
x2014a <- x2014 %>% arrange(date) %>%
group_by(team) %>% mutate(gameno = dense_rank(date), cumpts = cumsum(pts), cumptspct = cumpts / gameno)
# Want to sort the small multiple by highest average points
myteams <- x2014a %>% group_by(team) %>% filter(gameno==max(gameno)) %>% ungroup() %>% arrange(desc(cumptspct)) %>% .$team
x2014a$team <- factor(x2014a$team, levels=myteams)
#Calculate Rolling mean - note align = 'right' to make sure 20 game average is at right game number
x2014a <- x2014a %>% group_by(team) %>% mutate(rm = zoo::rollsum(pts, 20, fill=NA, align="right"))
x2014a %>% arrange(team, gameno) %>% data.frame %>% head(100) #checking everything is ok
### Plot
ggplot(x2014a, aes(gameno, rm)) + geom_path(aes(gameno, rm), color="firebrick2", lwd=.75) +
xlim(20,82) +
facet_wrap(~ team) +
ggtitle("20 game NHL 2014/15 Moving Point-Average Totals") +
theme_wsj() +
theme(axis.title = element_text(size = rel(1), family="Arial"),
panel.background = element_rect(fill='white'),
plot.background = element_rect(fill='white'),
strip.background = element_rect(fill='white'),
axis.text.x = element_text(color = "gray20", size = rel(0.9), family="Arial"),
axis.text.y = element_text(color = "gray20", size = rel(0.9), family="Arial"),
axis.title.x = element_text(size = rel(1), vjust = 0, family="Arial"),
axis.title.y = element_text(size = rel(1), vjust=1, angle=90, family="Arial"),
axis.ticks.y = element_blank(), axis.ticks.x = element_blank(),
plot.title = element_text(hjust = 0, vjust = 1, size = rel(1.1), family="Arial"),
text = element_text(color = "gray20", size = 12, family="Arial"),
axis.text = element_text(size = rel(1), family="Arial"),
legend.position = "none",
axis.line=element_line()
) + xlab("Game Number") + ylab("Cumulative Points per Game")
### I then make small edits in illustrator to the spacing of titles/sub-headers
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment