thebioengineer/NFL_Game_Status_Crosstalk.Rmd

## NFL_Game_Status_Crosstalk.Rmd
---
title: "NFL Game Crosstalk"
author: "Ellis Hughes"
date: "12/18/2019"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

# devtools::install_github(repo = "maksimhorowitz/nflscrapR")
# install.packages("teamcolors")
# install.packages("highcharter")

library(nflscrapR)
library(tidyverse)
library(teamcolors)
library(plotly)

# Pull out the Steelers and Chief colors:
nfl_teamcolors <- teamcolors %>% filter(league == "nfl")
colors <- nfl_teamcolors %>%
  filter(name == c("Seattle Seahawks", "Tampa Bay Buccaneers")) %>%
  mutate(
    name = case_when(
      name == "Seattle Seahawks" ~ "home",
      name == "Tampa Bay Buccaneers" ~ "away"
    )
  ) %>%
  select(team = name, team_color = primary)


```

NFL games flow back and forth throughout the game. For a period of time one team could have a high probability of winning.
The next minute, the opposing team could figure it out and storm back for the W.

```{r load_data}

week_9_games <- scrape_game_ids(2019, weeks = 9)

SEA_vs_TB_pbp <- week_9_games %>%
  filter(home_team == "SEA") %>%
  pull(game_id) %>%
  scrape_json_play_by_play()

```

## Win probability


```{r Win-Probability, echo=FALSE}


interpolate_vals <- function(df,x,y){

  df2 <- lapply(seq(1, nrow(df) - 1),function(idx,dfx,dfy){
    dx1 <- dfx[idx]
    dx2 <- dfx[idx + 1]

    dy1 <- dfy[idx]
    dy2 <- dfy[idx + 1]

    slope = (dy2 - dy1) / (dx2 - dx1)

    data.frame(
      x = seq(dx1, dx2 - 1),
      y = dy1 + (slope * (seq(dx1, dx2 - 1) - dx1))
    )
  },df[[x]],df[[y]])

  df2 %>%
    bind_rows() %>%
    setNames(c(x,y))
}


# Now generate the win probability chart:
probabilities <- SEA_vs_TB_pbp %>%
  filter(!is.na(home_wp),
         !is.na(away_wp)) %>%
  dplyr::select(
    qtr,
    game_seconds_remaining,
    home_wp
    ) %>%
  mutate(game_seconds =
           if_else(qtr != 5,
                   abs(game_seconds_remaining - 3600),
                   3600 + abs((game_seconds_remaining - 600))
                   )) %>%
  filter(game_seconds != 4200 ) %>%
  arrange(game_seconds) %>%
  distinct(game_seconds,.keep_all = TRUE) %>%
  interpolate_vals("game_seconds","home_wp") %>%
  mutate( team = case_when(
    home_wp > .5 ~ "home",
    home_wp < .5 ~ "away",
    home_wp == .5 ~ NA_character_
  )) %>%
  left_join(
    colors, by = "team"
  )


```


```{r win-probability-chart}

ggplot(probabilities) +
  geom_segment(aes(
    x = game_seconds,
    y = home_wp,
    xend = lead(game_seconds),
    yend = lead(home_wp),
    color = I(team_color),
    group = 1
    )) +
  ggtitle(label = "Win Probability - SEA vs TB Week 9") +
  xlab("GameTime") +
  ylab(NULL) +
  scale_y_continuous(
    position = "right",
    breaks = c(0,.25,.50,.75,1),
    labels = c("Away\n\n 100%","75%","50%","75%","Home\n\n100%"),
    limits = c(0,1)
  ) +
  scale_x_reverse(
    breaks = c(0,900,1800,2700,3600),
    labels = c("Q1","Q2","Q3","Q4","OT"),
    ) +
  coord_flip()

ggplotly()

```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
	---
	title: "NFL Game Crosstalk"
	author: "Ellis Hughes"
	date: "12/18/2019"
	output: html_document
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(echo = TRUE)

	# devtools::install_github(repo = "maksimhorowitz/nflscrapR")
	# install.packages("teamcolors")
	# install.packages("highcharter")

	library(nflscrapR)
	library(tidyverse)
	library(teamcolors)
	library(plotly)

	# Pull out the Steelers and Chief colors:
	nfl_teamcolors <- teamcolors %>% filter(league == "nfl")
	colors <- nfl_teamcolors %>%
	filter(name == c("Seattle Seahawks", "Tampa Bay Buccaneers")) %>%
	mutate(
	name = case_when(
	name == "Seattle Seahawks" ~ "home",
	name == "Tampa Bay Buccaneers" ~ "away"
	)
	) %>%
	select(team = name, team_color = primary)


	```

	NFL games flow back and forth throughout the game. For a period of time one team could have a high probability of winning.
	The next minute, the opposing team could figure it out and storm back for the W.

	```{r load_data}

	week_9_games <- scrape_game_ids(2019, weeks = 9)

	SEA_vs_TB_pbp <- week_9_games %>%
	filter(home_team == "SEA") %>%
	pull(game_id) %>%
	scrape_json_play_by_play()

	```

	## Win probability


	```{r Win-Probability, echo=FALSE}


	interpolate_vals <- function(df,x,y){

	df2 <- lapply(seq(1, nrow(df) - 1),function(idx,dfx,dfy){
	dx1 <- dfx[idx]
	dx2 <- dfx[idx + 1]

	dy1 <- dfy[idx]
	dy2 <- dfy[idx + 1]

	slope = (dy2 - dy1) / (dx2 - dx1)

	data.frame(
	x = seq(dx1, dx2 - 1),
	y = dy1 + (slope * (seq(dx1, dx2 - 1) - dx1))
	)
	},df[[x]],df[[y]])

	df2 %>%
	bind_rows() %>%
	setNames(c(x,y))
	}


	# Now generate the win probability chart:
	probabilities <- SEA_vs_TB_pbp %>%
	filter(!is.na(home_wp),
	!is.na(away_wp)) %>%
	dplyr::select(
	qtr,
	game_seconds_remaining,
	home_wp
	) %>%
	mutate(game_seconds =
	if_else(qtr != 5,
	abs(game_seconds_remaining - 3600),
	3600 + abs((game_seconds_remaining - 600))
	)) %>%
	filter(game_seconds != 4200 ) %>%
	arrange(game_seconds) %>%
	distinct(game_seconds,.keep_all = TRUE) %>%
	interpolate_vals("game_seconds","home_wp") %>%
	mutate( team = case_when(
	home_wp > .5 ~ "home",
	home_wp < .5 ~ "away",
	home_wp == .5 ~ NA_character_
	)) %>%
	left_join(
	colors, by = "team"
	)




	```


	```{r win-probability-chart}

	ggplot(probabilities) +
	geom_segment(aes(
	x = game_seconds,
	y = home_wp,
	xend = lead(game_seconds),
	yend = lead(home_wp),
	color = I(team_color),
	group = 1
	)) +
	ggtitle(label = "Win Probability - SEA vs TB Week 9") +
	xlab("GameTime") +
	ylab(NULL) +
	scale_y_continuous(
	position = "right",
	breaks = c(0,.25,.50,.75,1),
	labels = c("Away\n\n 100%","75%","50%","75%","Home\n\n100%"),
	limits = c(0,1)
	) +
	scale_x_reverse(
	breaks = c(0,900,1800,2700,3600),
	labels = c("Q1","Q2","Q3","Q4","OT"),
	) +
	coord_flip()

	ggplotly()

	```
	Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.