Skip to content

Instantly share code, notes, and snippets.

@msjgriffiths
Created September 22, 2021 00:25
Show Gist options
  • Save msjgriffiths/8c95b67459843610f474be81ff182546 to your computer and use it in GitHub Desktop.
Save msjgriffiths/8c95b67459843610f474be81ff182546 to your computer and use it in GitHub Desktop.
NYC Crash Data
library(tidyverse)
library(patchwork)
# Load the two Motor Vehicle Collisions CSV exports from ~/Downloads.
# `vehicles` is used below for COLLISION_ID and STATE_REGISTRATION;
# `crashes` is used for COLLISION_ID, `CRASH DATE` and the
# `NUMBER OF PERSONS KILLED` / `NUMBER OF PERSONS INJURED` counts.
# NOTE(review): presumably one row per vehicle and one row per crash
# respectively — confirm against the data dictionary.
vehicles <- readr::read_csv(
  file = "~/Downloads/Motor_Vehicle_Collisions_-_Vehicles.csv"
)
crashes <- readr::read_csv(
  file = "~/Downloads/Motor_Vehicle_Collisions_-_Crashes.csv"
)
# Summarise vehicle registrations per collision.
#   out_of_state: number of vehicles in the collision whose
#                 STATE_REGISTRATION is present and not "NY".
#   unknown:      1L if any vehicle in the collision has a missing
#                 STATE_REGISTRATION, otherwise 0L.
# Vehicles with a missing registration are not counted as out of state
# (the comparison yields NA and is dropped by `na.rm = TRUE`).
# NOTE(review): `unknown` is not used anywhere else in this script.
state_info <- vehicles %>%
  group_by(COLLISION_ID) %>%
  summarise(
    out_of_state = sum(STATE_REGISTRATION != "NY", na.rm = TRUE),
    # `is.na()` never returns NA, so this matches max(is.na(.), na.rm = TRUE).
    unknown = as.integer(any(is.na(STATE_REGISTRATION)))
  ) %>%
  ungroup()
# Build `crash_data`: one row per collision present in both `crashes` and
# `state_info`, carrying
#   is_fatal         - TRUE when `NUMBER OF PERSONS KILLED` > 0
#   has_injury       - TRUE when `NUMBER OF PERSONS INJURED` > 0
#   `CRASH DATE`     - passed through unchanged for later date parsing
#   out_of_state, unknown - per-collision registration summary
#   has_out_of_state - TRUE when at least one vehicle was not NY-registered
#
# Fix: a leftover `%>% View()` after `mutate()` used to end the pipeline, so
# the following `select()` ran without a data frame and `crash_data` was
# never assigned. The debugging call is removed so the pipe runs through.
crash_data <- crashes %>%
  mutate(
    is_fatal = `NUMBER OF PERSONS KILLED` > 0,
    has_injury = `NUMBER OF PERSONS INJURED` > 0
  ) %>%
  select(COLLISION_ID, is_fatal, `CRASH DATE`, has_injury) %>%
  # Explicit key: avoids the "Joining, by = ..." message and guards against
  # an accidental natural join if either table gains a shared column name.
  inner_join(state_info, by = "COLLISION_ID") %>%
  mutate(has_out_of_state = out_of_state > 0)
# Plot, per calendar month, the share of crashes for which `outcome` is TRUE,
# split into two facets by whether the crash involved an out-of-state vehicle.
#
# Arguments:
#   data    - data frame with `CRASH DATE` (parsed as "%m/%d/%Y"), a logical
#             `has_out_of_state` column, and the `outcome` column.
#   outcome - unquoted name of a logical column in `data`; its monthly mean
#             is the plotted rate.
#   y_label - y-axis label for the plot.
#
# Returns a ggplot object: monthly points plus a GAM smooth per facet.
#
# Rows with an NA in any column are dropped before aggregating.
# NOTE(review): `geom_smooth(method = "gam")` relies on the mgcv package
# being installed — confirm in the target environment.
plot_monthly_rate <- function(data, outcome, y_label) {
  data %>%
    na.omit() %>%
    mutate(
      date = as.Date(`CRASH DATE`, format = "%m/%d/%Y"),
      # Floor to the first day of the month (the original called this `week`).
      month = lubridate::floor_date(date, "month"),
      has_out_of_state = ifelse(
        has_out_of_state,
        "Accident had Out of State Driver",
        "No out of state driver"
      )
    ) %>%
    group_by(has_out_of_state, month) %>%
    # `.groups = "drop"` returns an ungrouped tibble and silences the
    # "grouped output by" message from summarise().
    summarise(k = mean({{ outcome }}), .groups = "drop") %>%
    ggplot(aes(month, k, color = has_out_of_state)) +
    geom_point() +
    geom_smooth(method = "gam", formula = y ~ s(x, bs = "gp", k = 50)) +
    theme_minimal() +
    facet_wrap(~ has_out_of_state, ncol = 2) +
    # Facet titles already identify the groups, so the colour legend is hidden.
    theme(legend.position = "none") +
    scale_y_continuous(labels = scales::percent) +
    labs(x = "Month", y = y_label)
}

g1 <- plot_monthly_rate(crash_data, is_fatal, "% Crashes with Fatalities")
g2 <- plot_monthly_rate(crash_data, has_injury, "% Crashes with Injuries")

# patchwork: stack the fatality plot above the injury plot.
g1 / g2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment