Created
September 22, 2021 00:25
-
-
Save msjgriffiths/8c95b67459843610f474be81ff182546 to your computer and use it in GitHub Desktop.
NYC Crash Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(patchwork) | |
# Load the two Motor Vehicle Collisions CSV exports from ~/Downloads:
# `crashes` comes from the "Crashes" file and `vehicles` from the "Vehicles"
# file. Later steps link the two tables through COLLISION_ID.
crashes <- readr::read_csv(
  file = "~/Downloads/Motor_Vehicle_Collisions_-_Crashes.csv"
)
vehicles <- readr::read_csv(
  file = "~/Downloads/Motor_Vehicle_Collisions_-_Vehicles.csv"
)
# Summarise vehicle registrations per collision (one row per COLLISION_ID):
#   out_of_state - number of vehicles whose STATE_REGISTRATION is present and
#                  is not "NY" (missing registrations are not counted).
#   unknown      - 1 if any vehicle in the collision has a missing
#                  STATE_REGISTRATION, otherwise 0.
state_info <- vehicles %>%
  group_by(COLLISION_ID) %>%
  summarise(
    out_of_state = sum(STATE_REGISTRATION != "NY", na.rm = TRUE),
    # is.na() never yields NA, so max() needs no na.rm here.
    unknown = max(is.na(STATE_REGISTRATION))
  ) %>%
  ungroup()
# Build the per-collision analysis table.
#
# Flags each crash as fatal and/or injurious from the person counts, keeps
# only the columns needed downstream, and attaches the registration summary
# from `state_info`. The inner join drops crashes that have no matching rows
# in `state_info`. `has_out_of_state` is TRUE when at least one vehicle in the
# collision was counted as registered outside NY.
#
# Rows where a count is missing yield NA flags; they are left in place here
# and removed later by the plotting code's na.omit().
crash_data <- crashes %>%
  mutate(
    is_fatal = `NUMBER OF PERSONS KILLED` > 0,
    has_injury = `NUMBER OF PERSONS INJURED` > 0
  ) %>%
  # A leftover interactive `View()` call used to sit here; it ended the
  # pipeline, so the following select() ran without a data frame and failed.
  select(COLLISION_ID, is_fatal, `CRASH DATE`, has_injury) %>%
  # Join key stated explicitly instead of relying on the natural-join guess.
  inner_join(state_info, by = "COLLISION_ID") %>%
  mutate(has_out_of_state = out_of_state > 0)
# Plot the monthly share of crashes for which `outcome` is TRUE, split by
# whether the crash involved an out-of-state vehicle.
#
# Arguments:
#   data    - data frame with `CRASH DATE` (character, "%m/%d/%Y"), a logical
#             `has_out_of_state` column, and the logical `outcome` column.
#   outcome - unquoted name of the logical column to average (e.g. is_fatal).
#   y_label - y-axis label for the plot.
#
# Returns a ggplot object: one point per (group, month) with a GAM smooth,
# faceted into two side-by-side panels.
#
# Rows containing any NA are dropped first, so the monthly mean is taken over
# complete records only.
plot_monthly_rate <- function(data, outcome, y_label) {
  data %>%
    na.omit() %>%
    mutate(
      # Bucket each crash into the first day of its calendar month.
      month = lubridate::floor_date(
        as.Date(`CRASH DATE`, format = "%m/%d/%Y"),
        "month"
      ),
      # Replace the logical flag with the facet strip labels.
      has_out_of_state = if_else(
        has_out_of_state,
        "Accident had Out of State Driver",
        "No out of state driver"
      )
    ) %>%
    group_by(has_out_of_state, month) %>%
    # The mean of a logical is the proportion TRUE; drop grouping afterwards
    # so the result is a plain tibble and no regrouping message is emitted.
    summarise(k = mean({{ outcome }}), .groups = "drop") %>%
    ggplot(aes(month, k, color = has_out_of_state)) +
    geom_point() +
    geom_smooth(method = "gam", formula = y ~ s(x, bs = "gp", k = 50)) +
    theme_minimal() +
    facet_wrap(~ has_out_of_state, ncol = 2) +
    # Facet strips already name the groups, so the colour legend is redundant.
    theme(legend.position = "none") +
    scale_y_continuous(labels = scales::percent) +
    labs(
      x = "Month",
      y = y_label
    )
}

g1 <- plot_monthly_rate(crash_data, is_fatal, "% Crashes with Fatalities")
g2 <- plot_monthly_rate(crash_data, has_injury, "% Crashes with Injuries")

# patchwork: stack the fatality panel above the injury panel.
g1 / g2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment