library(tidyr) | |
library(dplyr) | |
library(stringr) | |
library(ggplot2) | |
cities_select <- c("HOUSTON","SAN ANTONIO","DALLAS","AUSTIN","FORT WORTH","EL PASO") | |
#---------------------------------------- | |
# get population estimates | |
#---------------------------------------- | |
# get 2000-2010 | |
pop_ests_2000 <- read.csv("http://www.census.gov/popest/data/intercensal/cities/files/SUB-EST00INT.csv", | |
stringsAsFactors = FALSE) | |
filter(pop_ests_2000, STNAME=="Texas" & SUMLEV==162 & toupper(NAME) %in% paste(cities_select, "CITY")) %>% | |
select(NAME, POPESTIMATE2000:POPESTIMATE2009, POPESTIMATE2010) %>% | |
gather("Year","Pop", POPESTIMATE2000:POPESTIMATE2010) %>% | |
mutate(City = toupper(str_sub(NAME, end = -6L)), | |
Year = as.integer(str_sub(Year, start = -4L)), | |
source = "2000-2010") %>% | |
select(-NAME) -> | |
pop_by_city_2000 | |
# get 2010-2014 | |
pop_ests_2010 <- read.csv("http://www.census.gov/popest/data/cities/totals/2014/files/SUB-EST2014_48.csv", | |
stringsAsFactors = FALSE) | |
filter(pop_ests_2010, SUMLEV==162 & toupper(NAME) %in% paste(cities_select, "CITY")) %>% | |
select(NAME, POPESTIMATE2010:POPESTIMATE2014) %>% | |
gather("Year","Pop", POPESTIMATE2010:POPESTIMATE2014) %>% | |
mutate(City = toupper(str_sub(NAME, end = -6L)), | |
Year = as.integer(str_sub(Year, start = -4L)), | |
source = "2010-2014") %>% | |
select(-NAME) -> | |
pop_by_city_2010 | |
# adjust so 2010 agrees across sources | |
filter(pop_by_city_2000, Year==2010) %>% | |
select(City, Pop_old = Pop) %>% | |
left_join(select(filter(pop_by_city_2010, Year==2010), City, Pop), by = "City") %>% | |
mutate(ratio = Pop / Pop_old) %>% | |
select(City, ratio) -> | |
pop_adj_ratio | |
left_join(pop_by_city_2000, pop_adj_ratio, by = "City") %>% | |
mutate(Pop = Pop * ratio * max((Year - 2005) / 5, 0)) %>% | |
filter(2006 <= Year, Year <= 2009) %>% | |
select(-ratio) -> | |
pop_by_city_2000_adj | |
pop_by_city <- rbind(pop_by_city_2000_adj, pop_by_city_2010) | |
# add projection for 2015 | |
lm_pop <- lm(Pop ~ 0 + City + Year:City, data = pop_by_city_2010) | |
pop2015 <- filter(pop_by_city, Year==2014) %>% mutate(Year = 2015) | |
pop2015$Pop <- predict(lm_pop, newdata = pop2015) | |
pop_by_city <- rbind(pop_by_city, pop2015) | |
ggplot(pop_by_city, aes(Year, Pop, color = City, shape = City, linetype = source)) + | |
geom_point() + geom_line() + | |
theme_minimal() | |
#--------------------------------------- | |
# prepare crash data | |
#--------------------------------------- | |
crash_dat <- read.csv("http://blogs.edb.utexas.edu/pusto/files/2015/08/Yearly_crash_data_by_city.csv", | |
stringsAsFactors=FALSE) | |
select(crash_dat, City, Year, Fatal_crashes:Incapacitating_injuries, Total_crashes) %>% | |
filter(City %in% cities_select) %>% droplevels() %>% | |
mutate(Fatality_rate = Fatal_crashes / Total_crashes) %>% | |
gather("quantity","n", Fatal_crashes:Total_crashes, Fatality_rate) %>% | |
filter(!is.na(n)) %>% | |
left_join(pop_by_city, by = c("City","Year")) %>% | |
mutate(partial = (Year==2015), | |
quantity = str_replace(quantity,"_"," "), | |
n_per = n / Pop * 100000, | |
projection = "actual") -> | |
crashes | |
#------------------------------- | |
# Crashes per 100K residents | |
#------------------------------- | |
filter(crashes, Year==2015) %>% mutate(n = 0, n_per = 0) -> | |
crashes_axis | |
filter(crashes, Year==2015) %>% | |
mutate(n = n * 12 / 7, n_per = n_per * 12/ 7, projection = "projected") -> | |
crashes_projected | |
crashes <- rbind(crashes, crashes_projected) | |
filter(crashes, quantity != "Fatality rate", | |
Year < 2015 | projection == "projected") %>% droplevels() -> | |
crashes_total | |
crash_labels <- filter(crashes_total, Year==2015) | |
crashes_per <- | |
ggplot(crashes_total, aes(Year, n_per, color = City, shape = City)) + | |
geom_line() + geom_point() + | |
geom_text(data = crash_labels, aes(x = 2015, y = n_per, label = City), hjust=-0.1, size = 3) + | |
geom_blank(data = filter(crashes_axis, quantity!="Fatality rate")) + | |
scale_color_brewer(type = "qual", palette=7) + | |
scale_x_continuous(limits = c(2006, 2016), | |
breaks = 2006:2015, labels = c(2006:2014, "2015 \n (Projected)")) + | |
facet_wrap(~ quantity, ncol = 1, scales = "free") + | |
theme_minimal() + theme(legend.position="none") + | |
labs(color = "", shape = "", y = "Crashes/Fatalities per 100,000 residents", | |
title = "Annual crashes and fatalities/injuries per 100,000 residents \n in selected Texas cities (2006-2015)") | |
arrange(crashes_total, quantity, City, Year) %>% | |
group_by(quantity, City) %>% | |
mutate(change = n_per - lag(n_per)) %>% | |
filter(!is.na(change) & quantity %in% c("Fatal crashes","Fatalities")) -> | |
crashes_change | |
summary(filter(crashes_change, quantity=="Fatal crashes")$change) | |
summary(filter(crashes_change, quantity=="Fatalities")$change) | |
ggplot(crashes_change, aes(change, color = quantity)) + geom_density() | |
#------------------------------- | |
# Total crashes | |
#------------------------------- | |
crashes_total_fig <- | |
ggplot(crashes_total, aes(Year, n, color = City, shape = City)) + | |
geom_line() + geom_point() + | |
geom_text(data = crash_labels, aes(x = 2015, y = n, label = City), hjust=-0.1, size = 3) + | |
geom_blank(data = filter(crashes_axis, quantity!="Fatality rate")) + | |
scale_color_brewer(type = "qual", palette=7) + | |
scale_x_continuous(limits = c(2006, 2016), | |
breaks = 2006:2015, labels = c(2006:2014, "2015 \n (Projected)")) + | |
facet_wrap(~ quantity, ncol = 1, scales = "free") + | |
theme_minimal() + theme(legend.position="none") + | |
labs(color = "", shape = "", y = "Total Crashes/Fatalities", | |
title = "Annual total crashes and fatalities/injuries \n in selected Texas cities (2006-2015)") | |
#------------------------------- | |
# Fatality rate figure | |
#------------------------------- | |
fatality_rate <- filter(crashes, quantity=="Fatality rate" & projection=="actual") | |
crash_labels <- filter(fatality_rate, Year==2015) | |
fatality_rates <- | |
ggplot(fatality_rate, aes(Year, n, color = City, shape = City)) + | |
geom_line() + geom_point() + | |
geom_blank(data = filter(crashes_axis, quantity=="Fatality rate")) + | |
geom_text(data = crash_labels, aes(x = 2015, y = n, label = City), hjust=-0.1, size = 3) + | |
scale_color_brewer(type = "qual", palette=7) + | |
scale_x_continuous(limits = c(2006, 2016), breaks = 2006:2015, | |
labels = c(2006:2014, "2015 \n (Through 7/31)")) + | |
theme_minimal() + theme(legend.position="none") + | |
labs(color = "", shape = "", y = "Fatal crashes / Total crashes", | |
title = "Proportion of crashes that are fatal \n in selected Texas cities (2010-2015)") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment