Created
July 22, 2022 14:29
-
-
Save tor-gu/c03164e55d851b2e31c87ea5b5b6591b to your computer and use it in GitHub Desktop.
Build nj_municipality map and combined_table for use of force data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tidyverse packages | |
library(dplyr) | |
library(magrittr) | |
library(purrr) | |
# Maps and census libraries | |
# *** Note: CENSUS API Key required *** | |
library(sf) | |
library(tidycensus) | |
library(tigris) | |
# NJ OAG data packages | |
library(njoagleod) | |
library(njoaguof) | |
# Recommended option (uncomment) | |
# options(tigris_use_cache = TRUE) | |
# NJ Geography ---- | |
## Get a list of NJ counties | |
counties <- municipality %>% pull(county) %>% unique() %>% sort() | |
## Get a map of NJ municipalities using tigris | |
nj_municipality_map <- counties %>% | |
map_df(~ county_subdivisions("NJ", county = ., class = "sf")) | |
## Get the towns that border the Atlantic Ocean | |
jersey_shore_water <- nj_municipality_map %>% | |
filter(ALAND == 0) %>% | |
filter(GEOID != "3401100000") | |
jersey_shore_indices <- st_intersects(jersey_shore_water, | |
nj_municipality_map) %>% | |
unlist() %>% unique() | |
## Now build a table of shore towns | |
municipality_shore <- nj_municipality_map %>% | |
as_tibble() %>% | |
mutate(shore_town = FALSE) | |
municipality_shore$shore_town[jersey_shore_indices] = TRUE | |
municipality_shore <- municipality_shore %>% | |
filter(ALAND != 0) %>% | |
select(GEOID, shore_town) | |
# Agency officer summary from njoagleod ---- | |
officer_info <- officer %>% group_by(agency_name, agency_county) %>% | |
summarize( | |
officer_count = n(), | |
officer_r_male = sum(officer_gender == "Male", na.rm = TRUE) / n(), | |
officer_mean_age = mean(officer_age, na.rm = TRUE), | |
officer_r_race_na = sum(is.na(officer_race) / n()), | |
officer_r_white = sum(officer_race == "White", na.rm = TRUE) / n(), | |
officer_r_black = sum(officer_race == "Black", na.rm = TRUE) / n(), | |
officer_r_asian = sum(officer_race == "Asian", na.rm = TRUE) / n(), | |
officer_r_hispanic_or_latino = | |
sum(officer_race == "Hispanic", na.rm = TRUE) / n(), | |
officer_r_race_other = 1 - sum(c_across( | |
officer_r_race_na:officer_r_hispanic_or_latino | |
)), | |
.groups = "drop" | |
) %>% | |
left_join(agency, by = c("agency_name", "agency_county")) %>% | |
filter(!is.na(municipality)) %>% | |
left_join(municipality, by = c("agency_county"="county", "municipality")) %>% | |
select( | |
GEOID, | |
officer_count, | |
officer_mean_age, | |
officer_r_male, | |
officer_r_white:officer_r_hispanic_or_latino, | |
officer_r_race_other, | |
officer_r_race_na | |
) | |
# Municipality demographics via tidycensus ---- | |
## Get population and density (2019 / get_estimates) | |
municipality_population <- counties %>% | |
map_df( | |
~ get_estimates( | |
geography = "county subdivision", | |
state = "NJ", | |
county = ., | |
year = 2019, | |
output = "wide", | |
product = "population" | |
) | |
) %>% | |
select(GEOID, population = POP, density = DENSITY) | |
## Get race/enthnicity ratios (2010 / get_decennial) | |
municipality_race_ethnicity_ratios <- counties %>% | |
map_df( | |
~ get_decennial( | |
geography = "county subdivision", | |
state = "NJ", | |
county = ., | |
year = 2010, | |
output = "wide", | |
variables = c( | |
population = "P001001", | |
white = "P003002", | |
black = "P003003", | |
native_american = "P003004", | |
asian = "P003005", | |
pacific_islander = "P003006", | |
other_races = "P003007", | |
two_or_more_races = "P003008", | |
non_hispanic_or_latino = "P004002", | |
hispanic_or_latino = "P004003" | |
) | |
) | |
) %>% | |
filter(population != 0) %>% | |
# Convert totals to ratios | |
mutate(across(white:hispanic_or_latino, ~ .x / population)) %>% | |
rename_with( ~ paste0("r_", .), white:hispanic_or_latino) %>% | |
select(GEOID, r_white:r_hispanic_or_latino) | |
## Combine the two Princetons. | |
# Township: Population 16265, GEOID 3402160915 | |
# Borough: Population 12307 GEOID 3402160900 | |
combined_princeton <- municipality_race_ethnicity_ratios %>% | |
filter(GEOID %in% c("3402160900", "3402160915")) %>% | |
arrange(GEOID) %>% | |
mutate(weight = c(12307, 16265)) %>% | |
summarize(across(r_white:r_hispanic_or_latino, | |
~ sum(.x * weight) / sum(weight))) %>% | |
mutate(GEOID = "3402160900") %>% | |
relocate(GEOID) | |
## Update the table with new ratios for Princeton Borough | |
municipality_race_ethnicity_ratios <- municipality_race_ethnicity_ratios %>% | |
rows_update(combined_princeton, by="GEOID") | |
## Get median household income (acs 2019) | |
municipality_income <- counties %>% | |
map_df( | |
~ get_acs( | |
geography = "county subdivision", | |
state = "NJ", | |
county = ., | |
year = 2019, | |
output = "wide", | |
variables = c(household_median_income = "B19013_001") | |
) | |
) %>% | |
select(GEOID, household_median_income = household_median_incomeE) %>% | |
filter(!is.na(household_median_income)) | |
# Use of force incident rates from njoaguof ---- | |
## Estimating when reporting started | |
initial_incident_2021_estimates <- incident %>% | |
# Find the first incident and the number of incidents after | |
group_by(agency_county, agency_name) %>% | |
summarize( | |
first_incident = min(incident_date_1), | |
incidents_after_first = sum(lubridate::year(incident_date_1) == 2021) - 1, | |
.groups = "drop" | |
) %>% | |
filter(lubridate::year(first_incident) == 2021) %>% | |
mutate( | |
# Estimate lambda | |
lambda_est_period = lubridate::ymd("2022-01-01") - first_incident, | |
lambda_est = incidents_after_first / as.double(lambda_est_period), | |
# Estimate t_0 | |
t_0_est = case_when( | |
incidents_after_first == 0 ~ as.Date(NA), | |
TRUE ~ first_incident - exp(-lambda_est) / (1 - exp(-lambda_est)), | |
), | |
# Estimate the partial year | |
reporting_days_est = lubridate::ymd("2022-01-01") - | |
pmax(t_0_est, lubridate::ymd("2021-01-01")), | |
partial_year = as.double(reporting_days_est) / | |
as.double(lubridate::ymd("2022-01-01") - lubridate::ymd("2021-01-01")) | |
) %>% | |
# Filter out non-municipal agencies | |
left_join(agency, by = c("agency_county", "agency_name")) %>% | |
filter(!is.na(municipality)) %>% | |
select(agency_county, municipality, lambda_est, t_0_est, partial_year) | |
## Incident rate table for 2021 | |
incident_rate_2021 <- incident %>% | |
# Get the 2021 incident count | |
filter(lubridate::year(incident_date_1) <= 2021) %>% | |
group_by(agency_county, agency_name) %>% | |
summarize(incident_count = sum(lubridate::year(incident_date_1) == 2021), | |
.groups = "drop") %>% | |
# Filter out the non-municipal agencies | |
left_join(agency, by = c("agency_county", "agency_name")) %>% | |
filter(!is.na(municipality)) %>% | |
select(agency_county, municipality, incident_count) %>% | |
# Add a partial_year field, initialed to 1.0, and then update | |
# with the initial_incident_2021_estimates | |
mutate(partial_year = 1.0) %>% | |
rows_update( | |
select( | |
initial_incident_2021_estimates, | |
agency_county, | |
municipality, | |
partial_year | |
), | |
by = c("agency_county", "municipality") | |
) %>% | |
# Estimate the 2021 yearly rate | |
mutate(incident_rate_est = incident_count / partial_year) | |
## Clean up and use GEOID as primary key | |
municipality_incident_rate <- incident_rate_2021 %>% | |
left_join(municipality, | |
by = c("agency_county" = "county", "municipality")) %>% | |
select(GEOID, incident_count, partial_year, incident_rate_est) | |
# Combined table ---- | |
combined_table <- officer_info %>% | |
inner_join(municipality_population, by="GEOID") %>% | |
inner_join(municipality_race_ethnicity_ratios, by="GEOID") %>% | |
inner_join(municipality_income, by="GEOID") %>% | |
inner_join(municipality_shore, by="GEOID") %>% | |
inner_join(municipality_incident_rate, by="GEOID") | |
# Save the map and the combined table --- | |
# Uncomment to save | |
# save(nj_municipality_map, combined_table, file="use_of_force_data.Rda") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment