Skip to content

Instantly share code, notes, and snippets.

@tor-gu
Created July 22, 2022 14:29
Show Gist options
  • Save tor-gu/c03164e55d851b2e31c87ea5b5b6591b to your computer and use it in GitHub Desktop.
Save tor-gu/c03164e55d851b2e31c87ea5b5b6591b to your computer and use it in GitHub Desktop.
Build nj_municipality map and combined_table for use of force data
# Tidyverse packages
library(dplyr)
library(magrittr)
library(purrr)
# Maps and census libraries
# *** Note: CENSUS API Key required ***
library(sf)
library(tidycensus)
library(tigris)
# NJ OAG data packages
library(njoagleod)
library(njoaguof)
# Recommended option (uncomment)
# options(tigris_use_cache = TRUE)
# NJ Geography ----
## Get a list of NJ counties
counties <- municipality %>% pull(county) %>% unique() %>% sort()
## Get a map of NJ municipalities using tigris
nj_municipality_map <- counties %>%
map_df(~ county_subdivisions("NJ", county = ., class = "sf"))
## Get the towns that border the Atlantic Ocean
jersey_shore_water <- nj_municipality_map %>%
filter(ALAND == 0) %>%
filter(GEOID != "3401100000")
jersey_shore_indices <- st_intersects(jersey_shore_water,
nj_municipality_map) %>%
unlist() %>% unique()
## Now build a table of shore towns
municipality_shore <- nj_municipality_map %>%
as_tibble() %>%
mutate(shore_town = FALSE)
municipality_shore$shore_town[jersey_shore_indices] = TRUE
municipality_shore <- municipality_shore %>%
filter(ALAND != 0) %>%
select(GEOID, shore_town)
# Agency officer summary from njoagleod ----
officer_info <- officer %>% group_by(agency_name, agency_county) %>%
summarize(
officer_count = n(),
officer_r_male = sum(officer_gender == "Male", na.rm = TRUE) / n(),
officer_mean_age = mean(officer_age, na.rm = TRUE),
officer_r_race_na = sum(is.na(officer_race) / n()),
officer_r_white = sum(officer_race == "White", na.rm = TRUE) / n(),
officer_r_black = sum(officer_race == "Black", na.rm = TRUE) / n(),
officer_r_asian = sum(officer_race == "Asian", na.rm = TRUE) / n(),
officer_r_hispanic_or_latino =
sum(officer_race == "Hispanic", na.rm = TRUE) / n(),
officer_r_race_other = 1 - sum(c_across(
officer_r_race_na:officer_r_hispanic_or_latino
)),
.groups = "drop"
) %>%
left_join(agency, by = c("agency_name", "agency_county")) %>%
filter(!is.na(municipality)) %>%
left_join(municipality, by = c("agency_county"="county", "municipality")) %>%
select(
GEOID,
officer_count,
officer_mean_age,
officer_r_male,
officer_r_white:officer_r_hispanic_or_latino,
officer_r_race_other,
officer_r_race_na
)
# Municipality demographics via tidycensus ----
## Get population and density (2019 / get_estimates)
municipality_population <- counties %>%
map_df(
~ get_estimates(
geography = "county subdivision",
state = "NJ",
county = .,
year = 2019,
output = "wide",
product = "population"
)
) %>%
select(GEOID, population = POP, density = DENSITY)
## Get race/enthnicity ratios (2010 / get_decennial)
municipality_race_ethnicity_ratios <- counties %>%
map_df(
~ get_decennial(
geography = "county subdivision",
state = "NJ",
county = .,
year = 2010,
output = "wide",
variables = c(
population = "P001001",
white = "P003002",
black = "P003003",
native_american = "P003004",
asian = "P003005",
pacific_islander = "P003006",
other_races = "P003007",
two_or_more_races = "P003008",
non_hispanic_or_latino = "P004002",
hispanic_or_latino = "P004003"
)
)
) %>%
filter(population != 0) %>%
# Convert totals to ratios
mutate(across(white:hispanic_or_latino, ~ .x / population)) %>%
rename_with( ~ paste0("r_", .), white:hispanic_or_latino) %>%
select(GEOID, r_white:r_hispanic_or_latino)
## Combine the two Princetons.
# Township: Population 16265, GEOID 3402160915
# Borough: Population 12307 GEOID 3402160900
combined_princeton <- municipality_race_ethnicity_ratios %>%
filter(GEOID %in% c("3402160900", "3402160915")) %>%
arrange(GEOID) %>%
mutate(weight = c(12307, 16265)) %>%
summarize(across(r_white:r_hispanic_or_latino,
~ sum(.x * weight) / sum(weight))) %>%
mutate(GEOID = "3402160900") %>%
relocate(GEOID)
## Update the table with new ratios for Princeton Borough
municipality_race_ethnicity_ratios <- municipality_race_ethnicity_ratios %>%
rows_update(combined_princeton, by="GEOID")
## Get median household income (acs 2019)
municipality_income <- counties %>%
map_df(
~ get_acs(
geography = "county subdivision",
state = "NJ",
county = .,
year = 2019,
output = "wide",
variables = c(household_median_income = "B19013_001")
)
) %>%
select(GEOID, household_median_income = household_median_incomeE) %>%
filter(!is.na(household_median_income))
# Use of force incident rates from njoaguof ----
## Estimating when reporting started
initial_incident_2021_estimates <- incident %>%
# Find the first incident and the number of incidents after
group_by(agency_county, agency_name) %>%
summarize(
first_incident = min(incident_date_1),
incidents_after_first = sum(lubridate::year(incident_date_1) == 2021) - 1,
.groups = "drop"
) %>%
filter(lubridate::year(first_incident) == 2021) %>%
mutate(
# Estimate lambda
lambda_est_period = lubridate::ymd("2022-01-01") - first_incident,
lambda_est = incidents_after_first / as.double(lambda_est_period),
# Estimate t_0
t_0_est = case_when(
incidents_after_first == 0 ~ as.Date(NA),
TRUE ~ first_incident - exp(-lambda_est) / (1 - exp(-lambda_est)),
),
# Estimate the partial year
reporting_days_est = lubridate::ymd("2022-01-01") -
pmax(t_0_est, lubridate::ymd("2021-01-01")),
partial_year = as.double(reporting_days_est) /
as.double(lubridate::ymd("2022-01-01") - lubridate::ymd("2021-01-01"))
) %>%
# Filter out non-municipal agencies
left_join(agency, by = c("agency_county", "agency_name")) %>%
filter(!is.na(municipality)) %>%
select(agency_county, municipality, lambda_est, t_0_est, partial_year)
## Incident rate table for 2021
incident_rate_2021 <- incident %>%
# Get the 2021 incident count
filter(lubridate::year(incident_date_1) <= 2021) %>%
group_by(agency_county, agency_name) %>%
summarize(incident_count = sum(lubridate::year(incident_date_1) == 2021),
.groups = "drop") %>%
# Filter out the non-municipal agencies
left_join(agency, by = c("agency_county", "agency_name")) %>%
filter(!is.na(municipality)) %>%
select(agency_county, municipality, incident_count) %>%
# Add a partial_year field, initialed to 1.0, and then update
# with the initial_incident_2021_estimates
mutate(partial_year = 1.0) %>%
rows_update(
select(
initial_incident_2021_estimates,
agency_county,
municipality,
partial_year
),
by = c("agency_county", "municipality")
) %>%
# Estimate the 2021 yearly rate
mutate(incident_rate_est = incident_count / partial_year)
## Clean up and use GEOID as primary key
municipality_incident_rate <- incident_rate_2021 %>%
left_join(municipality,
by = c("agency_county" = "county", "municipality")) %>%
select(GEOID, incident_count, partial_year, incident_rate_est)
# Combined table ----
combined_table <- officer_info %>%
inner_join(municipality_population, by="GEOID") %>%
inner_join(municipality_race_ethnicity_ratios, by="GEOID") %>%
inner_join(municipality_income, by="GEOID") %>%
inner_join(municipality_shore, by="GEOID") %>%
inner_join(municipality_incident_rate, by="GEOID")
# Save the map and the combined table ---
# Uncomment to save
# save(nj_municipality_map, combined_table, file="use_of_force_data.Rda")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment