tor-gu/build_combined_table.R

## build_combined_table.R
# Tidyverse packages
library(dplyr)
library(magrittr)
library(purrr)

# Maps and census libraries
# *** Note: CENSUS API Key required ***
library(sf)
library(tidycensus)
library(tigris)

# NJ OAG data packages
library(njoagleod)
library(njoaguof)

# Recommended option (uncomment)
# options(tigris_use_cache = TRUE)

# NJ Geography ----

## Get a list of NJ counties
counties <- municipality %>% pull(county) %>% unique() %>% sort()

## Get a map of NJ municipalities using tigris
nj_municipality_map <- counties %>%
  map_df(~ county_subdivisions("NJ", county = ., class = "sf"))

## Get the towns that border the Atlantic Ocean
jersey_shore_water <- nj_municipality_map %>%
  filter(ALAND == 0) %>%
  filter(GEOID != "3401100000")
jersey_shore_indices <- st_intersects(jersey_shore_water,
                                      nj_municipality_map) %>%
  unlist() %>% unique()

## Now build a table of shore towns
municipality_shore <- nj_municipality_map %>%
  as_tibble() %>%
  mutate(shore_town = FALSE)
municipality_shore$shore_town[jersey_shore_indices] = TRUE
municipality_shore <- municipality_shore %>%
  filter(ALAND != 0) %>%
  select(GEOID, shore_town)

# Agency officer summary from njoagleod ----

officer_info <- officer %>% group_by(agency_name, agency_county) %>%
  summarize(
    officer_count = n(),
    officer_r_male = sum(officer_gender == "Male", na.rm = TRUE) / n(),
    officer_mean_age = mean(officer_age, na.rm = TRUE),
    officer_r_race_na = sum(is.na(officer_race) / n()),
    officer_r_white = sum(officer_race == "White", na.rm = TRUE) / n(),
    officer_r_black = sum(officer_race == "Black", na.rm = TRUE) / n(),
    officer_r_asian = sum(officer_race == "Asian", na.rm = TRUE) / n(),
    officer_r_hispanic_or_latino =
      sum(officer_race == "Hispanic", na.rm = TRUE) / n(),
    officer_r_race_other = 1 - sum(c_across(
      officer_r_race_na:officer_r_hispanic_or_latino
    )),
    .groups = "drop"
  ) %>%
  left_join(agency, by = c("agency_name", "agency_county")) %>%
  filter(!is.na(municipality)) %>%
  left_join(municipality, by = c("agency_county"="county", "municipality")) %>%
  select(
    GEOID,
    officer_count,
    officer_mean_age,
    officer_r_male,
    officer_r_white:officer_r_hispanic_or_latino,
    officer_r_race_other,
    officer_r_race_na
  )

# Municipality demographics via tidycensus ----

## Get population and density  (2019 / get_estimates)
municipality_population <- counties %>%
  map_df(
    ~ get_estimates(
      geography = "county subdivision",
      state = "NJ",
      county = .,
      year = 2019,
      output = "wide",
      product = "population"
    )
  ) %>%
  select(GEOID, population = POP, density = DENSITY)

## Get race/enthnicity ratios (2010 / get_decennial)
municipality_race_ethnicity_ratios <- counties %>%
  map_df(
    ~ get_decennial(
      geography = "county subdivision",
      state = "NJ",
      county = .,
      year = 2010,
      output = "wide",
      variables = c(
        population = "P001001",
        white = "P003002",
        black = "P003003",
        native_american = "P003004",
        asian = "P003005",
        pacific_islander = "P003006",
        other_races = "P003007",
        two_or_more_races = "P003008",
        non_hispanic_or_latino = "P004002",
        hispanic_or_latino = "P004003"
      )
    )
  ) %>%
  filter(population != 0) %>%
  # Convert totals to ratios
  mutate(across(white:hispanic_or_latino, ~ .x / population)) %>%
  rename_with( ~ paste0("r_", .), white:hispanic_or_latino) %>%
  select(GEOID, r_white:r_hispanic_or_latino)

## Combine the two Princetons.
#   Township:  Population 16265,  GEOID 3402160915
#   Borough:   Population 12307   GEOID 3402160900
combined_princeton <- municipality_race_ethnicity_ratios %>%
  filter(GEOID %in% c("3402160900", "3402160915")) %>%
  arrange(GEOID) %>%
  mutate(weight = c(12307, 16265)) %>%
  summarize(across(r_white:r_hispanic_or_latino,
                   ~ sum(.x * weight) / sum(weight))) %>%
  mutate(GEOID = "3402160900") %>%
  relocate(GEOID)

## Update the table with new ratios for Princeton Borough
municipality_race_ethnicity_ratios <- municipality_race_ethnicity_ratios %>%
  rows_update(combined_princeton, by="GEOID")

## Get median household income (acs 2019)
municipality_income <- counties %>%
  map_df(
    ~ get_acs(
      geography = "county subdivision",
      state = "NJ",
      county = .,
      year = 2019,
      output = "wide",
      variables = c(household_median_income = "B19013_001")
    )
  ) %>%
  select(GEOID, household_median_income = household_median_incomeE) %>%
  filter(!is.na(household_median_income))

# Use of force incident rates from njoaguof ----

## Estimating when reporting started
initial_incident_2021_estimates <- incident %>%
  # Find the first incident and the number of incidents after
  group_by(agency_county, agency_name) %>%
  summarize(
    first_incident = min(incident_date_1),
    incidents_after_first = sum(lubridate::year(incident_date_1) == 2021) - 1,
    .groups = "drop"
  ) %>%
  filter(lubridate::year(first_incident) == 2021) %>%
  mutate(
    # Estimate lambda
    lambda_est_period = lubridate::ymd("2022-01-01") - first_incident,
    lambda_est = incidents_after_first / as.double(lambda_est_period),
    # Estimate t_0
    t_0_est = case_when(
      incidents_after_first == 0 ~ as.Date(NA),
      TRUE ~ first_incident - exp(-lambda_est) / (1 - exp(-lambda_est)),
    ),
    # Estimate the partial year
    reporting_days_est = lubridate::ymd("2022-01-01") -
      pmax(t_0_est, lubridate::ymd("2021-01-01")),
    partial_year = as.double(reporting_days_est) /
      as.double(lubridate::ymd("2022-01-01") - lubridate::ymd("2021-01-01"))
  ) %>%
  # Filter out non-municipal agencies
  left_join(agency, by = c("agency_county", "agency_name")) %>%
  filter(!is.na(municipality)) %>%
  select(agency_county, municipality, lambda_est, t_0_est, partial_year)

## Incident rate table for 2021
incident_rate_2021 <- incident %>%
  # Get the 2021 incident count
  filter(lubridate::year(incident_date_1) <= 2021) %>%
  group_by(agency_county, agency_name) %>%
  summarize(incident_count = sum(lubridate::year(incident_date_1) == 2021),
            .groups = "drop") %>%
  # Filter out the non-municipal agencies
  left_join(agency, by = c("agency_county", "agency_name")) %>%
  filter(!is.na(municipality)) %>%
  select(agency_county, municipality, incident_count) %>%
  # Add a partial_year field, initialed to 1.0, and then update
  # with the initial_incident_2021_estimates
  mutate(partial_year = 1.0) %>%
  rows_update(
    select(
      initial_incident_2021_estimates,
      agency_county,
      municipality,
      partial_year
    ),
    by = c("agency_county", "municipality")
  ) %>%
  # Estimate the 2021 yearly rate
  mutate(incident_rate_est = incident_count / partial_year)

## Clean up and use GEOID as primary key
municipality_incident_rate <- incident_rate_2021 %>%
  left_join(municipality,
            by = c("agency_county" = "county", "municipality")) %>%
  select(GEOID, incident_count, partial_year, incident_rate_est)

# Combined table ----

combined_table <- officer_info %>%
  inner_join(municipality_population, by="GEOID") %>%
  inner_join(municipality_race_ethnicity_ratios, by="GEOID") %>%
  inner_join(municipality_income, by="GEOID") %>%
  inner_join(municipality_shore, by="GEOID") %>%
  inner_join(municipality_incident_rate, by="GEOID")

# Save the map and the combined table ---
# Uncomment to save
# save(nj_municipality_map, combined_table, file="use_of_force_data.Rda")
	# Tidyverse packages
	library(dplyr)
	library(magrittr)
	library(purrr)

	# Maps and census libraries
	# * Note: CENSUS API Key required *
	library(sf)
	library(tidycensus)
	library(tigris)

	# NJ OAG data packages
	library(njoagleod)
	library(njoaguof)

	# Recommended option (uncomment)
	# options(tigris_use_cache = TRUE)

	# NJ Geography ----

	## Get a list of NJ counties
	counties <- municipality %>% pull(county) %>% unique() %>% sort()

	## Get a map of NJ municipalities using tigris
	nj_municipality_map <- counties %>%
	map_df(~ county_subdivisions("NJ", county = ., class = "sf"))

	## Get the towns that border the Atlantic Ocean
	jersey_shore_water <- nj_municipality_map %>%
	filter(ALAND == 0) %>%
	filter(GEOID != "3401100000")
	jersey_shore_indices <- st_intersects(jersey_shore_water,
	nj_municipality_map) %>%
	unlist() %>% unique()

	## Now build a table of shore towns
	municipality_shore <- nj_municipality_map %>%
	as_tibble() %>%
	mutate(shore_town = FALSE)
	municipality_shore$shore_town[jersey_shore_indices] = TRUE
	municipality_shore <- municipality_shore %>%
	filter(ALAND != 0) %>%
	select(GEOID, shore_town)

	# Agency officer summary from njoagleod ----

	officer_info <- officer %>% group_by(agency_name, agency_county) %>%
	summarize(
	officer_count = n(),
	officer_r_male = sum(officer_gender == "Male", na.rm = TRUE) / n(),
	officer_mean_age = mean(officer_age, na.rm = TRUE),
	officer_r_race_na = sum(is.na(officer_race) / n()),
	officer_r_white = sum(officer_race == "White", na.rm = TRUE) / n(),
	officer_r_black = sum(officer_race == "Black", na.rm = TRUE) / n(),
	officer_r_asian = sum(officer_race == "Asian", na.rm = TRUE) / n(),
	officer_r_hispanic_or_latino =
	sum(officer_race == "Hispanic", na.rm = TRUE) / n(),
	officer_r_race_other = 1 - sum(c_across(
	officer_r_race_na:officer_r_hispanic_or_latino
	)),
	.groups = "drop"
	) %>%
	left_join(agency, by = c("agency_name", "agency_county")) %>%
	filter(!is.na(municipality)) %>%
	left_join(municipality, by = c("agency_county"="county", "municipality")) %>%
	select(
	GEOID,
	officer_count,
	officer_mean_age,
	officer_r_male,
	officer_r_white:officer_r_hispanic_or_latino,
	officer_r_race_other,
	officer_r_race_na
	)

	# Municipality demographics via tidycensus ----

	## Get population and density (2019 / get_estimates)
	municipality_population <- counties %>%
	map_df(
	~ get_estimates(
	geography = "county subdivision",
	state = "NJ",
	county = .,
	year = 2019,
	output = "wide",
	product = "population"
	)
	) %>%
	select(GEOID, population = POP, density = DENSITY)

	## Get race/enthnicity ratios (2010 / get_decennial)
	municipality_race_ethnicity_ratios <- counties %>%
	map_df(
	~ get_decennial(
	geography = "county subdivision",
	state = "NJ",
	county = .,
	year = 2010,
	output = "wide",
	variables = c(
	population = "P001001",
	white = "P003002",
	black = "P003003",
	native_american = "P003004",
	asian = "P003005",
	pacific_islander = "P003006",
	other_races = "P003007",
	two_or_more_races = "P003008",
	non_hispanic_or_latino = "P004002",
	hispanic_or_latino = "P004003"
	)
	)
	) %>%
	filter(population != 0) %>%
	# Convert totals to ratios
	mutate(across(white:hispanic_or_latino, ~ .x / population)) %>%
	rename_with( ~ paste0("r_", .), white:hispanic_or_latino) %>%
	select(GEOID, r_white:r_hispanic_or_latino)

	## Combine the two Princetons.
	# Township: Population 16265, GEOID 3402160915
	# Borough: Population 12307 GEOID 3402160900
	combined_princeton <- municipality_race_ethnicity_ratios %>%
	filter(GEOID %in% c("3402160900", "3402160915")) %>%
	arrange(GEOID) %>%
	mutate(weight = c(12307, 16265)) %>%
	summarize(across(r_white:r_hispanic_or_latino,
	~ sum(.x * weight) / sum(weight))) %>%
	mutate(GEOID = "3402160900") %>%
	relocate(GEOID)

	## Update the table with new ratios for Princeton Borough
	municipality_race_ethnicity_ratios <- municipality_race_ethnicity_ratios %>%
	rows_update(combined_princeton, by="GEOID")

	## Get median household income (acs 2019)
	municipality_income <- counties %>%
	map_df(
	~ get_acs(
	geography = "county subdivision",
	state = "NJ",
	county = .,
	year = 2019,
	output = "wide",
	variables = c(household_median_income = "B19013_001")
	)
	) %>%
	select(GEOID, household_median_income = household_median_incomeE) %>%
	filter(!is.na(household_median_income))

	# Use of force incident rates from njoaguof ----

	## Estimating when reporting started
	initial_incident_2021_estimates <- incident %>%
	# Find the first incident and the number of incidents after
	group_by(agency_county, agency_name) %>%
	summarize(
	first_incident = min(incident_date_1),
	incidents_after_first = sum(lubridate::year(incident_date_1) == 2021) - 1,
	.groups = "drop"
	) %>%
	filter(lubridate::year(first_incident) == 2021) %>%
	mutate(
	# Estimate lambda
	lambda_est_period = lubridate::ymd("2022-01-01") - first_incident,
	lambda_est = incidents_after_first / as.double(lambda_est_period),
	# Estimate t_0
	t_0_est = case_when(
	incidents_after_first == 0 ~ as.Date(NA),
	TRUE ~ first_incident - exp(-lambda_est) / (1 - exp(-lambda_est)),
	),
	# Estimate the partial year
	reporting_days_est = lubridate::ymd("2022-01-01") -
	pmax(t_0_est, lubridate::ymd("2021-01-01")),
	partial_year = as.double(reporting_days_est) /
	as.double(lubridate::ymd("2022-01-01") - lubridate::ymd("2021-01-01"))
	) %>%
	# Filter out non-municipal agencies
	left_join(agency, by = c("agency_county", "agency_name")) %>%
	filter(!is.na(municipality)) %>%
	select(agency_county, municipality, lambda_est, t_0_est, partial_year)

	## Incident rate table for 2021
	incident_rate_2021 <- incident %>%
	# Get the 2021 incident count
	filter(lubridate::year(incident_date_1) <= 2021) %>%
	group_by(agency_county, agency_name) %>%
	summarize(incident_count = sum(lubridate::year(incident_date_1) == 2021),
	.groups = "drop") %>%
	# Filter out the non-municipal agencies
	left_join(agency, by = c("agency_county", "agency_name")) %>%
	filter(!is.na(municipality)) %>%
	select(agency_county, municipality, incident_count) %>%
	# Add a partial_year field, initialed to 1.0, and then update
	# with the initial_incident_2021_estimates
	mutate(partial_year = 1.0) %>%
	rows_update(
	select(
	initial_incident_2021_estimates,
	agency_county,
	municipality,
	partial_year
	),
	by = c("agency_county", "municipality")
	) %>%
	# Estimate the 2021 yearly rate
	mutate(incident_rate_est = incident_count / partial_year)

	## Clean up and use GEOID as primary key
	municipality_incident_rate <- incident_rate_2021 %>%
	left_join(municipality,
	by = c("agency_county" = "county", "municipality")) %>%
	select(GEOID, incident_count, partial_year, incident_rate_est)

	# Combined table ----

	combined_table <- officer_info %>%
	inner_join(municipality_population, by="GEOID") %>%
	inner_join(municipality_race_ethnicity_ratios, by="GEOID") %>%
	inner_join(municipality_income, by="GEOID") %>%
	inner_join(municipality_shore, by="GEOID") %>%
	inner_join(municipality_incident_rate, by="GEOID")

	# Save the map and the combined table ---
	# Uncomment to save
	# save(nj_municipality_map, combined_table, file="use_of_force_data.Rda")