## plnnr/get_hh_income_by_race_2000.R

## get_hh_income_by_race_2000.R

#' Household income by race at tract level, 2000 Decennial Census (SF3)
#'
#' Requests variables 001-017 of tables P151A through P151I from
#' `get_decennial()` for every tract in the given state/counties and returns
#' the raw query result.
#'
#' @param vector_of_tract_fips Character vector of tract GEOIDs. Currently
#'   unused: it is only referenced by the disabled clean-up code kept at the
#'   bottom of this function.
#' @param dec_state State, passed to `get_decennial(state = )`.
#' @param dec_county One or more counties, passed to
#'   `get_decennial(county = )`. When several are given, each county is
#'   queried separately and the results are row-bound.
#' @return The result of `get_decennial()` (all counties stacked).
get_hh_income_by_race_2000 <- function(vector_of_tract_fips, dec_state, dec_county) {
  # Load essential libraries for function.
  # The package name must be a quoted string for install.packages(), and
  # requireNamespace() avoids require()'s silent FALSE-on-failure loading.
  if (!requireNamespace("pacman", quietly = TRUE)) {
    install.packages("pacman")
  }
  library(pacman)
  p_load(tidycensus, tidyverse)

  # Name variables to gather: P151A001 ... P151A017, P151B001 ... P151I017
  # (9 race tables x 17 cells = 153 variables, table-major order).
  race_tables <- LETTERS[1:9]
  cell_ids <- sprintf("%03d", 1:17)
  race_income_vars_00 <- paste0(
    "P151",
    rep(race_tables, each = length(cell_ids)),
    cell_ids
  )

  # Query the data for a single county (or for whatever `county` holds).
  query_county <- function(county) {
    get_decennial(
      geography = "tract",
      variables = race_income_vars_00,
      year = 2000,
      sumfile = "sf3",
      state = dec_state,
      county = county,
      geometry = FALSE
    )
  }

  # A multi-county request was recorded as failing with
  # "object 'race_income_vars_00' not found".
  # NOTE(review): presumably get_decennial() re-evaluates its own call for
  # each county outside this function's environment -- confirm against the
  # installed tidycensus version. Looping over counties here sidesteps that
  # code path and yields the same stacked rows.
  if (length(dec_county) > 1) {
    hhincome_api_query <- bind_rows(lapply(dec_county, query_county))
  } else {
    hhincome_api_query <- query_county(dec_county)
  }

  hhincome_api_query

  ##### Clean-up code below is disabled; it was switched off while the #####
  ##### API call above was failing. Kept for reference.                #####
  #
  # # Define income groups
  # incgroups <- c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
  #                '$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
  #                '$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
  #                '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')
  #
  # # Make vector of race-income combinations
  # raceinc <- c(paste("White,", incgroups), paste("Black,", incgroups), paste("AIAN,", incgroups),
  #              paste("Asian,", incgroups), paste("NHPI,", incgroups), paste("Other,", incgroups),
  #              paste("Multi,", incgroups), paste("Hispanic,", incgroups), paste("White NH,", incgroups))
  #
  # # Clean the data. 0) filter only for interested tracts; 1) add state-county fips;
  # # 2) clean up/remove variable name and race code;
  # # 3) filter out "total HH"; 4) arrange by GEOID, race code and variable number
  # hhincome <- hhincome_api_query %>%
  #   filter(GEOID %in% vector_of_tract_fips) %>%
  #   mutate(stcnty = substr(x = GEOID, start = 1, stop = 5)) %>%
  #   mutate(variable = str_replace(variable, 'P151', ''),
  #          race_code = substr(x = variable, start = 1, stop = 1),
  #          variable = substr(x = variable, start = 2, stop = 4)) %>%
  #   filter(variable != "001") %>%
  #   arrange(GEOID, race_code, variable)
  #
  # # Create group column that gives human-readable names to race codes and variable names
  # hhincome$group <- rep(raceinc, length(unique(hhincome$GEOID)))
  #
  # # More cleanup. 1) Separate group into two columns: race and income; 2) make income ordered factor;
  # # 3) remove group and race_code variables; 4) select only variables interested in
  # hhincome <- hhincome %>%
  #   separate(group, into = c("race", "income"), sep = ", ", remove = F) %>%
  #   mutate(income = ordered(income, levels = incgroups)) %>%
  #   select(GEOID, stcnty, NAME, variable, race, income, value)
  #
  # return(hhincome)
}


# Tract GEOIDs to analyse, in their original order. The entries start with
# the state-county prefixes 41051, 41005 and 41067.
sw_corridor_list <- c(
  "41051005800", "41051005900", "41051006001", "41051006002",
  "41051006200", "41051006402", "41051006403", "41051006404",
  "41051006501", "41051006502", "41051006602",
  "41005020302", "41005020304", "41005020401",
  "41067030600", "41067030700", "41067030801", "41067030806",
  "41067030900", "41067031912", "41067032005"
)

# This call was recorded as failing with:
#   object 'race_income_vars_00' not found
income_race_00_swc <- get_hh_income_by_race_2000(
  vector_of_tract_fips = sw_corridor_list,
  dec_state = "OR",
  dec_county = c("051", "067", "005")
)

	#' Household income by race at tract level, 2000 Decennial Census (SF3)
	#'
	#' Requests variables 001-017 of tables P151A through P151I from
	#' `get_decennial()` for every tract in the given state/counties and returns
	#' the raw query result.
	#'
	#' @param vector_of_tract_fips Character vector of tract GEOIDs. Currently
	#'   unused: it is only referenced by the disabled clean-up code kept at the
	#'   bottom of this function.
	#' @param dec_state State, passed to `get_decennial(state = )`.
	#' @param dec_county One or more counties, passed to
	#'   `get_decennial(county = )`. When several are given, each county is
	#'   queried separately and the results are row-bound.
	#' @return The result of `get_decennial()` (all counties stacked).
	get_hh_income_by_race_2000 <- function(vector_of_tract_fips, dec_state, dec_county) {
	  # Load essential libraries for function.
	  # The package name must be a quoted string for install.packages(), and
	  # requireNamespace() avoids require()'s silent FALSE-on-failure loading.
	  if (!requireNamespace("pacman", quietly = TRUE)) {
	    install.packages("pacman")
	  }
	  library(pacman)
	  p_load(tidycensus, tidyverse)

	  # Name variables to gather: P151A001 ... P151A017, P151B001 ... P151I017
	  # (9 race tables x 17 cells = 153 variables, table-major order).
	  race_tables <- LETTERS[1:9]
	  cell_ids <- sprintf("%03d", 1:17)
	  race_income_vars_00 <- paste0(
	    "P151",
	    rep(race_tables, each = length(cell_ids)),
	    cell_ids
	  )

	  # Query the data for a single county (or for whatever `county` holds).
	  query_county <- function(county) {
	    get_decennial(
	      geography = "tract",
	      variables = race_income_vars_00,
	      year = 2000,
	      sumfile = "sf3",
	      state = dec_state,
	      county = county,
	      geometry = FALSE
	    )
	  }

	  # A multi-county request was recorded as failing with
	  # "object 'race_income_vars_00' not found".
	  # NOTE(review): presumably get_decennial() re-evaluates its own call for
	  # each county outside this function's environment -- confirm against the
	  # installed tidycensus version. Looping over counties here sidesteps that
	  # code path and yields the same stacked rows.
	  if (length(dec_county) > 1) {
	    hhincome_api_query <- bind_rows(lapply(dec_county, query_county))
	  } else {
	    hhincome_api_query <- query_county(dec_county)
	  }

	  hhincome_api_query

	  ##### Clean-up code below is disabled; it was switched off while the #####
	  ##### API call above was failing. Kept for reference.                #####
	  #
	  # # Define income groups
	  # incgroups <- c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
	  #                '$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
	  #                '$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
	  #                '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')
	  #
	  # # Make vector of race-income combinations
	  # raceinc <- c(paste("White,", incgroups), paste("Black,", incgroups), paste("AIAN,", incgroups),
	  #              paste("Asian,", incgroups), paste("NHPI,", incgroups), paste("Other,", incgroups),
	  #              paste("Multi,", incgroups), paste("Hispanic,", incgroups), paste("White NH,", incgroups))
	  #
	  # # Clean the data. 0) filter only for interested tracts; 1) add state-county fips;
	  # # 2) clean up/remove variable name and race code;
	  # # 3) filter out "total HH"; 4) arrange by GEOID, race code and variable number
	  # hhincome <- hhincome_api_query %>%
	  #   filter(GEOID %in% vector_of_tract_fips) %>%
	  #   mutate(stcnty = substr(x = GEOID, start = 1, stop = 5)) %>%
	  #   mutate(variable = str_replace(variable, 'P151', ''),
	  #          race_code = substr(x = variable, start = 1, stop = 1),
	  #          variable = substr(x = variable, start = 2, stop = 4)) %>%
	  #   filter(variable != "001") %>%
	  #   arrange(GEOID, race_code, variable)
	  #
	  # # Create group column that gives human-readable names to race codes and variable names
	  # hhincome$group <- rep(raceinc, length(unique(hhincome$GEOID)))
	  #
	  # # More cleanup. 1) Separate group into two columns: race and income; 2) make income ordered factor;
	  # # 3) remove group and race_code variables; 4) select only variables interested in
	  # hhincome <- hhincome %>%
	  #   separate(group, into = c("race", "income"), sep = ", ", remove = F) %>%
	  #   mutate(income = ordered(income, levels = incgroups)) %>%
	  #   select(GEOID, stcnty, NAME, variable, race, income, value)
	  #
	  # return(hhincome)
	}


	# Tract GEOIDs to analyse, in their original order. The entries start with
	# the state-county prefixes 41051, 41005 and 41067.
	sw_corridor_list <- c(
	  "41051005800", "41051005900", "41051006001", "41051006002",
	  "41051006200", "41051006402", "41051006403", "41051006404",
	  "41051006501", "41051006502", "41051006602",
	  "41005020302", "41005020304", "41005020401",
	  "41067030600", "41067030700", "41067030801", "41067030806",
	  "41067030900", "41067031912", "41067032005"
	)

	# This call was recorded as failing with:
	#   object 'race_income_vars_00' not found
	income_race_00_swc <- get_hh_income_by_race_2000(
	  vector_of_tract_fips = sw_corridor_list,
	  dec_state = "OR",
	  dec_county = c("051", "067", "005")
	)