plnnr/get_median_hh_income_by_race_acs.R

## get_median_hh_income_by_race_acs.R
## Get median HH income by race for 2010 and 2016
# Returns an error that function parameter "acs_year" is not found.
get_median_hh_income_by_race_acs <- function(vector_of_tract_fips, compatibility_mode = "1990", acs_year = 2016, acs_state=c("OR", "WA"), acs_county = c("051", "005", "009", "067", "071", "011", "059")) {
  if(!require(pacman)){install.packages(pacman);library(pacman)}
  p_load(tidycensus, tidyverse)

  race_income_vars_1016 <- c('B19001A_001', 'B19001A_002', 'B19001A_003', 'B19001A_004', 'B19001A_005', 'B19001A_006', 'B19001A_007',
                             'B19001A_008', 'B19001A_009', 'B19001A_010', 'B19001A_011', 'B19001A_012', 'B19001A_013', 'B19001A_014',
                             'B19001A_015', 'B19001A_016', 'B19001A_017', 'B19001B_001', 'B19001B_002', 'B19001B_003', 'B19001B_004',
                             'B19001B_005', 'B19001B_006', 'B19001B_007', 'B19001B_008', 'B19001B_009', 'B19001B_010', 'B19001B_011',
                             'B19001B_012', 'B19001B_013', 'B19001B_014', 'B19001B_015', 'B19001B_016', 'B19001B_017', 'B19001C_001',
                             'B19001C_002', 'B19001C_003', 'B19001C_004', 'B19001C_005', 'B19001C_006', 'B19001C_007', 'B19001C_008',
                             'B19001C_009', 'B19001C_010', 'B19001C_011', 'B19001C_012', 'B19001C_013', 'B19001C_014', 'B19001C_015',
                             'B19001C_016', 'B19001C_017', 'B19001D_001', 'B19001D_002', 'B19001D_003', 'B19001D_004', 'B19001D_005',
                             'B19001D_006', 'B19001D_007', 'B19001D_008', 'B19001D_009', 'B19001D_010', 'B19001D_011', 'B19001D_012',
                             'B19001D_013', 'B19001D_014', 'B19001D_015', 'B19001D_016', 'B19001D_017', 'B19001E_001', 'B19001E_002',
                             'B19001E_003', 'B19001E_004', 'B19001E_005', 'B19001E_006', 'B19001E_007', 'B19001E_008', 'B19001E_009',
                             'B19001E_010', 'B19001E_011', 'B19001E_012', 'B19001E_013', 'B19001E_014', 'B19001E_015', 'B19001E_016',
                             'B19001E_017', 'B19001F_001', 'B19001F_002', 'B19001F_003', 'B19001F_004', 'B19001F_005', 'B19001F_006',
                             'B19001F_007', 'B19001F_008', 'B19001F_009', 'B19001F_010', 'B19001F_011', 'B19001F_012', 'B19001F_013',
                             'B19001F_014', 'B19001F_015', 'B19001F_016', 'B19001F_017', 'B19001G_001', 'B19001G_002', 'B19001G_003',
                             'B19001G_004', 'B19001G_005', 'B19001G_006', 'B19001G_007', 'B19001G_008', 'B19001G_009', 'B19001G_010',
                             'B19001G_011', 'B19001G_012', 'B19001G_013', 'B19001G_014', 'B19001G_015', 'B19001G_016', 'B19001G_017',
                             'B19001H_001', 'B19001H_002', 'B19001H_003', 'B19001H_004', 'B19001H_005', 'B19001H_006', 'B19001H_007',
                             'B19001H_008', 'B19001H_009', 'B19001H_010', 'B19001H_011', 'B19001H_012', 'B19001H_013', 'B19001H_014',
                             'B19001H_015', 'B19001H_016', 'B19001H_017', 'B19001I_001', 'B19001I_002', 'B19001I_003', 'B19001I_004',
                             'B19001I_005', 'B19001I_006', 'B19001I_007', 'B19001I_008', 'B19001I_009', 'B19001I_010', 'B19001I_011',
                             'B19001I_012', 'B19001I_013', 'B19001I_014', 'B19001I_015', 'B19001I_016', 'B19001I_017')

  incgroups <- c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
                 '$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
                 '$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
                 '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')

  raceinc <- c(paste("White,", incgroups),
               paste("Black,", incgroups),
               paste("AIAN,", incgroups),
               paste("Asian,", incgroups),
               paste("NHPI,", incgroups),
               paste("Other,", incgroups),
               paste("Multi,", incgroups),
               paste("White NH,", incgroups),
               paste("Hispanic,", incgroups))

  get_income_value_keys <- function(dframe) {
    return_list_of_lists <- list(c(dframe[[1]], 0),
                                 c(dframe[[2]], 10000),
                                 c(dframe[[3]], 15000),
                                 c(dframe[[4]], 20000),
                                 c(dframe[[5]], 25000),
                                 c(dframe[[6]], 30000),
                                 c(dframe[[7]], 35000),
                                 c(dframe[[8]], 40000),
                                 c(dframe[[9]], 45000),
                                 c(dframe[[10]], 50000),
                                 c(dframe[[11]], 60000),
                                 c(dframe[[12]], 75000),
                                 c(dframe[[13]], 100000),
                                 c(dframe[[14]], 125000),
                                 c(dframe[[15]], 150000),
                                 c(dframe[[16]], 200000))
    return(return_list_of_lists)
  }

  get_pareto_median <- function(list_of_lists) {

    # Make sure list is sorted in ascending order
    data_list <- list_of_lists[order(sapply(list_of_lists, '[[', 2))]

    # Pull out elements into separate lists to make things clearer later on.
    counts <- lapply(data_list, '[[', 1)
    bases <- lapply(data_list, '[[', 2)

    # Calculate the "widths" of each group as the difference between the bases
    widths = list()
    for (i in seq_along(bases[1:length(names(bases))-1])){
      val = bases[[i + 1]] - bases[[i]]
      widths <- append(widths, val)
    }

    # The last element returns an error because i + 1 is out of bounds.
    # Just append 1, since there is no effective upper bound.
    # Could be fixed with a tryCatch().
    widths <- append(widths, 1)

    # Find the group containing the median, which will be the group at which
    # the cumulative counts is greater than the sum of all of the counts divided by 2
    target = Reduce("+",counts) / 2
    cumulative_counts = 1
    index = 1
    while (cumulative_counts <= target) {
      cumulative_counts = cumulative_counts + counts[[index]]
      index = index + 1
    }
    index = index - 1

    # Calculate the sum of all the groups prior to the one that contains the median
    previous_groups_sum = cumulative_counts - counts[[index]]

    # Last, estimate the median
    return(bases[[index]] + ((Reduce("+",counts) / 2 - previous_groups_sum) / counts[[index]]) * widths[[index]])
  }

  # Query data
  hhincome <- get_acs(geography = "tract", variables = race_income_vars_1016,
                      year = acs_year, state = acs_state, survey = 'acs5',
                      county = acs_county) %>%
    mutate(stcnty = substr(x = GEOID, start = 1, stop = 5)) %>%
    #filter(stcnty %in% c('41005', '41009', '41051', '41067', '41071', '53011', '53059'))
    mutate(variable = str_replace(variable, 'B19001', ''),
           race_code = substr(x = variable, start = 1, stop = 1),
           variable = substr(x = variable, start = 3, stop = 5)) %>%
    filter(variable != "001") %>%
    arrange(GEOID, race_code, variable)

  hhincome$group <- rep(raceinc, length(unique(hhincome$GEOID)))

  hhincome2 <- hhincome %>%
    separate(group, into = c("race", "income"), sep = ", ", remove = F) %>%
    mutate(income = ordered(income, levels = c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
                                               '$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
                                               '$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
                                               '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')),
           race_rec = race,
           race_rec = case_when(race == "Asian" | race == "NHPI" ~ "API",
                                race == "Other" | race == "Multi" ~ "Other",
                                TRUE ~ race),
           poc_rec = case_when(race == "White" ~ "White",
                               race != "White NH" ~ "POC",
                               TRUE ~ "White NH")) %>%
    filter(GEOID %in% vector_of_tract_fips)

  ### Return income for race codes that match 2010 values
  hhincome_2010_compatible <- hhincome2 %>%
    select(race, income, estimate) %>%
    group_by(race, income) %>%
    summarize_all(sum) %>%
    ungroup() %>%
    spread(key = income, value = estimate)

  median_inc_2010_compatible_list <- list(list('AIAN'), list('Asian'), list('Black'), list('Hispanic'), list('Multi'), list('NHPI'), list('Other'), list('White'), list('White NH'))

  print("Median household income by race (2010 version)")
  for (i in seq_along(median_inc_2010_compatible_list[1:9])){
    current_race <- as.character(median_inc_2010_compatible_list[[i]][[1]])
    current_race_income <- hhincome_2010_compatible %>% filter(race == current_race) %>% select(-race)
    current_income_value_keys <- get_income_value_keys(current_race_income)
    current_med_hh_income <- get_pareto_median(current_income_value_keys)
    median_inc_2010_compatible_list[[i]][2] <- round(current_med_hh_income, 0)
    print(paste( median_inc_2010_compatible_list[[i]][1], ":",  median_inc_2010_compatible_list[[i]][2]))
  }

  df_2010_compatible <- data.frame(matrix(unlist(median_inc_2010_compatible_list), nrow = 9, byrow = T))
  names(df_2010_compatible)[1] <- "race"
  names(df_2010_compatible)[2] <- "median_hh_income"
  #return(df_2010_compatible)

  ### Return income for race codes that match 1990 values
  hhincome_1990_compatible <- hhincome2 %>%
    select(race_rec, income, estimate) %>%
    group_by(race_rec, income) %>%
    summarize_all(sum) %>%
    ungroup() %>%
    spread(key = income, value = estimate)

  median_inc_1990_compatible_list <- list(list('AIAN'), list('API'), list('Black'), list('Hispanic'), list('Other'), list('White'), list('White NH'))

  print("Median household income by race (1990 version)")
  for (i in seq_along(median_inc_1990_compatible_list[1:7])){
    current_race <- as.character(median_inc_1990_compatible_list[[i]][[1]])
    current_race_income <- hhincome_1990_compatible %>% filter(race_rec == current_race) %>% select(-race_rec)
    current_income_value_keys <- get_income_value_keys(current_race_income)
    current_med_hh_income <- get_pareto_median(current_income_value_keys)
    median_inc_1990_compatible_list[[i]][2] <- round(current_med_hh_income, 0)
    print(paste(median_inc_1990_compatible_list[[i]][1], ":",  median_inc_1990_compatible_list[[i]][2]))
  }

  df_1990_compatible <- data.frame(matrix(unlist(median_inc_1990_compatible_list), nrow = 7, byrow = T))
  names(df_1990_compatible)[1] <- "race"
  names(df_1990_compatible)[2] <- "median_hh_income"
  #return(df_1990_compatible)

  ### Return income for white and PoC race categories (2010)
  hhincome_2010_poc <- hhincome2 %>%
    select(poc_rec, income, estimate) %>%
    group_by(poc_rec, income) %>%
    summarize_all(sum) %>%
    ungroup() %>%
    spread(key = income, value = estimate)

  median_inc_2010_poc_list <- list(list('White'), list('POC'), list('White NH'))

  print("Median household income by race (aggregated to PoC/White)")
  for (i in seq_along(median_inc_2010_poc_list[1:3])){
    current_race <- as.character(median_inc_2010_poc_list[[i]][[1]])
    current_race_income <- hhincome_2010_poc %>% filter(poc_rec == current_race) %>% select(-poc_rec)
    current_income_value_keys <- get_income_value_keys(current_race_income)
    current_med_hh_income <- get_pareto_median(current_income_value_keys)
    median_inc_2010_poc_list[[i]][2] <- round(current_med_hh_income, 0)
    print(paste( median_inc_2010_poc_list[[i]][1], ":",  median_inc_2010_poc_list[[i]][2]))
  }

  df_2010_poc <- data.frame(matrix(unlist(median_inc_2010_poc_list), nrow = 3, byrow = T))
  names(df_2010_poc)[1] <- "race"
  names(df_2010_poc)[2] <- "median_hh_income"
  #return(df_2010_poc)

  if (compatibility_mode == "1990"){
    return(df_1990_compatible)
  } else if (compatibility_mode == "2010"){
    return(df_2010_compatible)
  } else if (compatibility_mode == "POC"){
    return(df_2010_poc)
  } else {return("ERROR")}

}

icura_list <- c('41051002401' , '41051002402' , '41051002501' , '41051002502' , '41051003100' , '41051003200' , '41051003301' ,
                '41051003302' , '41051003401' , '41051003402' , '41051003501' , '41051003502' , '41051003601' , '41051003602' ,
                '41051003701' , '41051003702' , '41051003801' , '41051003802' , '41051003803' , '41051003901' , '41051003902' ,
                '41051002303' , '41051002203', '41051002201', '41051002202', '41051002301', '41051002302')

# This returns error "object 'acs_year' not found"
# When acs_year is hard-coded, the same error occurs
# When I specify within get_acs() `year = 2016` then it fails at the second parameter, which is acs_county
med_hh_race_16_icura <- get_median_hh_income_by_race_acs(icura_list)


### The code below is essentially the bones of the function above. The function above has been generalized, of course
# but the parameters should all be the same, more or less.
race_income_16 <- get_acs(geography = "tract", variables = race_income_vars_1016,
                          year = 2016, state = c("OR", "WA"),
                          county = c("051", "005", "009", "067", "071", "011", "059"),
                          geometry = T) %>%
  mutate(stcnty = substr(x = GEOID, start = 1, stop = 5)) %>%
  filter(stcnty %in% c('41005', '41009', '41051', '41067', '41071', '53011', '53059'))

race_income_16_working <- race_income_16 %>%
  mutate(variable = str_replace(variable, 'B19001', ''),
         year = 2016,
         race_code = substr(x = variable, start = 1, stop = 1),
         variable = substr(x = variable, start = 3, stop = 5)) %>%
  filter(variable != "001") %>%
  arrange(GEOID, race_code, variable)

incgroups <- c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
               '$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
               '$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
               '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')

raceinc <- c(paste("White,", incgroups),
             paste("Black,", incgroups),
             paste("AIAN,", incgroups),
             paste("Asian,", incgroups),
             paste("NHPI,", incgroups),
             paste("Other,", incgroups),
             paste("Multi,", incgroups),
             paste("White NH,", incgroups),
             paste("Hispanic,", incgroups))

race_income_16_working$group <- rep(raceinc, length(unique(race_income_16_working$GEOID)))

race_income_16_working <- race_income_16_working %>%
  separate(group, into = c("race", "income"), sep = ", ", remove = F) %>%
  mutate(income = ordered(income, levels = c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
                                             '$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
                                             '$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
                                             '$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')),
         race_rec = race,
         race_rec = case_when(race == "Asian" | race == "NHPI" ~ "API",
                              race == "Other" | race == "Multi" ~ "Other",
                              TRUE ~ race)) # This line recodes `race_rec` to `race` for all other instances

race_income_16_summary <- race_income_16_working %>%
  filter(GEOID %in% icura_list) %>%
  group_by(year, race_rec, income) %>%
  summarize(total_hh = sum(estimate),
            total_hh_m = moe_sum(moe = moe, estimate = estimate))
View(race_income_16_summary)
	## Get median HH income by race for 2010 and 2016
	# Returns an error that function parameter "acs_year" is not found.
	get_median_hh_income_by_race_acs <- function(vector_of_tract_fips, compatibility_mode = "1990", acs_year = 2016, acs_state=c("OR", "WA"), acs_county = c("051", "005", "009", "067", "071", "011", "059")) {
	if(!require(pacman)){install.packages(pacman);library(pacman)}
	p_load(tidycensus, tidyverse)

	race_income_vars_1016 <- c('B19001A_001', 'B19001A_002', 'B19001A_003', 'B19001A_004', 'B19001A_005', 'B19001A_006', 'B19001A_007',
	'B19001A_008', 'B19001A_009', 'B19001A_010', 'B19001A_011', 'B19001A_012', 'B19001A_013', 'B19001A_014',
	'B19001A_015', 'B19001A_016', 'B19001A_017', 'B19001B_001', 'B19001B_002', 'B19001B_003', 'B19001B_004',
	'B19001B_005', 'B19001B_006', 'B19001B_007', 'B19001B_008', 'B19001B_009', 'B19001B_010', 'B19001B_011',
	'B19001B_012', 'B19001B_013', 'B19001B_014', 'B19001B_015', 'B19001B_016', 'B19001B_017', 'B19001C_001',
	'B19001C_002', 'B19001C_003', 'B19001C_004', 'B19001C_005', 'B19001C_006', 'B19001C_007', 'B19001C_008',
	'B19001C_009', 'B19001C_010', 'B19001C_011', 'B19001C_012', 'B19001C_013', 'B19001C_014', 'B19001C_015',
	'B19001C_016', 'B19001C_017', 'B19001D_001', 'B19001D_002', 'B19001D_003', 'B19001D_004', 'B19001D_005',
	'B19001D_006', 'B19001D_007', 'B19001D_008', 'B19001D_009', 'B19001D_010', 'B19001D_011', 'B19001D_012',
	'B19001D_013', 'B19001D_014', 'B19001D_015', 'B19001D_016', 'B19001D_017', 'B19001E_001', 'B19001E_002',
	'B19001E_003', 'B19001E_004', 'B19001E_005', 'B19001E_006', 'B19001E_007', 'B19001E_008', 'B19001E_009',
	'B19001E_010', 'B19001E_011', 'B19001E_012', 'B19001E_013', 'B19001E_014', 'B19001E_015', 'B19001E_016',
	'B19001E_017', 'B19001F_001', 'B19001F_002', 'B19001F_003', 'B19001F_004', 'B19001F_005', 'B19001F_006',
	'B19001F_007', 'B19001F_008', 'B19001F_009', 'B19001F_010', 'B19001F_011', 'B19001F_012', 'B19001F_013',
	'B19001F_014', 'B19001F_015', 'B19001F_016', 'B19001F_017', 'B19001G_001', 'B19001G_002', 'B19001G_003',
	'B19001G_004', 'B19001G_005', 'B19001G_006', 'B19001G_007', 'B19001G_008', 'B19001G_009', 'B19001G_010',
	'B19001G_011', 'B19001G_012', 'B19001G_013', 'B19001G_014', 'B19001G_015', 'B19001G_016', 'B19001G_017',
	'B19001H_001', 'B19001H_002', 'B19001H_003', 'B19001H_004', 'B19001H_005', 'B19001H_006', 'B19001H_007',
	'B19001H_008', 'B19001H_009', 'B19001H_010', 'B19001H_011', 'B19001H_012', 'B19001H_013', 'B19001H_014',
	'B19001H_015', 'B19001H_016', 'B19001H_017', 'B19001I_001', 'B19001I_002', 'B19001I_003', 'B19001I_004',
	'B19001I_005', 'B19001I_006', 'B19001I_007', 'B19001I_008', 'B19001I_009', 'B19001I_010', 'B19001I_011',
	'B19001I_012', 'B19001I_013', 'B19001I_014', 'B19001I_015', 'B19001I_016', 'B19001I_017')

	incgroups <- c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
	'$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
	'$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
	'$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')

	raceinc <- c(paste("White,", incgroups),
	paste("Black,", incgroups),
	paste("AIAN,", incgroups),
	paste("Asian,", incgroups),
	paste("NHPI,", incgroups),
	paste("Other,", incgroups),
	paste("Multi,", incgroups),
	paste("White NH,", incgroups),
	paste("Hispanic,", incgroups))

	get_income_value_keys <- function(dframe) {
	return_list_of_lists <- list(c(dframe[[1]], 0),
	c(dframe[[2]], 10000),
	c(dframe[[3]], 15000),
	c(dframe[[4]], 20000),
	c(dframe[[5]], 25000),
	c(dframe[[6]], 30000),
	c(dframe[[7]], 35000),
	c(dframe[[8]], 40000),
	c(dframe[[9]], 45000),
	c(dframe[[10]], 50000),
	c(dframe[[11]], 60000),
	c(dframe[[12]], 75000),
	c(dframe[[13]], 100000),
	c(dframe[[14]], 125000),
	c(dframe[[15]], 150000),
	c(dframe[[16]], 200000))
	return(return_list_of_lists)
	}

	get_pareto_median <- function(list_of_lists) {

	# Make sure list is sorted in ascending order
	data_list <- list_of_lists[order(sapply(list_of_lists, '[[', 2))]

	# Pull out elements into separate lists to make things clearer later on.
	counts <- lapply(data_list, '[[', 1)
	bases <- lapply(data_list, '[[', 2)

	# Calculate the "widths" of each group as the difference between the bases
	widths = list()
	for (i in seq_along(bases[1:length(names(bases))-1])){
	val = bases[[i + 1]] - bases[[i]]
	widths <- append(widths, val)
	}

	# The last element returns an error because i + 1 is out of bounds.
	# Just append 1, since there is no effective upper bound.
	# Could be fixed with a tryCatch().
	widths <- append(widths, 1)

	# Find the group containing the median, which will be the group at which
	# the cumulative counts is greater than the sum of all of the counts divided by 2
	target = Reduce("+",counts) / 2
	cumulative_counts = 1
	index = 1
	while (cumulative_counts <= target) {
	cumulative_counts = cumulative_counts + counts[[index]]
	index = index + 1
	}
	index = index - 1

	# Calculate the sum of all the groups prior to the one that contains the median
	previous_groups_sum = cumulative_counts - counts[[index]]

	# Last, estimate the median
	return(bases[[index]] + ((Reduce("+",counts) / 2 - previous_groups_sum) / counts[[index]]) * widths[[index]])
	}

	# Query data
	hhincome <- get_acs(geography = "tract", variables = race_income_vars_1016,
	year = acs_year, state = acs_state, survey = 'acs5',
	county = acs_county) %>%
	mutate(stcnty = substr(x = GEOID, start = 1, stop = 5)) %>%
	#filter(stcnty %in% c('41005', '41009', '41051', '41067', '41071', '53011', '53059'))
	mutate(variable = str_replace(variable, 'B19001', ''),
	race_code = substr(x = variable, start = 1, stop = 1),
	variable = substr(x = variable, start = 3, stop = 5)) %>%
	filter(variable != "001") %>%
	arrange(GEOID, race_code, variable)

	hhincome$group <- rep(raceinc, length(unique(hhincome$GEOID)))

	hhincome2 <- hhincome %>%
	separate(group, into = c("race", "income"), sep = ", ", remove = F) %>%
	mutate(income = ordered(income, levels = c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
	'$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
	'$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
	'$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')),
	race_rec = race,
	race_rec = case_when(race == "Asian" \| race == "NHPI" ~ "API",
	race == "Other" \| race == "Multi" ~ "Other",
	TRUE ~ race),
	poc_rec = case_when(race == "White" ~ "White",
	race != "White NH" ~ "POC",
	TRUE ~ "White NH")) %>%
	filter(GEOID %in% vector_of_tract_fips)

	### Return income for race codes that match 2010 values
	hhincome_2010_compatible <- hhincome2 %>%
	select(race, income, estimate) %>%
	group_by(race, income) %>%
	summarize_all(sum) %>%
	ungroup() %>%
	spread(key = income, value = estimate)

	median_inc_2010_compatible_list <- list(list('AIAN'), list('Asian'), list('Black'), list('Hispanic'), list('Multi'), list('NHPI'), list('Other'), list('White'), list('White NH'))

	print("Median household income by race (2010 version)")
	for (i in seq_along(median_inc_2010_compatible_list[1:9])){
	current_race <- as.character(median_inc_2010_compatible_list[[i]][[1]])
	current_race_income <- hhincome_2010_compatible %>% filter(race == current_race) %>% select(-race)
	current_income_value_keys <- get_income_value_keys(current_race_income)
	current_med_hh_income <- get_pareto_median(current_income_value_keys)
	median_inc_2010_compatible_list[[i]][2] <- round(current_med_hh_income, 0)
	print(paste( median_inc_2010_compatible_list[[i]][1], ":", median_inc_2010_compatible_list[[i]][2]))
	}

	df_2010_compatible <- data.frame(matrix(unlist(median_inc_2010_compatible_list), nrow = 9, byrow = T))
	names(df_2010_compatible)[1] <- "race"
	names(df_2010_compatible)[2] <- "median_hh_income"
	#return(df_2010_compatible)

	### Return income for race codes that match 1990 values
	hhincome_1990_compatible <- hhincome2 %>%
	select(race_rec, income, estimate) %>%
	group_by(race_rec, income) %>%
	summarize_all(sum) %>%
	ungroup() %>%
	spread(key = income, value = estimate)

	median_inc_1990_compatible_list <- list(list('AIAN'), list('API'), list('Black'), list('Hispanic'), list('Other'), list('White'), list('White NH'))

	print("Median household income by race (1990 version)")
	for (i in seq_along(median_inc_1990_compatible_list[1:7])){
	current_race <- as.character(median_inc_1990_compatible_list[[i]][[1]])
	current_race_income <- hhincome_1990_compatible %>% filter(race_rec == current_race) %>% select(-race_rec)
	current_income_value_keys <- get_income_value_keys(current_race_income)
	current_med_hh_income <- get_pareto_median(current_income_value_keys)
	median_inc_1990_compatible_list[[i]][2] <- round(current_med_hh_income, 0)
	print(paste(median_inc_1990_compatible_list[[i]][1], ":", median_inc_1990_compatible_list[[i]][2]))
	}

	df_1990_compatible <- data.frame(matrix(unlist(median_inc_1990_compatible_list), nrow = 7, byrow = T))
	names(df_1990_compatible)[1] <- "race"
	names(df_1990_compatible)[2] <- "median_hh_income"
	#return(df_1990_compatible)

	### Return income for white and PoC race categories (2010)
	hhincome_2010_poc <- hhincome2 %>%
	select(poc_rec, income, estimate) %>%
	group_by(poc_rec, income) %>%
	summarize_all(sum) %>%
	ungroup() %>%
	spread(key = income, value = estimate)

	median_inc_2010_poc_list <- list(list('White'), list('POC'), list('White NH'))

	print("Median household income by race (aggregated to PoC/White)")
	for (i in seq_along(median_inc_2010_poc_list[1:3])){
	current_race <- as.character(median_inc_2010_poc_list[[i]][[1]])
	current_race_income <- hhincome_2010_poc %>% filter(poc_rec == current_race) %>% select(-poc_rec)
	current_income_value_keys <- get_income_value_keys(current_race_income)
	current_med_hh_income <- get_pareto_median(current_income_value_keys)
	median_inc_2010_poc_list[[i]][2] <- round(current_med_hh_income, 0)
	print(paste( median_inc_2010_poc_list[[i]][1], ":", median_inc_2010_poc_list[[i]][2]))
	}

	df_2010_poc <- data.frame(matrix(unlist(median_inc_2010_poc_list), nrow = 3, byrow = T))
	names(df_2010_poc)[1] <- "race"
	names(df_2010_poc)[2] <- "median_hh_income"
	#return(df_2010_poc)

	if (compatibility_mode == "1990"){
	return(df_1990_compatible)
	} else if (compatibility_mode == "2010"){
	return(df_2010_compatible)
	} else if (compatibility_mode == "POC"){
	return(df_2010_poc)
	} else {return("ERROR")}

	}

	icura_list <- c('41051002401' , '41051002402' , '41051002501' , '41051002502' , '41051003100' , '41051003200' , '41051003301' ,
	'41051003302' , '41051003401' , '41051003402' , '41051003501' , '41051003502' , '41051003601' , '41051003602' ,
	'41051003701' , '41051003702' , '41051003801' , '41051003802' , '41051003803' , '41051003901' , '41051003902' ,
	'41051002303' , '41051002203', '41051002201', '41051002202', '41051002301', '41051002302')

	# This returns error "object 'acs_year' not found"
	# When acs_year is hard-coded, the same error occurs
	# When I specify within get_acs() `year = 2016` then it fails at the second parameter, which is acs_county
	med_hh_race_16_icura <- get_median_hh_income_by_race_acs(icura_list)




	### The code below is essentially the bones of the function above. The function above has been generalized, of course
	# but the parameters should all be the same, more or less.
	race_income_16 <- get_acs(geography = "tract", variables = race_income_vars_1016,
	year = 2016, state = c("OR", "WA"),
	county = c("051", "005", "009", "067", "071", "011", "059"),
	geometry = T) %>%
	mutate(stcnty = substr(x = GEOID, start = 1, stop = 5)) %>%
	filter(stcnty %in% c('41005', '41009', '41051', '41067', '41071', '53011', '53059'))

	race_income_16_working <- race_income_16 %>%
	mutate(variable = str_replace(variable, 'B19001', ''),
	year = 2016,
	race_code = substr(x = variable, start = 1, stop = 1),
	variable = substr(x = variable, start = 3, stop = 5)) %>%
	filter(variable != "001") %>%
	arrange(GEOID, race_code, variable)

	incgroups <- c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
	'$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
	'$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
	'$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')

	raceinc <- c(paste("White,", incgroups),
	paste("Black,", incgroups),
	paste("AIAN,", incgroups),
	paste("Asian,", incgroups),
	paste("NHPI,", incgroups),
	paste("Other,", incgroups),
	paste("Multi,", incgroups),
	paste("White NH,", incgroups),
	paste("Hispanic,", incgroups))

	race_income_16_working$group <- rep(raceinc, length(unique(race_income_16_working$GEOID)))

	race_income_16_working <- race_income_16_working %>%
	separate(group, into = c("race", "income"), sep = ", ", remove = F) %>%
	mutate(income = ordered(income, levels = c('Less than $10,000', '$10,000 to $14,999', '$15,000 to $19,999', '$20,000 to $24,999',
	'$25,000 to $29,999', '$30,000 to $34,999', '$35,000 to $39,999', '$40,000 to $44,999',
	'$45,000 to $49,999', '$50,000 to $59,999', '$60,000 to $74,999', '$75,000 to $99,999',
	'$100,000 to $124,999', '$125,000 to $149,999', '$150,000 to $199,999', '$200,000 or more')),
	race_rec = race,
	race_rec = case_when(race == "Asian" \| race == "NHPI" ~ "API",
	race == "Other" \| race == "Multi" ~ "Other",
	TRUE ~ race)) # This line recodes `race_rec` to `race` for all other instances

	race_income_16_summary <- race_income_16_working %>%
	filter(GEOID %in% icura_list) %>%
	group_by(year, race_rec, income) %>%
	summarize(total_hh = sum(estimate),
	total_hh_m = moe_sum(moe = moe, estimate = estimate))
	View(race_income_16_summary)