Skip to content

Instantly share code, notes, and snippets.

@plnnr
Last active July 31, 2018 23:43
Show Gist options
  • Save plnnr/0d0c841cf6b04d2e3fce4333a21c0351 to your computer and use it in GitHub Desktop.
Save plnnr/0d0c841cf6b04d2e3fce4333a21c0351 to your computer and use it in GitHub Desktop.
R code to get demographic profile for 2010 and 2016 by feeding in a list of census tract FIPS codes. It also calculates median household income by aggregating tract income data and finding the pareto median.
library(tidyverse)
library(tidycensus)
library(dplyr)
# Uncomment and run this code if it is your first time using tidycensus:
# census_api_key("<your_key_here>", install = T)
# If you do not have a Census API key, you can get one at the link below:
# https://api.census.gov/data/key_signup.html
variable_list = c('B08301_001', 'B08301_003', 'B08301_004', 'B08301_010', 'B08301_016',
'B08301_017', 'B08301_018', 'B08301_019', 'B08301_020', 'B08301_021',
'B25003_001', 'B25003_002', 'B25003_003', 'B19301_001', 'B25010_001',
'B03002_003', 'B03002_004', 'B03002_005', 'B03002_006', 'B03002_007',
'B03002_008', 'B03002_009', 'B03002_012', 'B03002_001', 'B25024_001', 'B25024_002',
'B25024_003', 'B25024_004', 'B25024_005', 'B25024_006', 'B25024_007',
'B25024_008', 'B25024_009', 'B25024_010', 'B25024_011', 'B02009_001',
'B02010_001', 'B02011_001', 'B02012_001', 'B02013_001', 'B15002_001',
'B15002_002', 'B15002_003', 'B15002_004', 'B15002_005', 'B15002_006',
'B15002_007', 'B15002_008', 'B15002_009', 'B15002_010', 'B15002_011',
'B15002_012', 'B15002_013', 'B15002_014', 'B15002_015', 'B15002_016',
'B15002_017', 'B15002_018', 'B15002_019', 'B15002_020', 'B15002_021',
'B15002_022', 'B15002_023', 'B15002_024', 'B15002_025', 'B15002_026',
'B15002_027', 'B15002_028', 'B15002_029', 'B15002_030', 'B15002_031',
'B15002_032', 'B15002_033', 'B15002_034', 'B15002_035', 'B01001_003',
'B01001_027', 'B01001_004', 'B01001_028', 'B01001_005', 'B01001_029',
'B01001_006', 'B01001_030', 'B01001_018', 'B01001_042', 'B01001_019',
'B01001_043', 'B01001_020', 'B01001_044', 'B01001_021', 'B01001_045',
'B01001_022', 'B01001_046', 'B01001_023', 'B01001_047', 'B01001_024',
'B01001_048', 'B01001_025', 'B01001_049', 'B05002_013')
suite_acs_16 <- get_acs(geography = 'tract', variables = variable_list,
state = c('OR', 'WA'), output = 'wide')
suite_acs_16_plc <- get_acs(geography = 'place', variables = variable_list,
state = c('OR', 'WA'), output = 'wide')
suite_acs_10 <- get_acs(geography = 'tract', variables = variable_list,
state = c('OR', 'WA'), output = 'wide', year = 2010)
suite_acs_10_plc <- get_acs(geography = 'place', variables = variable_list,
state = c('OR', 'WA'), output = 'wide', year = 2010)
suite_acs_16 <- suite_acs_16 %>%
mutate(cnty_fips = str_sub(GEOID, 1, 5),
year = as.factor('2012-2016'))
suite_acs_16_plc <- suite_acs_16_plc %>%
mutate(cnty_fips = 99999,
year = as.factor('2012-2016'))
suite_acs_10 <- suite_acs_10 %>%
mutate(cnty_fips = str_sub(GEOID, 1, 5),
year = as.factor('2006-2010'))
suite_acs_10_plc <- suite_acs_10_plc %>%
mutate(cnty_fips = 99999,
year = as.factor('2006-2010'))
suite_acs <- rbind(suite_acs_16, suite_acs_10, suite_acs_16_plc, suite_acs_10_plc)
suite_acs2 <- suite_acs %>%
mutate(poc_total = B03002_001E - B03002_003E,
unwgt_per_cap_income = B19301_001E * B03002_001E,
unwgt_hh_size = B25010_001E * B25003_001E,
commute_other = B08301_016E + B08301_017E + B08301_020E,
commute_active = B08301_018E + B08301_019E,
commute_non_sov = B08301_001E - B08301_003E,
edu_less_than_hs = B15002_003E + B15002_020E + B15002_004E + B15002_021E + B15002_005E +
B15002_022E + B15002_006E + B15002_023E + B15002_007E + B15002_024E + B15002_008E +
B15002_025E + B15002_009E + B15002_026E + B15002_010E + B15002_027E,
edu_hs_diploma = B15002_011E + B15002_028E,
edu_some_college = B15002_012E + B15002_029E + B15002_013E + B15002_030E + B15002_014E + B15002_031E,
edu_four_year = B15002_015E + B15002_032E,
edu_advanced = B15002_016E + B15002_033E + B15002_017E + B15002_034E + B15002_018E + B15002_035E,
edu_less_than_four_year = B15002_003E + B15002_020E + B15002_004E + B15002_021E + B15002_005E +
B15002_022E + B15002_006E + B15002_023E + B15002_007E + B15002_024E + B15002_008E +
B15002_025E + B15002_009E + B15002_026E + B15002_010E + B15002_027E + B15002_011E +
B15002_028E + B15002_012E + B15002_029E + B15002_013E + B15002_030E + B15002_014E + B15002_031E,
edu_hs_or_less = B15002_003E + B15002_020E + B15002_004E + B15002_021E + B15002_005E +
B15002_022E + B15002_006E + B15002_023E + B15002_007E + B15002_024E + B15002_008E +
B15002_025E + B15002_009E + B15002_026E + B15002_010E + B15002_027E + B15002_011E + B15002_028E,
edu_four_year_or_more = B15002_016E + B15002_033E + B15002_017E + B15002_034E + B15002_018E + B15002_035E +
B15002_015E + B15002_032E,
unit_detached = B25024_002E,
unit_small_mfr = B25024_003E + B25024_004E + B25024_005E,
unit_medium_mfr = B25024_006E + B25024_007E,
unit_large_mfr = B25024_008E + B25024_009E,
unit_other = B25024_010E + B25024_011E,
age_under_18 = B01001_003E + B01001_027E + B01001_004E + B01001_028E + B01001_005E +
B01001_029E + B01001_006E + B01001_030E,
age_over_59 = B01001_018E + B01001_042E + B01001_019E + B01001_043E + B01001_020E +
B01001_044E + B01001_021E + B01001_045E + B01001_022E + B01001_046E + B01001_023E +
B01001_047E + B01001_024E + B01001_048E + B01001_025E + B01001_049E,
age_over_64 = B01001_020E + B01001_044E + B01001_021E + B01001_045E + B01001_022E +
B01001_046E + B01001_023E + B01001_047E + B01001_024E + B01001_048E + B01001_025E + B01001_049E
) %>%
mutate(age_18_to_59 = B03002_001E - age_under_18 - age_over_59,
age_18_to_64 = B03002_001E - age_under_18 - age_over_64)
get_demographic_profile <- function(fips_list) {
get_median_hh_income <- function(list_of_tract_fips, acs_state='OR', acs_year=2016) {
require(tidycensus)
require(tidyverse)
require(dplyr)
get_income_value_keys <- function(dframe) {
return_list_of_lists <- list(c(dframe[[1]], 0),
c(dframe[[2]], 10000),
c(dframe[[3]], 15000),
c(dframe[[4]], 20000),
c(dframe[[5]], 25000),
c(dframe[[6]], 30000),
c(dframe[[7]], 35000),
c(dframe[[8]], 40000),
c(dframe[[9]], 45000),
c(dframe[[10]], 50000),
c(dframe[[11]], 60000),
c(dframe[[12]], 75000),
c(dframe[[13]], 100000),
c(dframe[[14]], 125000),
c(dframe[[15]], 150000),
c(dframe[[16]], 200000))
return(return_list_of_lists)
}
get_pareto_median <- function(list_of_lists) {
# Make sure list is sorted in ascending order
data_list <- list_of_lists[order(sapply(list_of_lists, '[[', 2))]
# Pull out elements into separate lists to make things clearer later on.
counts <- lapply(data_list, '[[', 1)
bases <- lapply(data_list, '[[', 2)
# Calculate the "widths" of each group as the difference between the bases
widths = list()
for (i in seq_along(bases[1:length(names(bases))-1])){
val = bases[[i + 1]] - bases[[i]]
widths <- append(widths, val)
}
# The last element returns an error because i + 1 is out of bounds.
# Just append 1, since there is no effective upper bound.
# Could be fixed with a tryCatch().
widths <- append(widths, 1)
# Find the group containing the median, which will be the group at which
# the cumulative counts is greater than the sum of all of the counts divided by 2
target = Reduce("+",counts) / 2
cumulative_counts = 1
index = 1
while (cumulative_counts <= target) {
cumulative_counts = cumulative_counts + counts[[index]]
index = index + 1
}
index = index - 1
# Calculate the sum of all the groups prior to the one that contains the median
previous_groups_sum = cumulative_counts - counts[[index]]
# Last, estimate the median
return(bases[[index]] + ((Reduce("+",counts) / 2 - previous_groups_sum) / counts[[index]]) * widths[[index]])
}
hhincome <- get_acs(geography = 'tract', table = 'B19001', state = acs_state, survey = 'acs5', year = acs_year, output = 'wide', cache_table = TRUE)
hhincome <- hhincome %>%
mutate(cnty_fips = str_sub(GEOID, 1, 5))
#hhincome_plc <- get_acs(geography = 'place', table = 'B19013', state = acs_state, survey = 'acs5', year = acs_year, output = 'wide', cache_table = TRUE)
area_hhincome <- hhincome %>%
filter(GEOID %in% list_of_tract_fips) %>%
summarize(inc_000k_009k = sum(B19001_002E),
inc_010k_014k = sum(B19001_003E),
inc_015k_019k = sum(B19001_004E),
inc_020k_024k = sum(B19001_005E),
inc_025k_029k = sum(B19001_006E),
inc_030k_034k = sum(B19001_007E),
inc_035k_039k = sum(B19001_008E),
inc_040k_044k = sum(B19001_009E),
inc_045k_049k = sum(B19001_010E),
inc_050k_059k = sum(B19001_011E),
inc_060k_074k = sum(B19001_012E),
inc_075k_099k = sum(B19001_013E),
inc_100k_124k = sum(B19001_014E),
inc_125k_149k = sum(B19001_015E),
inc_150k_199k = sum(B19001_016E),
inc_200k_999k = sum(B19001_017E)) %>%
select(inc_000k_009k, inc_010k_014k, inc_015k_019k, inc_020k_024k,
inc_025k_029k, inc_030k_034k, inc_035k_039k, inc_040k_044k,
inc_045k_049k, inc_050k_059k, inc_060k_074k, inc_075k_099k,
inc_100k_124k, inc_125k_149k, inc_150k_199k, inc_200k_999k)
get_pareto_median(get_income_value_keys(area_hhincome))
#place_hhincome <- hhincome_plc %>%
# filter(GEOID %>% list_of_tract_fips) %>%
}
result <- suite_acs2 %>%
filter(GEOID %in% fips_list) %>%
group_by(year) %>%
summarize(pop_total = sum(B03002_001E),
poc_total = sum(poc_total),
share_poc = sum(poc_total) / sum(B03002_001E),
total_black_combo = sum(B02009_001E),
total_aian_combo = sum(B02010_001E),
total_asian_combo = sum(B02011_001E),
total_nhpi_combo = sum(B02012_001E),
total_other_combo = sum(B02013_001E),
total_hisp_combo = sum(B03002_012E),
share_white_100 = sum(B03002_003E) / sum(B03002_001E),
share_black_100 = sum(B03002_004E) / sum(B03002_001E),
share_aian_100 = sum(B03002_005E) / sum(B03002_001E),
share_asian_100 = sum(B03002_006E) / sum(B03002_001E),
share_nhpi_100 = sum(B03002_007E) / sum(B03002_001E),
share_other_100 = sum(B03002_008E) / sum(B03002_001E),
share_two_100 = sum(B03002_009E) / sum(B03002_001E),
share_hisp_100 = sum(B03002_012E) / sum(B03002_001E),
total_adults = sum(B15002_001E),
share_edu_less_than_hs = sum(edu_less_than_hs) / sum(B15002_001E),
share_edu_hs_diploma = sum(edu_hs_diploma) / sum(B15002_001E),
share_edu_some_college = sum(edu_some_college) / sum(B15002_001E),
share_edu_four_year = sum(edu_four_year) / sum(B15002_001E),
share_edu_advanced = sum(edu_advanced) / sum(B15002_001E),
share_edu_less_than_four_year = sum(edu_less_than_four_year) / sum(B15002_001E),
hh_total = sum(B25003_001E),
hh_own = sum(B25003_002E),
hh_rent = sum(B25003_003E),
share_hh_own = sum(B25003_002E) / sum(B25003_001E),
share_hh_rent = sum(B25003_003E) / sum(B25003_001E),
hh_size = sum(unwgt_hh_size) / sum(B25003_001E),
unit_detached = sum(unit_detached),
unit_small_mfr = sum(unit_small_mfr),
unit_medium_mfr = sum(unit_medium_mfr),
unit_large_mfr = sum(unit_large_mfr),
unit_other = sum(unit_other),
share_unit_detached = sum(unit_detached) / sum(B25024_001E),
share_unit_small_mfr = sum(unit_small_mfr) / sum(B25024_001E),
share_unit_medium_mfr = sum(unit_medium_mfr) / sum(B25024_001E),
share_unit_large_mfr = sum(unit_large_mfr) / sum(B25024_001E),
share_unit_other = sum(unit_other) / sum(B25024_001E),
share_commute_sov = sum(B08301_003E) / sum(B08301_001E),
share_commute_non_sov = sum(commute_non_sov)/ sum(B08301_001E),
share_commute_active = sum(commute_active) / sum(B08301_001E),
share_commute_bicycle = sum(B08301_018E) / sum(B08301_001E),
share_commute_walk = sum(B08301_019E) / sum(B08301_001E),
share_commute_transit = sum(B08301_010E) / sum(B08301_001E),
share_commute_carpool = sum(B08301_004E) / sum(B08301_001E),
share_commute_other = sum(commute_other) / sum(B08301_001E),
share_commute_telework = sum(B08301_021E) / sum(B08301_001E),
age_under_18 = sum(age_under_18),
age_18_to_59 = sum(age_18_to_59),
age_18_to_64 = sum(age_18_to_64),
age_over_59 = sum(age_over_59),
age_over_64 = sum(age_over_64),
share_age_under_18 = sum(age_under_18) / sum(B03002_001E),
share_age_18_to_59 = sum(age_18_to_59) / sum(B03002_001E),
share_age_18_to_64 = sum(age_18_to_64) / sum(B03002_001E),
share_age_over_59 = sum(age_over_59) / sum(B03002_001E),
share_age_over_64 = sum(age_over_64) / sum(B03002_001E),
per_cap_income = sum(unwgt_per_cap_income) / sum(B03002_001E)
)
# Uncomment if also grabbing 2009 data
hh_inc_2016 <- get_median_hh_income(fips_list, acs_year = 2016)
hh_inc_2010 <- get_median_hh_income(fips_list, acs_year = 2010)
#hh_inc_2009 <- get_median_hh_income(fips_list, acs_year = 2009)
# Uncomment last line and also reorder the column references (1 = 2009 ... 3 = 2016)
result$med_hh_income <- 0
result$med_hh_income[[1]] <- get_median_hh_income(fips_list, 'OR', 2010)
result$med_hh_income[[2]] <- get_median_hh_income(fips_list, 'OR', 2016)
#result$med_hh_income[[3]] <- get_median_hh_income(fips_list, 'OR', 2016)
return(result)
}
pdx1 <- suite_acs2 %>%
filter(GEOID == '4159000') %>%
group_by(year) %>%
summarize(pop_total = sum(B03002_001E),
poc_total = sum(poc_total),
share_poc = sum(poc_total) / sum(B03002_001E),
total_black_combo = sum(B02009_001E),
total_aian_combo = sum(B02010_001E),
total_asian_combo = sum(B02011_001E),
total_nhpi_combo = sum(B02012_001E),
total_other_combo = sum(B02013_001E),
total_hisp_combo = sum(B03002_012E),
share_white_100 = sum(B03002_003E) / sum(B03002_001E),
share_black_100 = sum(B03002_004E) / sum(B03002_001E),
share_aian_100 = sum(B03002_005E) / sum(B03002_001E),
share_asian_100 = sum(B03002_006E) / sum(B03002_001E),
share_nhpi_100 = sum(B03002_007E) / sum(B03002_001E),
share_other_100 = sum(B03002_008E) / sum(B03002_001E),
share_two_100 = sum(B03002_009E) / sum(B03002_001E),
share_hisp_100 = sum(B03002_012E) / sum(B03002_001E),
total_adults = sum(B15002_001E),
share_edu_less_than_hs = sum(edu_less_than_hs) / sum(B15002_001E),
share_edu_hs_diploma = sum(edu_hs_diploma) / sum(B15002_001E),
share_edu_some_college = sum(edu_some_college) / sum(B15002_001E),
share_edu_four_year = sum(edu_four_year) / sum(B15002_001E),
share_edu_advanced = sum(edu_advanced) / sum(B15002_001E),
share_edu_less_than_four_year = sum(edu_less_than_four_year) / sum(B15002_001E),
hh_total = sum(B25003_001E),
hh_own = sum(B25003_002E),
hh_rent = sum(B25003_003E),
share_hh_own = sum(B25003_002E) / sum(B25003_001E),
share_hh_rent = sum(B25003_003E) / sum(B25003_001E),
hh_size = sum(unwgt_hh_size) / sum(B25003_001E),
unit_detached = sum(unit_detached),
unit_small_mfr = sum(unit_small_mfr),
unit_medium_mfr = sum(unit_medium_mfr),
unit_large_mfr = sum(unit_large_mfr),
unit_other = sum(unit_other),
share_unit_detached = sum(unit_detached) / sum(B25024_001E),
share_unit_small_mfr = sum(unit_small_mfr) / sum(B25024_001E),
share_unit_medium_mfr = sum(unit_medium_mfr) / sum(B25024_001E),
share_unit_large_mfr = sum(unit_large_mfr) / sum(B25024_001E),
share_unit_other = sum(unit_other) / sum(B25024_001E),
share_commute_sov = sum(B08301_003E) / sum(B08301_001E),
share_commute_non_sov = sum(commute_non_sov)/ sum(B08301_001E),
share_commute_active = sum(commute_active) / sum(B08301_001E),
share_commute_bicycle = sum(B08301_018E) / sum(B08301_001E),
share_commute_walk = sum(B08301_019E) / sum(B08301_001E),
share_commute_transit = sum(B08301_010E) / sum(B08301_001E),
share_commute_carpool = sum(B08301_004E) / sum(B08301_001E),
share_commute_other = sum(commute_other) / sum(B08301_001E),
share_commute_telework = sum(B08301_021E) / sum(B08301_001E),
age_under_18 = sum(age_under_18),
age_18_to_59 = sum(age_18_to_59),
age_18_to_64 = sum(age_18_to_64),
age_over_59 = sum(age_over_59),
age_over_64 = sum(age_over_64),
share_age_under_18 = sum(age_under_18) / sum(B03002_001E),
share_age_18_to_59 = sum(age_18_to_59) / sum(B03002_001E),
share_age_18_to_64 = sum(age_18_to_64) / sum(B03002_001E),
share_age_over_59 = sum(age_over_59) / sum(B03002_001E),
share_age_over_64 = sum(age_over_64) / sum(B03002_001E),
per_cap_income = sum(unwgt_per_cap_income) / sum(B03002_001E)
)
rossi_fips2 <- c('41051008001', '41051008002', '41051009501', '41051009400', '41051007800', '41051007900', '41051009502')
rossi_demo <- get_demographic_profile(rossi_fips2)
glimpse(rossi_demo)
glimpse(pdx1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment