Last active
July 31, 2018 23:43
-
-
Save plnnr/0d0c841cf6b04d2e3fce4333a21c0351 to your computer and use it in GitHub Desktop.
R code to get demographic profile for 2010 and 2016 by feeding in a list of census tract FIPS codes. It also calculates median household income by aggregating tract income data and finding the pareto median.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(tidycensus) | |
library(dplyr) | |
# Uncomment and run this code if it is your first time using tidycensus: | |
# census_api_key("<your_key_here>", install = T) | |
# If you do not have a Census API key, you can get one at the link below: | |
# https://api.census.gov/data/key_signup.html | |
variable_list = c('B08301_001', 'B08301_003', 'B08301_004', 'B08301_010', 'B08301_016', | |
'B08301_017', 'B08301_018', 'B08301_019', 'B08301_020', 'B08301_021', | |
'B25003_001', 'B25003_002', 'B25003_003', 'B19301_001', 'B25010_001', | |
'B03002_003', 'B03002_004', 'B03002_005', 'B03002_006', 'B03002_007', | |
'B03002_008', 'B03002_009', 'B03002_012', 'B03002_001', 'B25024_001', 'B25024_002', | |
'B25024_003', 'B25024_004', 'B25024_005', 'B25024_006', 'B25024_007', | |
'B25024_008', 'B25024_009', 'B25024_010', 'B25024_011', 'B02009_001', | |
'B02010_001', 'B02011_001', 'B02012_001', 'B02013_001', 'B15002_001', | |
'B15002_002', 'B15002_003', 'B15002_004', 'B15002_005', 'B15002_006', | |
'B15002_007', 'B15002_008', 'B15002_009', 'B15002_010', 'B15002_011', | |
'B15002_012', 'B15002_013', 'B15002_014', 'B15002_015', 'B15002_016', | |
'B15002_017', 'B15002_018', 'B15002_019', 'B15002_020', 'B15002_021', | |
'B15002_022', 'B15002_023', 'B15002_024', 'B15002_025', 'B15002_026', | |
'B15002_027', 'B15002_028', 'B15002_029', 'B15002_030', 'B15002_031', | |
'B15002_032', 'B15002_033', 'B15002_034', 'B15002_035', 'B01001_003', | |
'B01001_027', 'B01001_004', 'B01001_028', 'B01001_005', 'B01001_029', | |
'B01001_006', 'B01001_030', 'B01001_018', 'B01001_042', 'B01001_019', | |
'B01001_043', 'B01001_020', 'B01001_044', 'B01001_021', 'B01001_045', | |
'B01001_022', 'B01001_046', 'B01001_023', 'B01001_047', 'B01001_024', | |
'B01001_048', 'B01001_025', 'B01001_049', 'B05002_013') | |
suite_acs_16 <- get_acs(geography = 'tract', variables = variable_list, | |
state = c('OR', 'WA'), output = 'wide') | |
suite_acs_16_plc <- get_acs(geography = 'place', variables = variable_list, | |
state = c('OR', 'WA'), output = 'wide') | |
suite_acs_10 <- get_acs(geography = 'tract', variables = variable_list, | |
state = c('OR', 'WA'), output = 'wide', year = 2010) | |
suite_acs_10_plc <- get_acs(geography = 'place', variables = variable_list, | |
state = c('OR', 'WA'), output = 'wide', year = 2010) | |
suite_acs_16 <- suite_acs_16 %>% | |
mutate(cnty_fips = str_sub(GEOID, 1, 5), | |
year = as.factor('2012-2016')) | |
suite_acs_16_plc <- suite_acs_16_plc %>% | |
mutate(cnty_fips = 99999, | |
year = as.factor('2012-2016')) | |
suite_acs_10 <- suite_acs_10 %>% | |
mutate(cnty_fips = str_sub(GEOID, 1, 5), | |
year = as.factor('2006-2010')) | |
suite_acs_10_plc <- suite_acs_10_plc %>% | |
mutate(cnty_fips = 99999, | |
year = as.factor('2006-2010')) | |
suite_acs <- rbind(suite_acs_16, suite_acs_10, suite_acs_16_plc, suite_acs_10_plc) | |
suite_acs2 <- suite_acs %>% | |
mutate(poc_total = B03002_001E - B03002_003E, | |
unwgt_per_cap_income = B19301_001E * B03002_001E, | |
unwgt_hh_size = B25010_001E * B25003_001E, | |
commute_other = B08301_016E + B08301_017E + B08301_020E, | |
commute_active = B08301_018E + B08301_019E, | |
commute_non_sov = B08301_001E - B08301_003E, | |
edu_less_than_hs = B15002_003E + B15002_020E + B15002_004E + B15002_021E + B15002_005E + | |
B15002_022E + B15002_006E + B15002_023E + B15002_007E + B15002_024E + B15002_008E + | |
B15002_025E + B15002_009E + B15002_026E + B15002_010E + B15002_027E, | |
edu_hs_diploma = B15002_011E + B15002_028E, | |
edu_some_college = B15002_012E + B15002_029E + B15002_013E + B15002_030E + B15002_014E + B15002_031E, | |
edu_four_year = B15002_015E + B15002_032E, | |
edu_advanced = B15002_016E + B15002_033E + B15002_017E + B15002_034E + B15002_018E + B15002_035E, | |
edu_less_than_four_year = B15002_003E + B15002_020E + B15002_004E + B15002_021E + B15002_005E + | |
B15002_022E + B15002_006E + B15002_023E + B15002_007E + B15002_024E + B15002_008E + | |
B15002_025E + B15002_009E + B15002_026E + B15002_010E + B15002_027E + B15002_011E + | |
B15002_028E + B15002_012E + B15002_029E + B15002_013E + B15002_030E + B15002_014E + B15002_031E, | |
edu_hs_or_less = B15002_003E + B15002_020E + B15002_004E + B15002_021E + B15002_005E + | |
B15002_022E + B15002_006E + B15002_023E + B15002_007E + B15002_024E + B15002_008E + | |
B15002_025E + B15002_009E + B15002_026E + B15002_010E + B15002_027E + B15002_011E + B15002_028E, | |
edu_four_year_or_more = B15002_016E + B15002_033E + B15002_017E + B15002_034E + B15002_018E + B15002_035E + | |
B15002_015E + B15002_032E, | |
unit_detached = B25024_002E, | |
unit_small_mfr = B25024_003E + B25024_004E + B25024_005E, | |
unit_medium_mfr = B25024_006E + B25024_007E, | |
unit_large_mfr = B25024_008E + B25024_009E, | |
unit_other = B25024_010E + B25024_011E, | |
age_under_18 = B01001_003E + B01001_027E + B01001_004E + B01001_028E + B01001_005E + | |
B01001_029E + B01001_006E + B01001_030E, | |
age_over_59 = B01001_018E + B01001_042E + B01001_019E + B01001_043E + B01001_020E + | |
B01001_044E + B01001_021E + B01001_045E + B01001_022E + B01001_046E + B01001_023E + | |
B01001_047E + B01001_024E + B01001_048E + B01001_025E + B01001_049E, | |
age_over_64 = B01001_020E + B01001_044E + B01001_021E + B01001_045E + B01001_022E + | |
B01001_046E + B01001_023E + B01001_047E + B01001_024E + B01001_048E + B01001_025E + B01001_049E | |
) %>% | |
mutate(age_18_to_59 = B03002_001E - age_under_18 - age_over_59, | |
age_18_to_64 = B03002_001E - age_under_18 - age_over_64) | |
get_demographic_profile <- function(fips_list) { | |
get_median_hh_income <- function(list_of_tract_fips, acs_state='OR', acs_year=2016) { | |
require(tidycensus) | |
require(tidyverse) | |
require(dplyr) | |
get_income_value_keys <- function(dframe) { | |
return_list_of_lists <- list(c(dframe[[1]], 0), | |
c(dframe[[2]], 10000), | |
c(dframe[[3]], 15000), | |
c(dframe[[4]], 20000), | |
c(dframe[[5]], 25000), | |
c(dframe[[6]], 30000), | |
c(dframe[[7]], 35000), | |
c(dframe[[8]], 40000), | |
c(dframe[[9]], 45000), | |
c(dframe[[10]], 50000), | |
c(dframe[[11]], 60000), | |
c(dframe[[12]], 75000), | |
c(dframe[[13]], 100000), | |
c(dframe[[14]], 125000), | |
c(dframe[[15]], 150000), | |
c(dframe[[16]], 200000)) | |
return(return_list_of_lists) | |
} | |
get_pareto_median <- function(list_of_lists) { | |
# Make sure list is sorted in ascending order | |
data_list <- list_of_lists[order(sapply(list_of_lists, '[[', 2))] | |
# Pull out elements into separate lists to make things clearer later on. | |
counts <- lapply(data_list, '[[', 1) | |
bases <- lapply(data_list, '[[', 2) | |
# Calculate the "widths" of each group as the difference between the bases | |
widths = list() | |
for (i in seq_along(bases[1:length(names(bases))-1])){ | |
val = bases[[i + 1]] - bases[[i]] | |
widths <- append(widths, val) | |
} | |
# The last element returns an error because i + 1 is out of bounds. | |
# Just append 1, since there is no effective upper bound. | |
# Could be fixed with a tryCatch(). | |
widths <- append(widths, 1) | |
# Find the group containing the median, which will be the group at which | |
# the cumulative counts is greater than the sum of all of the counts divided by 2 | |
target = Reduce("+",counts) / 2 | |
cumulative_counts = 1 | |
index = 1 | |
while (cumulative_counts <= target) { | |
cumulative_counts = cumulative_counts + counts[[index]] | |
index = index + 1 | |
} | |
index = index - 1 | |
# Calculate the sum of all the groups prior to the one that contains the median | |
previous_groups_sum = cumulative_counts - counts[[index]] | |
# Last, estimate the median | |
return(bases[[index]] + ((Reduce("+",counts) / 2 - previous_groups_sum) / counts[[index]]) * widths[[index]]) | |
} | |
hhincome <- get_acs(geography = 'tract', table = 'B19001', state = acs_state, survey = 'acs5', year = acs_year, output = 'wide', cache_table = TRUE) | |
hhincome <- hhincome %>% | |
mutate(cnty_fips = str_sub(GEOID, 1, 5)) | |
#hhincome_plc <- get_acs(geography = 'place', table = 'B19013', state = acs_state, survey = 'acs5', year = acs_year, output = 'wide', cache_table = TRUE) | |
area_hhincome <- hhincome %>% | |
filter(GEOID %in% list_of_tract_fips) %>% | |
summarize(inc_000k_009k = sum(B19001_002E), | |
inc_010k_014k = sum(B19001_003E), | |
inc_015k_019k = sum(B19001_004E), | |
inc_020k_024k = sum(B19001_005E), | |
inc_025k_029k = sum(B19001_006E), | |
inc_030k_034k = sum(B19001_007E), | |
inc_035k_039k = sum(B19001_008E), | |
inc_040k_044k = sum(B19001_009E), | |
inc_045k_049k = sum(B19001_010E), | |
inc_050k_059k = sum(B19001_011E), | |
inc_060k_074k = sum(B19001_012E), | |
inc_075k_099k = sum(B19001_013E), | |
inc_100k_124k = sum(B19001_014E), | |
inc_125k_149k = sum(B19001_015E), | |
inc_150k_199k = sum(B19001_016E), | |
inc_200k_999k = sum(B19001_017E)) %>% | |
select(inc_000k_009k, inc_010k_014k, inc_015k_019k, inc_020k_024k, | |
inc_025k_029k, inc_030k_034k, inc_035k_039k, inc_040k_044k, | |
inc_045k_049k, inc_050k_059k, inc_060k_074k, inc_075k_099k, | |
inc_100k_124k, inc_125k_149k, inc_150k_199k, inc_200k_999k) | |
get_pareto_median(get_income_value_keys(area_hhincome)) | |
#place_hhincome <- hhincome_plc %>% | |
# filter(GEOID %>% list_of_tract_fips) %>% | |
} | |
result <- suite_acs2 %>% | |
filter(GEOID %in% fips_list) %>% | |
group_by(year) %>% | |
summarize(pop_total = sum(B03002_001E), | |
poc_total = sum(poc_total), | |
share_poc = sum(poc_total) / sum(B03002_001E), | |
total_black_combo = sum(B02009_001E), | |
total_aian_combo = sum(B02010_001E), | |
total_asian_combo = sum(B02011_001E), | |
total_nhpi_combo = sum(B02012_001E), | |
total_other_combo = sum(B02013_001E), | |
total_hisp_combo = sum(B03002_012E), | |
share_white_100 = sum(B03002_003E) / sum(B03002_001E), | |
share_black_100 = sum(B03002_004E) / sum(B03002_001E), | |
share_aian_100 = sum(B03002_005E) / sum(B03002_001E), | |
share_asian_100 = sum(B03002_006E) / sum(B03002_001E), | |
share_nhpi_100 = sum(B03002_007E) / sum(B03002_001E), | |
share_other_100 = sum(B03002_008E) / sum(B03002_001E), | |
share_two_100 = sum(B03002_009E) / sum(B03002_001E), | |
share_hisp_100 = sum(B03002_012E) / sum(B03002_001E), | |
total_adults = sum(B15002_001E), | |
share_edu_less_than_hs = sum(edu_less_than_hs) / sum(B15002_001E), | |
share_edu_hs_diploma = sum(edu_hs_diploma) / sum(B15002_001E), | |
share_edu_some_college = sum(edu_some_college) / sum(B15002_001E), | |
share_edu_four_year = sum(edu_four_year) / sum(B15002_001E), | |
share_edu_advanced = sum(edu_advanced) / sum(B15002_001E), | |
share_edu_less_than_four_year = sum(edu_less_than_four_year) / sum(B15002_001E), | |
hh_total = sum(B25003_001E), | |
hh_own = sum(B25003_002E), | |
hh_rent = sum(B25003_003E), | |
share_hh_own = sum(B25003_002E) / sum(B25003_001E), | |
share_hh_rent = sum(B25003_003E) / sum(B25003_001E), | |
hh_size = sum(unwgt_hh_size) / sum(B25003_001E), | |
unit_detached = sum(unit_detached), | |
unit_small_mfr = sum(unit_small_mfr), | |
unit_medium_mfr = sum(unit_medium_mfr), | |
unit_large_mfr = sum(unit_large_mfr), | |
unit_other = sum(unit_other), | |
share_unit_detached = sum(unit_detached) / sum(B25024_001E), | |
share_unit_small_mfr = sum(unit_small_mfr) / sum(B25024_001E), | |
share_unit_medium_mfr = sum(unit_medium_mfr) / sum(B25024_001E), | |
share_unit_large_mfr = sum(unit_large_mfr) / sum(B25024_001E), | |
share_unit_other = sum(unit_other) / sum(B25024_001E), | |
share_commute_sov = sum(B08301_003E) / sum(B08301_001E), | |
share_commute_non_sov = sum(commute_non_sov)/ sum(B08301_001E), | |
share_commute_active = sum(commute_active) / sum(B08301_001E), | |
share_commute_bicycle = sum(B08301_018E) / sum(B08301_001E), | |
share_commute_walk = sum(B08301_019E) / sum(B08301_001E), | |
share_commute_transit = sum(B08301_010E) / sum(B08301_001E), | |
share_commute_carpool = sum(B08301_004E) / sum(B08301_001E), | |
share_commute_other = sum(commute_other) / sum(B08301_001E), | |
share_commute_telework = sum(B08301_021E) / sum(B08301_001E), | |
age_under_18 = sum(age_under_18), | |
age_18_to_59 = sum(age_18_to_59), | |
age_18_to_64 = sum(age_18_to_64), | |
age_over_59 = sum(age_over_59), | |
age_over_64 = sum(age_over_64), | |
share_age_under_18 = sum(age_under_18) / sum(B03002_001E), | |
share_age_18_to_59 = sum(age_18_to_59) / sum(B03002_001E), | |
share_age_18_to_64 = sum(age_18_to_64) / sum(B03002_001E), | |
share_age_over_59 = sum(age_over_59) / sum(B03002_001E), | |
share_age_over_64 = sum(age_over_64) / sum(B03002_001E), | |
per_cap_income = sum(unwgt_per_cap_income) / sum(B03002_001E) | |
) | |
# Uncomment if also grabbing 2009 data | |
hh_inc_2016 <- get_median_hh_income(fips_list, acs_year = 2016) | |
hh_inc_2010 <- get_median_hh_income(fips_list, acs_year = 2010) | |
#hh_inc_2009 <- get_median_hh_income(fips_list, acs_year = 2009) | |
# Uncomment last line and also reorder the column references (1 = 2009 ... 3 = 2016) | |
result$med_hh_income <- 0 | |
result$med_hh_income[[1]] <- get_median_hh_income(fips_list, 'OR', 2010) | |
result$med_hh_income[[2]] <- get_median_hh_income(fips_list, 'OR', 2016) | |
#result$med_hh_income[[3]] <- get_median_hh_income(fips_list, 'OR', 2016) | |
return(result) | |
} | |
pdx1 <- suite_acs2 %>% | |
filter(GEOID == '4159000') %>% | |
group_by(year) %>% | |
summarize(pop_total = sum(B03002_001E), | |
poc_total = sum(poc_total), | |
share_poc = sum(poc_total) / sum(B03002_001E), | |
total_black_combo = sum(B02009_001E), | |
total_aian_combo = sum(B02010_001E), | |
total_asian_combo = sum(B02011_001E), | |
total_nhpi_combo = sum(B02012_001E), | |
total_other_combo = sum(B02013_001E), | |
total_hisp_combo = sum(B03002_012E), | |
share_white_100 = sum(B03002_003E) / sum(B03002_001E), | |
share_black_100 = sum(B03002_004E) / sum(B03002_001E), | |
share_aian_100 = sum(B03002_005E) / sum(B03002_001E), | |
share_asian_100 = sum(B03002_006E) / sum(B03002_001E), | |
share_nhpi_100 = sum(B03002_007E) / sum(B03002_001E), | |
share_other_100 = sum(B03002_008E) / sum(B03002_001E), | |
share_two_100 = sum(B03002_009E) / sum(B03002_001E), | |
share_hisp_100 = sum(B03002_012E) / sum(B03002_001E), | |
total_adults = sum(B15002_001E), | |
share_edu_less_than_hs = sum(edu_less_than_hs) / sum(B15002_001E), | |
share_edu_hs_diploma = sum(edu_hs_diploma) / sum(B15002_001E), | |
share_edu_some_college = sum(edu_some_college) / sum(B15002_001E), | |
share_edu_four_year = sum(edu_four_year) / sum(B15002_001E), | |
share_edu_advanced = sum(edu_advanced) / sum(B15002_001E), | |
share_edu_less_than_four_year = sum(edu_less_than_four_year) / sum(B15002_001E), | |
hh_total = sum(B25003_001E), | |
hh_own = sum(B25003_002E), | |
hh_rent = sum(B25003_003E), | |
share_hh_own = sum(B25003_002E) / sum(B25003_001E), | |
share_hh_rent = sum(B25003_003E) / sum(B25003_001E), | |
hh_size = sum(unwgt_hh_size) / sum(B25003_001E), | |
unit_detached = sum(unit_detached), | |
unit_small_mfr = sum(unit_small_mfr), | |
unit_medium_mfr = sum(unit_medium_mfr), | |
unit_large_mfr = sum(unit_large_mfr), | |
unit_other = sum(unit_other), | |
share_unit_detached = sum(unit_detached) / sum(B25024_001E), | |
share_unit_small_mfr = sum(unit_small_mfr) / sum(B25024_001E), | |
share_unit_medium_mfr = sum(unit_medium_mfr) / sum(B25024_001E), | |
share_unit_large_mfr = sum(unit_large_mfr) / sum(B25024_001E), | |
share_unit_other = sum(unit_other) / sum(B25024_001E), | |
share_commute_sov = sum(B08301_003E) / sum(B08301_001E), | |
share_commute_non_sov = sum(commute_non_sov)/ sum(B08301_001E), | |
share_commute_active = sum(commute_active) / sum(B08301_001E), | |
share_commute_bicycle = sum(B08301_018E) / sum(B08301_001E), | |
share_commute_walk = sum(B08301_019E) / sum(B08301_001E), | |
share_commute_transit = sum(B08301_010E) / sum(B08301_001E), | |
share_commute_carpool = sum(B08301_004E) / sum(B08301_001E), | |
share_commute_other = sum(commute_other) / sum(B08301_001E), | |
share_commute_telework = sum(B08301_021E) / sum(B08301_001E), | |
age_under_18 = sum(age_under_18), | |
age_18_to_59 = sum(age_18_to_59), | |
age_18_to_64 = sum(age_18_to_64), | |
age_over_59 = sum(age_over_59), | |
age_over_64 = sum(age_over_64), | |
share_age_under_18 = sum(age_under_18) / sum(B03002_001E), | |
share_age_18_to_59 = sum(age_18_to_59) / sum(B03002_001E), | |
share_age_18_to_64 = sum(age_18_to_64) / sum(B03002_001E), | |
share_age_over_59 = sum(age_over_59) / sum(B03002_001E), | |
share_age_over_64 = sum(age_over_64) / sum(B03002_001E), | |
per_cap_income = sum(unwgt_per_cap_income) / sum(B03002_001E) | |
) | |
rossi_fips2 <- c('41051008001', '41051008002', '41051009501', '41051009400', '41051007800', '41051007900', '41051009502') | |
rossi_demo <- get_demographic_profile(rossi_fips2) | |
glimpse(rossi_demo) | |
glimpse(pdx1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment