Last active
March 5, 2022 21:51
-
-
Save chuckpurvis/281a786c06593afbf256f184a567a5ce to your computer and use it in GitHub Desktop.
Extract a Data Table from the American Community Survey-based Census Transportation Planning Package, 2012-2016, using the R package CTPPr, and analyze using TIDYVERSE tools like dplyr. https://ctpp.transportation.org/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################### | |
# CTPPr_1216_Calif_Place_VHH_HHSize_Tenure.r | |
# Use the package CTPPr to get table A112309 from the | |
# 2012-2016 CTPP. This is used instead of going to | |
# Beyond 2020 to download a CSV file..... | |
# | |
# "Three-Way CTPP Table" | |
# | |
# A112309 = Households by Household Size (5) by | |
# Vehicles Available (5) by | |
# Tenure (5) | |
# | |
# Continuing with the Westat R Package "CTPPr" | |
# CTPPr is authored by: Anthony Fucci, Alexander Cates | |
# and Marcelo Simas of Westat | |
# Note that there isn't an option to select only certain | |
# tracts, places, TADs, TAZs, WITHIN a state... only the | |
# ENTIRE state! User may prefer to use the Beyond2020 | |
# software for extracting only some parts within a state! | |
# | |
# Job script prepared by Chuck Purvis, Hayward, California | |
# -- February 20, 2022 -- | |
############################################################## | |
# Install CTPPr onto local computer.. Just need to do once! | |
# I'm just not sure about etiquette for updating packages! | |
install.packages('devtools') | |
devtools::install_github('Westat-Transportation/CTPPr') | |
# Activate/load the CTPPr library for this script. | |
library("CTPPr") | |
# optional libraries to load, depending on how I expand the examples! | |
library('dplyr') | |
library("tidyverse") | |
library(tidyr) | |
library(stringr) | |
# This data pull from the AASHTO website takes several minutes | |
# Be prepared to wait.... | |
step1 <- download_ctpp(A112309,dataset='2016', | |
geography='Place', | |
state="California", | |
output = 'FIPS and Name') | |
# cleans up the tilde-n....La Canada, Flintridge, California | |
step1$RESIDENCE <- iconv(step1$RESIDENCE, "latin1", "ASCII","n") | |
# And now to continue with the pivot_wider tests.... | |
# pivot wider with longest/descriptive variable names. | |
# The input data is slightly different format, Beyond 2020 Export | |
# vs CTPPr, with BOTH the Estimate and Standard Error (SE) in the | |
# same record in CTPPr, but different record (row)in Beyond 2020. | |
results1 <- step1 %>% | |
pivot_wider(names_from = | |
c("Household Size 5", | |
"Vehicles Available 5", | |
"Tenure 5"), | |
values_from = Estimate, -SE) | |
results1_se <- step1 %>% | |
pivot_wider(names_from = | |
c("Household Size 5", | |
"Vehicles Available 5", | |
"Tenure 5"), | |
values_from = SE, -Estimate) | |
# The follow re-combines the Estimates and Standard Errors into one file | |
# but instead of "_est" and "_moe" in the variable name I get | |
# ".x" (for estimates) and ".y" (for standard errors) in all variable names. | |
combined1 <- dplyr::left_join(results1,results1_se, | |
by="RESIDENCE") | |
# I finally found the solutation!! | |
combined2 <- dplyr::left_join(results1,results1_se, | |
by="RESIDENCE", | |
suffix = c("_est", "_se")) | |
################################################################ | |
# Recode the variable names and variable strings to simpler mnemonics | |
step2 <- step1 %>% | |
rename(hhsizex="Household Size 5", | |
vhhx="Vehicles Available 5", | |
tenurex="Tenure 5") %>% | |
mutate(vhh = recode(vhhx, | |
"0 vehicles" = "0VHH", | |
"1 vehicle" = "1VHH", | |
"2 vehicles" = "2VHH", | |
"3-or-more vehicles" = "3pVHH", | |
"Total, vehicles available" = "TotalV")) %>% | |
mutate(hhsize = recode(hhsizex, | |
"Total households" = "TotalP", | |
"1-person Household" = "PHH1", | |
"2-person Household" = "PHH2", | |
"3-person Household" = "PHH3", | |
"4-or-moreperson Household" = "PHH4p")) %>% | |
mutate(tenure = recode(tenurex, | |
"Total households" = "TotalT", | |
"Owned with mortgage" = "Own_with", | |
"Owned without mortgage" = "Own_free", | |
"Rented" = "Rent", | |
"Occupied without rent" = "Rent_free")) %>% | |
mutate(Households=Estimate) | |
## mutate(Output2 = recode(Output, "Estimate" = "est", | |
## "Margin of Error" = "moe")) | |
# pivot wider with shorter mnemonic variable names. | |
results2 <- step2 %>% | |
select(-vhhx,-hhsizex, -tenurex, -SE,-Estimate) %>% | |
pivot_wider(names_from = | |
c(vhh,hhsize,tenure), | |
values_from = Households) | |
results2se <- step2 %>% | |
select(-vhhx,-hhsizex, -tenurex, -Households,-Estimate) %>% | |
pivot_wider(names_from = | |
c(vhh,hhsize,tenure), | |
values_from = SE) | |
combined3 <- dplyr::left_join(results2,results2se, | |
by="RESIDENCE", | |
suffix = c("_est", "_se")) | |
########################################################################## | |
## Split the RESIDENCE variable into its FIPS Code + Name | |
## and relocate those columns to the left. | |
combined3$FIPS_Resid <- substr(combined3$RESIDENCE,1,7) | |
combined3$Name_Resid <- substr(combined3$RESIDENCE,10,99) | |
combined3 <- combined3 %>% | |
dplyr::relocate(FIPS_Resid,Name_Resid,.after=RESIDENCE) | |
######################################################################## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment