Skip to content

Instantly share code, notes, and snippets.

  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save bschneidr/6df0c611dc6f4afe13808afc4b8eadf3 to your computer and use it in GitHub Desktop.
library(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(readr)
# Import the race/hispanic-origin ACS data downloaded through American Fact-Finder ----
# The first line of the file is skipped, so column names are taken from the
# second line. Id, Id2 and Geography are read as text; every other column is
# parsed as an integer.
b03002_data <- read_csv(
  "Data/ACS/ZCTA/2013-2017/Raw/B03002/ACS_17_5YR_B03002_with_ann.csv",
  col_types = cols(
    Id = col_character(),
    Id2 = col_character(),
    Geography = col_character(),
    .default = col_integer()
  ),
  skip = 1L
)
# Tidy the data to a more user-friendly format:
# one row per geography and subpopulation, with the estimate and its margin
# of error in separate columns.
b03002_data <- b03002_data %>%
  # Stack every measure column into name/value pairs; only the geography
  # identifiers stay as columns. The identifiers are listed directly because
  # the previous one_of() selection also named "Race", which is not a column
  # at this point and therefore raised an "Unknown columns" warning.
  gather(key = "Measure_Name", value = "Value", -Id, -Id2, -Geography) %>%
  mutate(
    # Column names starting with "Estimate" / "Margin" identify the measure;
    # anything else is left as NA.
    Measure_Type = case_when(
      str_detect(Measure_Name, "^Estimate") ~ "Estimate",
      str_detect(Measure_Name, "^Margin") ~ "Margin of Error"
    ),
    # Strip the measure prefix, surrounding whitespace, and one trailing
    # ":" or ";" to leave just the subpopulation label.
    Subpopulation = Measure_Name %>%
      str_remove("(Estimate;)|(Margin of Error;)") %>%
      str_trim() %>%
      str_remove("(:|;)$")
  ) %>%
  select(-Measure_Name) %>%
  # One column per measure type ("Estimate", "Margin of Error").
  spread(key = "Measure_Type", value = "Value") %>%
  # Replace runs of whitespace in column names with "_"
  # (e.g. "Margin of Error" becomes "Margin_of_Error").
  rename_all(.funs = function(x) str_replace_all(x, "[[:space:]]+", "_"))
# Split each subpopulation label into a Hispanic/Latino status and a race.
b03002_data <- b03002_data %>%
  mutate(
    # Patterns are anchored at the start of the label, so labels beginning
    # with "Not Hispanic or Latino" can only match the second pattern.
    Hispanic_or_Latino_Status = case_when(
      str_detect(Subpopulation, "^Hispanic or Latino") ~ "Hispanic or Latino",
      str_detect(Subpopulation, "^Not Hispanic or Latino") ~ "Not Hispanic or Latino",
      str_detect(Subpopulation, "^Total") ~ "Total"
    ),
    # A label that is exactly one of the two statuses is that status's total;
    # otherwise the race is whatever remains after removing the status prefix.
    Race = case_when(
      Subpopulation %in% c("Hispanic or Latino", "Not Hispanic or Latino") ~ "Total",
      TRUE ~ str_trim(str_remove(Subpopulation, "^(Not |)Hispanic or Latino: -"))
    )
  ) %>%
  select(-Subpopulation)
# Drop the two rows that break "Two or more races" down further
# (presumably so they are not counted on top of the "Two or more races"
# row itself -- confirm).
b03002_data <- filter(
  b03002_data,
  !(Race %in% c(
    "Two or more races: - Two races excluding Some other race, and three or more races",
    "Two or more races: - Two races including Some other race"
  ))
)
# Build an easy-to-read table in which all Hispanic/Latino persons form one
# distinct category and every other category is a race among persons who are
# not Hispanic/Latino
# (i.e. the 'Black or African-American' category excludes persons who are
# also Hispanic-Latino).
total_population_sizes_by_race_or_hisp_latino_status <- b03002_data %>%
  # Keep the overall Hispanic/Latino total plus each race-specific row for
  # the non-Hispanic/Latino population.
  filter(
    (Hispanic_or_Latino_Status == "Hispanic or Latino" & Race == "Total") |
      (Hispanic_or_Latino_Status == "Not Hispanic or Latino" & Race != "Total")
  ) %>%
  # The filter above leaves no missing statuses, so a plain two-way choice
  # is enough to label each row.
  mutate(
    Race_or_Hispanic_Latino_Status = if_else(
      Hispanic_or_Latino_Status == "Hispanic or Latino",
      "Hispanic or Latino",
      Race
    )
  ) %>%
  # Selecting the output columns also drops Race and Hispanic_or_Latino_Status.
  select(
    Id, Id2, Geography,
    Race_or_Hispanic_Latino_Status,
    Estimate, Margin_of_Error
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment