Created
August 3, 2018 19:43
top 1 percent ipums
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# File # 1 | |
# https://drive.google.com/open?id=1Lhz23JP4gRW4p_01D7OzaL60CT1wfem- | |
# File # 2 | |
# https://drive.google.com/open?id=1XOWD8COTdDx30HNLZgrp57WzWUGOtaKA | |
# NOTE: To load data, you must download both the extract's data and the DDI | |
# and also set the working directory to the folder with these files (or change the path below). | |
#install.packages("tidyverse") | |
library(tidyverse) | |
install.packages("ipumsr") | |
library(ipumsr) | |
# Put the two files (usa_00008.dat.gz and usa_00008.xml) into your working directory | |
ddi <- read_ipums_ddi("usa_00008.xml") | |
data <- read_ipums_micro(ddi) | |
install.packages("Hmisc") | |
library(Hmisc) | |
View(data) | |
data %>% | |
select_if(is.labelled) | |
ipums_val_labels(data$STATEFIP) | |
# Convert the labels to factors (and drop the unused levels) | |
data <- data %>% | |
mutate(STATE_factor = as_factor(lbl_clean(STATEFIP)), | |
COUNTY_factor = as_factor(lbl_clean(COUNTY)), | |
COUNTYFIPS_factor = as_factor(lbl_clean(GQ)), | |
HHINCOME_factor = as_factor(lbl_clean(HHINCOME))) | |
md <- filter(data, STATE_factor=="Maryland") | |
View(md) | |
mont <- filter(data, COUNTYFIPS==31) | |
View(mont) | |
# wrong | |
wtd.quantile(mont$HHINCOME, mont$HHWT, probs=c(.99)) | |
range(mont$HHINCOME) | |
# let's fix | |
mont2 <- mont %>% mutate( | |
HHINCOME = as.numeric(as.character(HHINCOME)), | |
HHINCOME=case_when( | |
HHINCOME==9999999 ~ 0, | |
TRUE ~ HHINCOME | |
)) | |
# What's the range now? | |
range(mont2$HHINCOME) | |
wtd.quantile(mont2$HHINCOME, mont2$HHWT, probs=c(.99)) | |
# Now let's see where the county ranks in the country | |
ranked <- filter(data, HHINCOME!=9999999) %>% | |
group_by(STATE_factor) %>% | |
summarize(one_percent=wtd.quantile(HHINCOME, HHWT, probs=c(.99))) %>% | |
mutate(rank=rank(desc(one_percent))) | |
View(ranked) | |
# Prep the data | |
data <- mutate(data, | |
statef=as.character(STATEFIP), | |
countyf=as.character(COUNTYFIPS), | |
statef=case_when( | |
nchar(statef)==1 ~ paste0("0", statef), | |
TRUE ~ statef), | |
countyf=case_when( | |
nchar(countyf)==1 ~ paste0("00", countyf), | |
nchar(countyf)==2 ~ paste0("0", countyf), | |
TRUE ~ countyf), | |
fips=paste0(statef, countyf)) | |
# Rank by all available counties | |
# PERNUM is persons per household. | |
# We want this to be 1 so it's not double counting | |
# https://usa.ipums.org/usa-action/variables/PERNUM#description_section [usa.ipums.org] | |
# and GQ stands for Group Housing status | |
# https://usa.ipums.org/usa-action/variables/PERNUM#description_section [usa.ipums.org] | |
# Everything higher than 3 is institutions and dorms or group homes, etc | |
ranked <- filter(data, HHINCOME!=9999999) %>% | |
filter(PERNUM==1) %>% | |
filter(GQ<3) %>% | |
group_by(STATE_factor, fips) %>% | |
summarize(one_percent=wtd.quantile(HHINCOME, HHWT, probs=c(.99)), | |
count=n()) %>% | |
ungroup() %>% | |
mutate(rank=rank(-one_percent)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment