Skip to content

Instantly share code, notes, and snippets.

@azadag
Last active April 10, 2018 18:39
Show Gist options
  • Save azadag/96b7d6ed371c241d755114d5b5e43a18 to your computer and use it in GitHub Desktop.
Save azadag/96b7d6ed371c241d755114d5b5e43a18 to your computer and use it in GitHub Desktop.
basic ipums summarisation examples
install.packages('data.table', 'dplyr', 'plyr')
library(dplyr)
data_acs <- data.table::fread("./data/usa_00035.csv")
data_acs <- tbl_df(data_acs)
## make an example age dummy variable
data_acs$AGE_VAR <- ifelse(data_acs$AGE < 15, 1, 0)
data_acs$AGE_VAR <- ifelse(data_acs$AGE >= 24 & data_acs$AGE <= 36 , 2, data_acs$AGE_VAR)
data_acs$AGE_VAR <- ifelse(data_acs$AGE > 36 & data_acs$AGE <= 51 , 3, data_acs$AGE_VAR)
data_acs$AGE_VAR <- ifelse(data_acs$AGE > 51 & data_acs$AGE < 70, 4, data_acs$AGE_VAR)
data_acs$AGE_VAR <- ifelse(data_acs$AGE > 71 , 5, data_acs$AGE_VAR)
#### create income groups determination #####
levels <- c(-Inf, 50000, 74999, 99999, 124999, 149999, 199999, 299999, 399999, 599999, Inf)
labels <- c("Less50k", "b50_75", "b75_100", "b100_125", "b125_149", "b150_200", "b200_300",
"b300_400", "b400_600", "More600")
data_acs$HHINCOME <- with( data_acs, ifelse( HHINCOME == 9999999, 0, HHINCOME))
## make an education dummy ##
data_acs <- data_acs %>% mutate(EDUC1 = as.factor(ifelse(EDUCD <= 61, "Less than HS",
ifelse(EDUCD == 62 | EDUCD == 63 | EDUCD == 64, "HS / equiv",
ifelse(EDUCD == 65 | EDUCD == 70 | EDUCD == 71 | EDUCD == 80| EDUCD == 90 | EDUCD == 100 , "Some College",
ifelse(EDUCD == 82 | EDUCD == 83, "AD",
ifelse(EDUCD == 101, "College Degree",
ifelse( EDUCD == 114 | EDUCD == 115, "Masters / Professional",
ifelse( EDUCD == 116, "Doctoral",
"NA")))))))))
data_acs18$MARST <- as.factor(data_acs18$MARST)
data_acs18 <- data_acs18 %>% mutate(married = plyr::mapvalues(MARST, from = c("1","2","3","4","5","6"), to = c("1","1","3","2","3","2")))
# only keep 18+
data_acs18 <- data_acs %>% filter(AGE >= 18)
## example summarisations
## of owners what percentage have different education levels
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>%
group_by(EDUC1) %>%
summarize( num = sum(PERWT) ) %>%
mutate(PERpct = num / sum(num))
counts
## Of owners what is the ownership rate by education and age groups
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% group_by(EDUC1) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_educ_nums.csv")
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% group_by(AGE_VAR3, EDUC1) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_educ_agevar_nums.csv")
## marriage renters 2014
counts <- data_acs18 %>% filter( OWNERSHP== "2", YEAR == 2014) %>% group_by(married) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_married_nums.csv")
counts <- data_acs18 %>% filter(OWNERSHP== "2", YEAR == 2014) %>% group_by(AGE_VAR3, married) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_age_var_married_nums.csv")
####
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014) %>% group_by(OWNERSHP, AGE_VAR3, SEX, RACEB, hhincsum) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_rentersnums.csv")
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR3), AGE_VAR3 !=4, AGE_VAR3 !=0, RACEB != "NA", RACEB != "2X" ) %>%
group_by(OWNERSHP, AGE_VAR3, RACEB, SEX) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_rentersnums_2.csv")
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR3), AGE_VAR3 !=4, AGE_VAR3 !=0) %>%
group_by(OWNERSHP, AGE_VAR3, SEX, hhincsum) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_rentersnumsinc_0.csv")
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR3), AGE_VAR3 !=4, AGE_VAR3 !=0) %>%
group_by(OWNERSHP, AGE_VAR3, SEX, RACEB) %>% summarize( num = sum(PERWT) ) #%>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_rentersnumsrace_0.csv")
counts <- data_acs18 %>% filter(OWNERSHP!= "0", YEAR == 2014, !is.na(AGE_VAR4)) %>% group_by(OWNERSHP, AGE_VAR4, SEX, RACEB) %>% summarize( num = sum(PERWT) ) %>% mutate(PERpct = num / sum(num))
counts
write.csv(counts, "./csv_out/CA_rentersnumsrace_0.csv")
chartHH <- data_acs_hhinc1 %>% filter(YEAR == "2014", YEAR == 2014) %>% group_by(HHTYPE, RACEB, SEX) %>% summarize(HH = sum(HHWT)) %>%
ungroup() %>% group_by(HHTYPE) %>% mutate(HHpct = HH / sum(HH))
chartHH0 <- data_acs_hhinc1 %>% group_by(YEAR, OWNERSHP) %>% summarize(HH = sum(HHWT)) %>%
ungroup() %>% group_by(YEAR) %>% mutate(HHpct = HH / sum(HH))
chartHH0$OWNERSHP <- as.factor(chartHH0$OWNERSHP)
chartHHyearOwnRA2015 <- data_acs_hhinc1 %>% filter(YEAR == "2014") %>% group_by( OWNERSHP, RACEB, AGE_VAR3, SEX) %>% summarize(HH = sum(HHWT)) %>%
group_by( RACEB, AGE_VAR3) %>% mutate(HHpct = HH / sum(HH))
chartHHyearOwnRA2015$OWNERSHP <- as.factor(chartHHyearOwnRA2015$OWNERSHP)
head(chartHHyearOwnRA2015)
chartHH0out <- chartHH0
chartHH0out$HHpct <- round(chartHH0out$HHpct,2)
chartHH0out <- chartHH0out %>% filter( OWNERSHP != "0")
chartHH0out <- chartHH0out %>% mutate( OwnershipType = plyr::mapvalues(OWNERSHP, c(1,2), c('Owners', 'Renters')))
chartHH0out <- chartHH0out %>% select(YEAR, HH, HHpct, OwnershipType)
# chartHH0out %>% tidyr::spread(OwnershipType, HH )
write.csv(chartHH0out, ".\\csv_out\\CA_ownerssh_pct.csv")
chartHH0out1 <- chartHH0out %>% select(YEAR, HHpct, OwnershipType) %>% tidyr::spread(OwnershipType, HHpct )
chartHH0out2 <- chartHH0out %>% select(YEAR, HH, OwnershipType) %>% tidyr::spread(OwnershipType, HH )
names(chartHH0out1) <- c("YEAR","OwnersPCT", "RentersPCT")
chartHH0outstate <- cbind(chartHH0out1, chartHH0out2)
write.csv(chartHH0outstate, ".\\csv_out\\CA_ownerssh_pctWIDE.csv")
## race
chartHH0out <- chartHHyearOwn %>% filter( OWNERSHP == "1") %>% select(-OWNERSHP)
chartHH0out$HHpct <- round(chartHH0out$HHpct,3)
# chartHH0out <- chartHH0out %>% filter( OWNERSHP != "0")
# chartHH0out <- chartHH0out %>% mutate( OwnershipType = plyr::mapvalues(OWNERSHP, c(1,2), c('Owners', 'Renters')))
chartHH0out <- chartHH0out %>% select(YEAR, HH, HHpct, RACEB)
# chartHH0out %>% tidyr::spread(OwnershipType, HH )
write.csv(chartHH0out, ".\\csv_out\\CA_ownerssh_pctRACE.csv")
chartHH0out1 <- chartHH0out %>% select(YEAR, HHpct, RACEB) %>% tidyr::spread(RACEB, HHpct )
chartHH0out2 <- chartHH0out %>% select(YEAR, HH, RACEB) %>% tidyr::spread(RACEB, HH )
names(chartHH0out1) <- c("YEAR","ASIANpct", "BLACKpct", "HISPANICpct", "OTHERpct", "WHITEpct")
chartHH0outstateRACE <- cbind(chartHH0out1, chartHH0out2)
write.csv(chartHH0outstateRACE, ".\\csv_out\\CA_ownerssh_pctRACWIDE.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment