Last active
October 22, 2016 23:04
-
-
Save morganmelon/072d0356becbfc0350e0414225a8d218 to your computer and use it in GitHub Desktop.
Lab 6: Spine Graphs for Households and Children's Parents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Morgan Waterman | |
#Lab 6 | |
#Weighting by Households & Children | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
#Program to print plots | |
printplot <- function(plot, filename) { | |
png(paste(filename,'.png', sep=''), height= 500, width = 1000) | |
print(plot) | |
dev.off() | |
} | |
ipums <- read_csv('data/CoupleKidsData.csv', col_types = cols(HHWT = col_double(), PERWT = col_double())) | |
a <- ipums %>% filter(!(STATEFIP %in% c(2,15))| YEAR >=1960) | |
#Recode region | |
aa <- a %>% mutate(Region=factor(ifelse(STATEFIP %in% c(4, 6, 8, 16, 30, 32, 35, 41, 49, 53, 56, 2, 15), 4, | |
ifelse(STATEFIP %in% c(17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55), 3, | |
ifelse(STATEFIP %in% c(9, 23, 25, 33, 34, 36, 42, 44, 50), 1, 2))), | |
labels = c('Northeast', 'South', 'Midwest', 'West'))) | |
#data frame households | |
households <- aa %>% filter(GQ==1) | |
#Data frame just heads | |
heads <- households %>% filter(RELATE==1) %>% rename(Hrace = RACE) | |
#data frame just spouses | |
spouse <- households %>% filter(RELATE==2) %>% select(YEAR, SERIAL, PERNUM, RACE) %>% rename(Srace=RACE) | |
#joining heads and spouses | |
totalheads <- left_join(heads, spouse, by = c('YEAR', 'SERIAL', 'SPLOC' = 'PERNUM')) %>% | |
mutate(Race=factor(ifelse(SPLOC==0, 1, | |
ifelse(Hrace!=Srace, 2, 3)), | |
labels = c('Unmarried', 'Different Race', 'Same Race'))) | |
#aggregate | |
allgroup <- totalheads %>% group_by(YEAR, Race, Region) %>% summarise(Number=sum(HHWT)) | |
#make variable for total in a year | |
householdyr <- allgroup %>% group_by(YEAR, Region) %>% mutate(totalhouse=sum(Number)) | |
#join allgroup and total | |
pctrace <- left_join(allgroup, householdyr) | |
#create percentage variable | |
pctrace<- pctrace %>% mutate(pct = Number/totalhouse) | |
library(scales) | |
#spine graph 1 with adjustments | |
graph1 <- ggplot(pctrace, aes(x=Region, y=pct, fill=Race)) + | |
geom_bar(stat='identity', aes(width=rescale(totalhouse, c(0.1,1)))) + | |
labs(fill = 'Race of Head and Spouse', | |
title = 'Percent of Households Headed by Couples of Same and Different Races, 1900-1990', x = 'Region', y= 'Percent of Households') + | |
theme_bw(base_size=8) + | |
facet_wrap(~YEAR, nrow=2) + | |
scale_y_continuous(labels=scales::percent) | |
printplot(graph1, 'couplesraceagain') | |
#add text | |
graph2 <- graph1 + | |
geom_text(label=ifelse(pctrace$Race=='Different Race', paste('Different =', round(pctrace$pct, digits = 3), '%', sep = ''), ''), | |
y = ifelse(pctrace$Race=='Different Race', .5,.9), angle = 90) | |
printplot(graph2, 'withtext') | |
#Kid Graph | |
#make children | |
children <- households %>% filter(AGE<=18 & (POPLOC>0 | MOMLOC>0)) | |
#identify mothers | |
mother <- households %>% filter(SEX==2) %>% select(YEAR, SERIAL, PERNUM, RACE) %>% rename(momrace=RACE) | |
#identify fathers | |
father <- households %>% filter(SEX==1) %>% select(YEAR, SERIAL, PERNUM, RACE) %>% rename(poprace=RACE) | |
#join | |
parents <- left_join(children, mother, by=c('YEAR', 'SERIAL', 'MOMLOC'='PERNUM')) %>% left_join(father, by=c('YEAR', 'SERIAL', 'POPLOC'='PERNUM')) | |
#exclude kids in households without parents | |
notorphans <- parents %>% filter(!(MOMLOC==0 & POPLOC==0)) | |
#Make race variable | |
raceparents <- notorphans %>% mutate(Racep=factor(ifelse(MOMLOC==0 | POPLOC==0, 1, | |
ifelse(momrace!=poprace, 2, 3)), | |
labels = c('Only one parent', 'Different Race', 'Same Race'))) | |
#aggregate | |
allparents <- raceparents %>% group_by(YEAR, Racep, Region) %>% summarise(Numberp=sum(HHWT)) | |
#make total for yr | |
parentsyr <- allparents %>% group_by(YEAR, Region) %>% mutate(totalparents=sum(Numberp)) | |
#join | |
pctracep <- left_join(allparents, parentsyr) | |
#calculate variable for percent | |
pctracep<- pctracep %>% mutate(pct = Numberp/totalparents) | |
#kid spine graph | |
graphkids <- ggplot(pctracep, aes(x=Region, y=pct, fill=Racep)) + | |
geom_bar(stat='identity', aes(width=rescale(totalparents, c(0.1,1)))) + | |
labs(fill = 'Race of Parents', title = 'Percent of Children with Parents of Same Race and Different Races, 1900-1990', x = 'Region', y= 'Percent of Children') + | |
theme_bw(base_size=8) + | |
facet_wrap(~YEAR, nrow=2) + | |
scale_y_continuous(labels=scales::percent) + | |
geom_text(label=ifelse(pctracep$Racep=='Different Race', paste('Different =', round(pctracep$pct, digits = 3), '%', sep = ''), ''), | |
y = ifelse(pctracep$Racep=='Different Race', .5,.9), angle = 90) | |
printplot(graphkids, 'parentsrace') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment