Skip to content

Instantly share code, notes, and snippets.

@morganmelon
Last active October 22, 2016 23:04
Show Gist options
  • Save morganmelon/072d0356becbfc0350e0414225a8d218 to your computer and use it in GitHub Desktop.
Save morganmelon/072d0356becbfc0350e0414225a8d218 to your computer and use it in GitHub Desktop.
Lab 6: Spine Graphs for Households and Children's Parents
#Morgan Waterman
#Lab 6
#Weighting by Households & Children
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
#Program to print plots
printplot <- function(plot, filename) {
png(paste(filename,'.png', sep=''), height= 500, width = 1000)
print(plot)
dev.off()
}
ipums <- read_csv('data/CoupleKidsData.csv', col_types = cols(HHWT = col_double(), PERWT = col_double()))
a <- ipums %>% filter(!(STATEFIP %in% c(2,15))| YEAR >=1960)
#Recode region
aa <- a %>% mutate(Region=factor(ifelse(STATEFIP %in% c(4, 6, 8, 16, 30, 32, 35, 41, 49, 53, 56, 2, 15), 4,
ifelse(STATEFIP %in% c(17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55), 3,
ifelse(STATEFIP %in% c(9, 23, 25, 33, 34, 36, 42, 44, 50), 1, 2))),
labels = c('Northeast', 'South', 'Midwest', 'West')))
#data frame households
households <- aa %>% filter(GQ==1)
#Data frame just heads
heads <- households %>% filter(RELATE==1) %>% rename(Hrace = RACE)
#data frame just spouses
spouse <- households %>% filter(RELATE==2) %>% select(YEAR, SERIAL, PERNUM, RACE) %>% rename(Srace=RACE)
#joining heads and spouses
totalheads <- left_join(heads, spouse, by = c('YEAR', 'SERIAL', 'SPLOC' = 'PERNUM')) %>%
mutate(Race=factor(ifelse(SPLOC==0, 1,
ifelse(Hrace!=Srace, 2, 3)),
labels = c('Unmarried', 'Different Race', 'Same Race')))
#aggregate
allgroup <- totalheads %>% group_by(YEAR, Race, Region) %>% summarise(Number=sum(HHWT))
#make variable for total in a year
householdyr <- allgroup %>% group_by(YEAR, Region) %>% mutate(totalhouse=sum(Number))
#join allgroup and total
pctrace <- left_join(allgroup, householdyr)
#create percentage variable
pctrace<- pctrace %>% mutate(pct = Number/totalhouse)
library(scales)
#spine graph 1 with adjustments
graph1 <- ggplot(pctrace, aes(x=Region, y=pct, fill=Race)) +
geom_bar(stat='identity', aes(width=rescale(totalhouse, c(0.1,1)))) +
labs(fill = 'Race of Head and Spouse',
title = 'Percent of Households Headed by Couples of Same and Different Races, 1900-1990', x = 'Region', y= 'Percent of Households') +
theme_bw(base_size=8) +
facet_wrap(~YEAR, nrow=2) +
scale_y_continuous(labels=scales::percent)
printplot(graph1, 'couplesraceagain')
#add text
graph2 <- graph1 +
geom_text(label=ifelse(pctrace$Race=='Different Race', paste('Different =', round(pctrace$pct, digits = 3), '%', sep = ''), ''),
y = ifelse(pctrace$Race=='Different Race', .5,.9), angle = 90)
printplot(graph2, 'withtext')
#Kid Graph
#make children
children <- households %>% filter(AGE<=18 & (POPLOC>0 | MOMLOC>0))
#identify mothers
mother <- households %>% filter(SEX==2) %>% select(YEAR, SERIAL, PERNUM, RACE) %>% rename(momrace=RACE)
#identify fathers
father <- households %>% filter(SEX==1) %>% select(YEAR, SERIAL, PERNUM, RACE) %>% rename(poprace=RACE)
#join
parents <- left_join(children, mother, by=c('YEAR', 'SERIAL', 'MOMLOC'='PERNUM')) %>% left_join(father, by=c('YEAR', 'SERIAL', 'POPLOC'='PERNUM'))
#exclude kids in households without parents
notorphans <- parents %>% filter(!(MOMLOC==0 & POPLOC==0))
#Make race variable
raceparents <- notorphans %>% mutate(Racep=factor(ifelse(MOMLOC==0 | POPLOC==0, 1,
ifelse(momrace!=poprace, 2, 3)),
labels = c('Only one parent', 'Different Race', 'Same Race')))
#aggregate
allparents <- raceparents %>% group_by(YEAR, Racep, Region) %>% summarise(Numberp=sum(HHWT))
#make total for yr
parentsyr <- allparents %>% group_by(YEAR, Region) %>% mutate(totalparents=sum(Numberp))
#join
pctracep <- left_join(allparents, parentsyr)
#calculate variable for percent
pctracep<- pctracep %>% mutate(pct = Numberp/totalparents)
#kid spine graph
graphkids <- ggplot(pctracep, aes(x=Region, y=pct, fill=Racep)) +
geom_bar(stat='identity', aes(width=rescale(totalparents, c(0.1,1)))) +
labs(fill = 'Race of Parents', title = 'Percent of Children with Parents of Same Race and Different Races, 1900-1990', x = 'Region', y= 'Percent of Children') +
theme_bw(base_size=8) +
facet_wrap(~YEAR, nrow=2) +
scale_y_continuous(labels=scales::percent) +
geom_text(label=ifelse(pctracep$Racep=='Different Race', paste('Different =', round(pctracep$pct, digits = 3), '%', sep = ''), ''),
y = ifelse(pctracep$Racep=='Different Race', .5,.9), angle = 90)
printplot(graphkids, 'parentsrace')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment