Skip to content

Instantly share code, notes, and snippets.

@helenaeitel
Created October 24, 2016 15:29
Show Gist options
  • Save helenaeitel/4e7dab7846ab6a6c1b97abea61ac09c1 to your computer and use it in GitHub Desktop.
Save helenaeitel/4e7dab7846ab6a6c1b97abea61ac09c1 to your computer and use it in GitHub Desktop.
#Helena Eitel
#Professor Merchant
#QSS 30.05
#Lab Assignment 6
library(RColorBrewer)
library(dplyr)
library(readr)
library(ggplot2)
library(scales)
a <- read_csv('Extract 6.csv',col_types=cols(HHWT=col_double(),PERWT=col_double()))
#exclude Alaska and Hawaii before 1960, exclude anyone living in group quarters
c <- a %>% filter(YEAR>= 1960 | !(STATEFIP %in% c(2,15) & GQ==1))
#add a region variable
d <- c %>% mutate(Region=factor(ifelse(STATEFIP %in% c(9,23,25,33,34,36,42,44,50),1,
ifelse(STATEFIP %in% c(17,18,19,20,26,27,29,31,38,39,46,55),3,
ifelse(STATEFIP %in% c(4,6,8,16,30,32,41,49,53,56),4,2))),
labels=c('Northeast','South','Midwest','West')))
#GRAPH 1: Number of households headed by same or different race couples or single parents
#join husbands to their wives
husbands <- d %>% filter(SEX==1 & SPLOC>0) %>%
rename(hRace=RACE)
wives <- d %>% filter(SEX==2 & SPLOC>0) %>%
select(YEAR,SERIAL,PERNUM,RACE) %>% rename(wRace=RACE)
couples <- left_join(husbands,wives,by=c('YEAR','SERIAL','SPLOC'='PERNUM')) %>%
select(YEAR,SERIAL,Region,hRace,wRace)
#join the couples to a dataframe of all household heads (including single household heads)
heads <- d %>% filter(RELATE==1)
total <- left_join(heads,couples)
#create a new variable for the race of the couple
total2 <- total %>% mutate(cRace=factor(ifelse(SPLOC==0,1,
ifelse(hRace!=wRace,2,3)),
labels=c('Single','Different Races','Same Race')))
#calculate percents
households <- total2 %>% group_by(YEAR,Region,cRace) %>% summarise(numhh=sum(HHWT))
totalhouseholds <- total2 %>% group_by(YEAR,Region) %>% summarise(tothh=sum(HHWT))
graph1 <- left_join(households,totalhouseholds)
graph2 <- graph1 %>% mutate(Percent=numhh/tothh*100)
GRAPH1 <- ggplot(graph2, aes(x=Region,y=Percent/100,fill=cRace)) + theme_gray(18) +
geom_bar(data=graph2,stat='identity',aes(width=rescale(tothh,c(0.1,1))),position='stack') +
labs(fill='Head and Spouse',title='Percent of Households Headed by Couples of the Same Race and Different Races, 1900-1990',x='Region',y='Percent of Households') +
scale_y_continuous(labels=scales::percent) +
facet_wrap(~YEAR,ncol=5) +
scale_fill_brewer(palette='Set2') +
geom_text(label=ifelse(graph2$cRace=='Different Races',paste('Different=',round(graph2$Percent,digits=1),'%',sep=''),''),angle=90,y=.5,size=5)
png('GRAPH1.png',width=1500,height=1000)
print(GRAPH1)
dev.off()
#GRAPH 2
#make dataframes for childen, all possible fathers and mothers (all men and women)
children <- d %>% filter(AGE<=18 & (POPLOC>0 | MOMLOC>0))
father <- d %>% filter(SEX==1) %>% select(YEAR,SERIAL,PERNUM,RACE) %>% rename(popRace=RACE)
mother <- d %>% filter(SEX==2) %>% select(YEAR,SERIAL,PERNUM,RACE) %>% rename(momRace=RACE)
#join mothers and fathers to their children (where POPLOC and/or MOMLOC equal PERNUM)
Parents <- left_join(children, mother, by=c('YEAR', 'SERIAL', 'MOMLOC'='PERNUM')) %>%
left_join(father, by=c('YEAR', 'SERIAL', 'POPLOC'='PERNUM'))
Parents <- Parents %>% mutate(Racep=factor(ifelse((MOMLOC==0 | POPLOC==0),1,
ifelse(momRace==popRace,3,2)),
labels=c('Only one Parent','Different Races','Same Race')))
#calculate percents
Total <- Parents %>% group_by(YEAR,Region,Racep) %>% summarise(Number=sum(PERWT))
Totalchildren <- children %>% group_by(YEAR,Region) %>% summarise(TotNumber=sum(PERWT))
Graph2 <- left_join(Total,Totalchildren) %>% mutate(Perc=Number/TotNumber*100)
GRAPH2 <- ggplot(Graph2, aes(x=Region,y=Perc/100,fill=Racep)) + theme_gray(18) +
geom_bar(data=Graph2,stat='identity',aes(width=rescale(TotNumber,c(0.1,1))),position='stack') +
labs(fill='Parents',title='Percent of Children with Parents of Same Race and Different Races, 1900-1990',x='Region',y='Percent of Children') +
scale_y_continuous(labels=scales::percent) +
facet_wrap(~YEAR,ncol=5) +
scale_fill_brewer(palette='Set2') +
geom_text(label=ifelse(Graph2$Racep=='Different Races',
paste('Different=',round(Graph2$Perc,digits=1),'%',sep=''),''),
angle=90,y=.5,size=5)
png('GRAPH2.png',width=1500,height=1000)
print(GRAPH2)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment