Created
October 24, 2016 15:29
-
-
Save helenaeitel/4e7dab7846ab6a6c1b97abea61ac09c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Helena Eitel | |
#Professor Merchant | |
#QSS 30.05 | |
#Lab Assignment 6 | |
library(RColorBrewer) | |
library(dplyr) | |
library(readr) | |
library(ggplot2) | |
library(scales) | |
a <- read_csv('Extract 6.csv',col_types=cols(HHWT=col_double(),PERWT=col_double())) | |
#exclude Alaska and Hawaii before 1960, exclude anyone living in group quarters | |
c <- a %>% filter(YEAR>= 1960 | !(STATEFIP %in% c(2,15) & GQ==1)) | |
#add a region variable | |
d <- c %>% mutate(Region=factor(ifelse(STATEFIP %in% c(9,23,25,33,34,36,42,44,50),1, | |
ifelse(STATEFIP %in% c(17,18,19,20,26,27,29,31,38,39,46,55),3, | |
ifelse(STATEFIP %in% c(4,6,8,16,30,32,41,49,53,56),4,2))), | |
labels=c('Northeast','South','Midwest','West'))) | |
#GRAPH 1: Number of households headed by same or different race couples or single parents | |
#join husbands to their wives | |
husbands <- d %>% filter(SEX==1 & SPLOC>0) %>% | |
rename(hRace=RACE) | |
wives <- d %>% filter(SEX==2 & SPLOC>0) %>% | |
select(YEAR,SERIAL,PERNUM,RACE) %>% rename(wRace=RACE) | |
couples <- left_join(husbands,wives,by=c('YEAR','SERIAL','SPLOC'='PERNUM')) %>% | |
select(YEAR,SERIAL,Region,hRace,wRace) | |
#join the couples to a dataframe of all household heads (including single household heads) | |
heads <- d %>% filter(RELATE==1) | |
total <- left_join(heads,couples) | |
#create a new variable for the race of the couple | |
total2 <- total %>% mutate(cRace=factor(ifelse(SPLOC==0,1, | |
ifelse(hRace!=wRace,2,3)), | |
labels=c('Single','Different Races','Same Race'))) | |
#calculate percents | |
households <- total2 %>% group_by(YEAR,Region,cRace) %>% summarise(numhh=sum(HHWT)) | |
totalhouseholds <- total2 %>% group_by(YEAR,Region) %>% summarise(tothh=sum(HHWT)) | |
graph1 <- left_join(households,totalhouseholds) | |
graph2 <- graph1 %>% mutate(Percent=numhh/tothh*100) | |
GRAPH1 <- ggplot(graph2, aes(x=Region,y=Percent/100,fill=cRace)) + theme_gray(18) + | |
geom_bar(data=graph2,stat='identity',aes(width=rescale(tothh,c(0.1,1))),position='stack') + | |
labs(fill='Head and Spouse',title='Percent of Households Headed by Couples of the Same Race and Different Races, 1900-1990',x='Region',y='Percent of Households') + | |
scale_y_continuous(labels=scales::percent) + | |
facet_wrap(~YEAR,ncol=5) + | |
scale_fill_brewer(palette='Set2') + | |
geom_text(label=ifelse(graph2$cRace=='Different Races',paste('Different=',round(graph2$Percent,digits=1),'%',sep=''),''),angle=90,y=.5,size=5) | |
png('GRAPH1.png',width=1500,height=1000) | |
print(GRAPH1) | |
dev.off() | |
#GRAPH 2 | |
#make dataframes for childen, all possible fathers and mothers (all men and women) | |
children <- d %>% filter(AGE<=18 & (POPLOC>0 | MOMLOC>0)) | |
father <- d %>% filter(SEX==1) %>% select(YEAR,SERIAL,PERNUM,RACE) %>% rename(popRace=RACE) | |
mother <- d %>% filter(SEX==2) %>% select(YEAR,SERIAL,PERNUM,RACE) %>% rename(momRace=RACE) | |
#join mothers and fathers to their children (where POPLOC and/or MOMLOC equal PERNUM) | |
Parents <- left_join(children, mother, by=c('YEAR', 'SERIAL', 'MOMLOC'='PERNUM')) %>% | |
left_join(father, by=c('YEAR', 'SERIAL', 'POPLOC'='PERNUM')) | |
Parents <- Parents %>% mutate(Racep=factor(ifelse((MOMLOC==0 | POPLOC==0),1, | |
ifelse(momRace==popRace,3,2)), | |
labels=c('Only one Parent','Different Races','Same Race'))) | |
#calculate percents | |
Total <- Parents %>% group_by(YEAR,Region,Racep) %>% summarise(Number=sum(PERWT)) | |
Totalchildren <- children %>% group_by(YEAR,Region) %>% summarise(TotNumber=sum(PERWT)) | |
Graph2 <- left_join(Total,Totalchildren) %>% mutate(Perc=Number/TotNumber*100) | |
GRAPH2 <- ggplot(Graph2, aes(x=Region,y=Perc/100,fill=Racep)) + theme_gray(18) + | |
geom_bar(data=Graph2,stat='identity',aes(width=rescale(TotNumber,c(0.1,1))),position='stack') + | |
labs(fill='Parents',title='Percent of Children with Parents of Same Race and Different Races, 1900-1990',x='Region',y='Percent of Children') + | |
scale_y_continuous(labels=scales::percent) + | |
facet_wrap(~YEAR,ncol=5) + | |
scale_fill_brewer(palette='Set2') + | |
geom_text(label=ifelse(Graph2$Racep=='Different Races', | |
paste('Different=',round(Graph2$Perc,digits=1),'%',sep=''),''), | |
angle=90,y=.5,size=5) | |
png('GRAPH2.png',width=1500,height=1000) | |
print(GRAPH2) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment