Created
October 24, 2016 03:04
-
-
Save SirSamAlot280/7c0e3acbcb2974c884f14e8de102cfbd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load packages
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)

# Read in the IPUMS extract. The HHWT and PERWT weight columns are parsed
# explicitly as doubles.
ipums <- read_csv('./data/10_20.csv',
                  col_types = cols(HHWT = col_double(), PERWT = col_double()))

# Drop Alaska (STATEFIP 2) and Hawaii (STATEFIP 15) records from before 1960;
# every other record is kept.
ipumsfilter <- ipums %>% filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))

# Create regions of the United States from STATEFIP.
# Codes: 1 = Northeast, 2 = South, 3 = Midwest, 4 = West. Any STATEFIP that is
# not listed below falls through to 4 (West).
# NOTE(review): STATEFIP 40 (Oklahoma) is not in the South list, so it is
# classed as West -- confirm this is intended.
# `levels` is given explicitly so the labels always line up with the codes,
# even when a region has no rows in the data.
ipumsfilter <- ipumsfilter %>%
  mutate(regions = factor(
    ifelse(STATEFIP %in% c(9, 23, 25, 33, 34, 36, 42, 44, 50), 1,
    ifelse(STATEFIP %in% c(17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55), 3,
    ifelse(STATEFIP %in% c(1, 5, 10, 11, 12, 13, 21, 22, 24, 28, 37, 45, 47,
                           48, 51, 54), 2, 4))),
    levels = 1:4,
    labels = c('Northeast', 'South', 'Midwest', 'West')))
# Task 1
# Condense RACE into five racial groups.
ipumsfilter <- ipumsfilter %>%
  mutate(Race = ifelse(RACE %in% c(1), 'White',
                ifelse(RACE %in% c(2), 'Black',
                ifelse(RACE %in% c(3), 'Native American',
                ifelse(RACE %in% c(4, 5, 6), 'Asian', 'Other')))))

# Only keep records with GQ == 1 (households under the 1970 definition).
households <- ipumsfilter %>% filter(GQ == 1)

# Heads of household (RELATE == 1); their race is carried as Hrac.
heads <- households %>%
  filter(RELATE == 1) %>%
  rename(Hrac = Race)

# Spouses (RELATE == 2), reduced to the join keys plus their race as Srac.
spouse <- households %>%
  filter(RELATE == 2) %>%
  select(YEAR, SERIAL, PERNUM, Race) %>%
  rename(Srac = Race)

# Attach each head's spouse (the head's SPLOC matched to the spouse's PERNUM
# within the same YEAR and SERIAL) and classify the household:
#   1 = head has no spouse present (SPLOC == 0)
#   2 = head and spouse are of different races
#   3 = head and spouse are of the same race
# `levels` is given explicitly so the labels stay aligned with the codes even
# if one of the categories does not occur in the data.
unmarried <- left_join(heads, spouse,
                       by = c('YEAR', 'SERIAL', 'SPLOC' = 'PERNUM')) %>%
  mutate(Race = factor(ifelse(SPLOC == 0, 1,
                       ifelse(Hrac != Srac, 2, 3)),
                       levels = 1:3,
                       labels = c('Head Unmarried', 'Different race', 'Same race')))

# Weighted number of households per year, couple category and region.
pctunmarried <- unmarried %>%
  group_by(YEAR, Race, regions) %>%
  summarise(Number = sum(HHWT))

# Total weighted households per year and region, repeated on every row.
allpctmarried <- pctunmarried %>%
  group_by(YEAR, regions) %>%
  mutate(Total = sum(Number))

# Share of each couple category within its year/region, in percent. Computed
# directly from allpctmarried; joining it back onto pctunmarried would only
# re-match every row to itself (including on the floating-point Number column).
allpctmarried2 <- allpctmarried %>%
  mutate(pct = Number / Total * 100) %>%
  ungroup()
# Create graph for the selected information

# Render `plot` to a PNG file.
#   plot:          object to print onto the device (e.g. a ggplot object)
#   filename:      output path; defaults to 'plot.png'
#   height, width: size of the PNG device
# The device is closed through on.exit(), so it is released even when
# printing the plot fails. Returns `filename` invisibly.
printplot <- function(plot, filename = 'plot.png', height = 500, width = 1000) {
  png(filename, height = height, width = width)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(filename)
}
library(scales)

# Stacked bars of the household shares by region, one facet per year.
# Bar width is the year/region total rescaled to the range [0.1, 1].
# The text layer takes its label and y position from the plotted data through
# aes(), so both always refer to the same rows as the bars; only the
# 'Different race' rows get a visible label (all others get an empty string).
graph1 <- ggplot(allpctmarried2, aes(x = regions, y = pct / 100, fill = Race)) +
  geom_bar(stat = 'identity', aes(width = rescale(Total, c(.1, 1)))) +
  geom_text(aes(label = ifelse(Race == 'Different race',
                               paste0('Different =', round(pct, 1), '%'),
                               ''),
                y = ifelse(Race == 'Different race', .5, .9)),
            angle = 90) +
  labs(fill = 'Head and Spouse',
       title = 'Percent of Households by Married and Unmarried Couples Who are of Same and Different Races, 1900-1990',
       x = 'Regions',
       y = 'Percent of Households') +
  theme_bw(base_size = 10) +
  facet_wrap(~YEAR, nrow = 2) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = 'Set2')
printplot(graph1)
# Task 2
# Children: aged 18 or under with at least one parent located in the
# household (MOMLOC or POPLOC non-zero).
children <- households %>% filter(AGE <= 18 & (MOMLOC != 0 | POPLOC != 0))

# Potential mothers: women (SEX == 2), reduced to join keys plus race as MRac.
mother <- households %>%
  filter(SEX == 2) %>%
  select(YEAR, SERIAL, PERNUM, Race) %>%
  rename(MRac = Race)

# Potential fathers: men (SEX == 1), reduced to join keys plus race as FRac.
father <- households %>%
  filter(SEX == 1) %>%
  select(YEAR, SERIAL, PERNUM, Race) %>%
  rename(FRac = Race)

# Join the children to their mother and/or father via MOMLOC / POPLOC.
parentchild <- children %>%
  left_join(mother, by = c('YEAR', 'SERIAL', 'MOMLOC' = 'PERNUM')) %>%
  left_join(father, by = c('YEAR', 'SERIAL', 'POPLOC' = 'PERNUM'))

# Classify each child by its parents:
#   1 = only one parent present (MOMLOC or POPLOC is 0)
#   2 = parents are of different races
#   3 = parents are of the same race
# `levels` is given explicitly so the labels stay aligned with the codes even
# if one of the categories does not occur in the data.
childwparent <- parentchild %>%
  mutate(PRace = factor(ifelse(MOMLOC == 0 | POPLOC == 0, 1,
                        ifelse(MRac != FRac, 2, 3)),
                        levels = 1:3,
                        labels = c('Only one parent', 'Different race', 'Same race')))

# Weighted number of children per year, parent category and region.
pctchildwparent <- childwparent %>%
  group_by(YEAR, PRace, regions) %>%
  summarise(Number = sum(PERWT))

# Total weighted children per year and region, repeated on every row.
allpctchildwparent <- pctchildwparent %>%
  group_by(YEAR, regions) %>%
  mutate(Total = sum(Number))

# Share of each parent category within its year/region, in percent. Computed
# directly from allpctchildwparent; joining it back onto pctchildwparent would
# only re-match every row to itself.
allpctchildwparent2 <- allpctchildwparent %>%
  mutate(pct = Number / Total * 100) %>%
  ungroup()
# Create graph for selected information

# Render `plot` to a PNG file.
#   plot:          object to print onto the device (e.g. a ggplot object)
#   filename:      output path; defaults to 'plot2.png'
#   height, width: size of the PNG device
# The device is closed through on.exit(), so it is released even when
# printing the plot fails. Returns `filename` invisibly.
printplot <- function(plot, filename = 'plot2.png', height = 500, width = 1000) {
  png(filename, height = height, width = width)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(filename)
}
library(scales)

# Stacked bars of the children's shares by region, faceted by year. Bar width
# is the year/region total rescaled to [0.1, 1]; only the 'Different race'
# rows receive a visible text label.
graph2 <- ggplot(allpctchildwparent2,
                 aes(x = regions, y = pct / 100, fill = PRace)) +
  geom_bar(aes(width = rescale(Total, c(.1, 1))), stat = 'identity') +
  geom_text(
    label = with(allpctchildwparent2,
                 ifelse(PRace == 'Different race',
                        paste0('Different =', round(pct, 1), '%'),
                        '')),
    y = with(allpctchildwparent2,
             ifelse(PRace == 'Different race', .5, .9)),
    angle = 90
  ) +
  facet_wrap(~YEAR, nrow = 2) +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  theme_bw(base_size = 10) +
  labs(title = 'Percent of Children Living with Parents of Same Race and Different Races, 1900-1990',
       x = 'Region',
       y = 'Percent of children',
       fill = 'Parents')
printplot(graph2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment