Skip to content

Instantly share code, notes, and snippets.

@SirSamAlot280
Created October 24, 2016 03:04
Show Gist options
  • Save SirSamAlot280/7c0e3acbcb2974c884f14e8de102cfbd to your computer and use it in GitHub Desktop.
Save SirSamAlot280/7c0e3acbcb2974c884f14e8de102cfbd to your computer and use it in GitHub Desktop.
#Load packages
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# Read the IPUMS extract. HHWT and PERWT are declared explicitly as doubles
# (col_double) instead of being left to readr's column-type guessing.
ipums <- read_csv(
  './data/10_20.csv',
  col_types = cols(HHWT = col_double(), PERWT = col_double())
)

# Drop Alaska and Hawaii (STATEFIP 2 and 15) records from before 1960;
# every other state/year combination is kept.
ipumsfilter <- ipums %>%
  filter(YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))

# Create regions of the United States from STATEFIP.
# Codes: 1 = Northeast, 2 = South, 3 = Midwest, 4 = West; any STATEFIP that is
# not listed below falls through to 4 (West).
# `levels` is given explicitly so each label is tied to its code and factor()
# does not fail when one of the regions has no rows in the data.
ipumsfilter <- ipumsfilter %>%
  mutate(regions = factor(
    ifelse(STATEFIP %in% c(9, 23, 25, 33, 34, 36, 42, 44, 50), 1,
      ifelse(STATEFIP %in% c(17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55), 3,
        ifelse(STATEFIP %in% c(1, 5, 10, 11, 12, 13, 21, 22, 24, 28, 37, 45, 47, 48, 51, 54), 2, 4)
      )
    ),
    levels = 1:4,
    labels = c('Northeast', 'South', 'Midwest', 'West')
  ))
# Task 1
# Condense RACE into five racial groups; codes other than 1-6 become 'Other'.
ipumsfilter <- ipumsfilter %>%
  mutate(Race = ifelse(RACE %in% c(1), 'White',
    ifelse(RACE %in% c(2), 'Black',
      ifelse(RACE %in% c(3), 'Native American',
        ifelse(RACE %in% c(4, 5, 6), 'Asian', 'Other')
      )
    )
  ))

# Only keep households that match the 1970 definition (GQ == 1).
households <- ipumsfilter %>%
  filter(GQ == 1)

# Heads of household (RELATE == 1); their race is carried as Hrac.
heads <- households %>%
  filter(RELATE == 1) %>%
  rename(Hrac = Race)

# Spouses (RELATE == 2), reduced to the join keys plus their race (Srac).
spouse <- households %>%
  filter(RELATE == 2) %>%
  select(YEAR, SERIAL, PERNUM, Race) %>%
  rename(Srac = Race)

# Attach each head's spouse by matching the head's SPLOC to the spouse's PERNUM
# within the same YEAR/SERIAL, then classify the head:
#   1 = SPLOC is 0 (no spouse), 2 = head and spouse differ in race, 3 = same race.
# A head with a non-zero SPLOC that finds no RELATE == 2 match has Srac = NA and
# therefore an NA category.
# `levels` is explicit so the labels stay attached to the right codes and
# factor() does not fail when a category is absent from the data.
unmarried <- left_join(heads, spouse, by = c('YEAR', 'SERIAL', 'SPLOC' = 'PERNUM')) %>%
  mutate(Race = factor(
    ifelse(SPLOC == 0, 1,
      ifelse(Hrac != Srac, 2, 3)
    ),
    levels = 1:3,
    labels = c('Head Unmarried', 'Different race', 'Same race')
  ))

# Weighted number of households (sum of HHWT) per year, category and region.
pctunmarried <- unmarried %>%
  group_by(YEAR, Race, regions) %>%
  summarise(Number = sum(HHWT))

# Add the year-by-region total next to each category count.
allpctmarried <- pctunmarried %>%
  group_by(YEAR, regions) %>%
  mutate(Total = sum(Number))

# allpctmarried already contains every column of pctunmarried plus Total (in
# the same row order), so the percentage is computed on it directly instead of
# joining the two tables back together on all shared columns.
allpctmarried2 <- allpctmarried %>%
  mutate(pct = Number / Total * 100) %>%
  ungroup()
#Create graph for the selected information
# Render a plot to a PNG file.
#
# Arguments:
#   plot   - the plot object to draw; it is passed to print().
#   file   - output file name (default 'plot.png').
#   height - height passed to png() (default 500).
#   width  - width passed to png() (default 1000).
# Returns the value of dev.off().
printplot <- function(plot, file = 'plot.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # If drawing fails, close the PNG device before re-raising the error so it
  # does not stay open and capture later graphics output.
  tryCatch(
    print(plot),
    error = function(e) {
      dev.off()
      stop(e)
    }
  )
  dev.off()
}
library(scales)
# Stacked bar chart of household shares by region, one facet per YEAR,
# filled by the head/spouse category.
graph1 <- ggplot(allpctmarried2, aes(x = regions, y = pct / 100, fill = Race)) +
  # Bar width follows the year-by-region total, rescaled to the range 0.1-1.
  geom_bar(stat = 'identity', aes(width = rescale(Total, c(.1, 1)))) +
  # Only the 'Different race' rows get a visible label (placed at y = 0.5);
  # all other rows get an empty label at y = 0.9.
  # Label and position are mapped inside aes() so they are computed from the
  # same data frame as the bars rather than from separately indexed vectors.
  geom_text(
    aes(
      label = ifelse(Race == 'Different race',
        paste0('Different =', round(pct, 1), '%'),
        ''
      ),
      y = ifelse(Race == 'Different race', .5, .9)
    ),
    angle = 90
  ) +
  labs(
    fill = 'Head and Spouse',
    title = 'Percent of Households by Married and Unmarried Couples Who are of Same and Different Races, 1900-1990',
    x = 'Regions',
    y = 'Percent of Households'
  ) +
  theme_bw(base_size = 10) +
  facet_wrap(~YEAR, nrow = 2) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = 'Set2')
printplot(graph1)
# Task 2
# Children: persons aged 18 or under with a mother and/or father located in
# the household (MOMLOC or POPLOC non-zero).
children <- households %>%
  filter(AGE <= 18 & (MOMLOC != 0 | POPLOC != 0))

# Potential mothers: all females (SEX == 2), reduced to join keys plus race (MRac).
mother <- households %>%
  filter(SEX == 2) %>%
  select(YEAR, SERIAL, PERNUM, Race) %>%
  rename(MRac = Race)

# Potential fathers: all males (SEX == 1), reduced to join keys plus race (FRac).
father <- households %>%
  filter(SEX == 1) %>%
  select(YEAR, SERIAL, PERNUM, Race) %>%
  rename(FRac = Race)

# Join the children to their mother (via MOMLOC) and/or father (via POPLOC).
parentchild <- children %>%
  left_join(mother, by = c('YEAR', 'SERIAL', 'MOMLOC' = 'PERNUM')) %>%
  left_join(father, by = c('YEAR', 'SERIAL', 'POPLOC' = 'PERNUM'))

# Classify each child by its parents:
#   1 = only one parent located (MOMLOC or POPLOC is 0),
#   2 = parents differ in race, 3 = parents share a race.
# A child whose located parent is not found by the joins above has an NA race
# for that parent and therefore an NA category.
# `levels` is explicit so the labels stay attached to the right codes and
# factor() does not fail when a category is absent from the data.
# The first label previously read 'Only on parent' (typo shown in the legend).
childwparent <- parentchild %>%
  mutate(PRace = factor(
    ifelse(MOMLOC == 0 | POPLOC == 0, 1,
      ifelse(MRac != FRac, 2, 3)
    ),
    levels = 1:3,
    labels = c('Only one parent', 'Different race', 'Same race')
  ))

# Weighted number of children (sum of PERWT) per year, category and region.
pctchildwparent <- childwparent %>%
  group_by(YEAR, PRace, regions) %>%
  summarise(Number = sum(PERWT))

# Add the year-by-region total next to each category count.
allpctchildwparent <- pctchildwparent %>%
  group_by(YEAR, regions) %>%
  mutate(Total = sum(Number))

# allpctchildwparent already contains every column of pctchildwparent plus
# Total (in the same row order), so the percentage is computed on it directly
# instead of joining the two tables back together on all shared columns.
allpctchildwparent2 <- allpctchildwparent %>%
  mutate(pct = Number / Total * 100) %>%
  ungroup()
#Create graph for selected information
# Render a plot to a PNG file.
#
# Arguments:
#   plot   - the plot object to draw; it is passed to print().
#   file   - output file name (default 'plot2.png').
#   height - height passed to png() (default 500).
#   width  - width passed to png() (default 1000).
# Returns the value of dev.off().
printplot <- function(plot, file = 'plot2.png', height = 500, width = 1000) {
  png(file, height = height, width = width)
  # If drawing fails, close the PNG device before re-raising the error so it
  # does not stay open and capture later graphics output.
  tryCatch(
    print(plot),
    error = function(e) {
      dev.off()
      stop(e)
    }
  )
  dev.off()
}
library(scales)
# Stacked bar chart of children's shares by region, one facet per YEAR,
# filled by the parents' category.
graph2 <- ggplot(allpctchildwparent2, aes(x = regions, y = pct / 100, fill = PRace)) +
  # Bar width follows the year-by-region total, rescaled to the range 0.1-1.
  geom_bar(stat = 'identity', aes(width = rescale(Total, c(.1, 1)))) +
  # Only the 'Different race' rows get a visible label (placed at y = 0.5);
  # all other rows get an empty label at y = 0.9.
  # Label and position are mapped inside aes() so they are computed from the
  # layer's own data rather than from separately indexed vectors.
  geom_text(
    aes(
      label = ifelse(PRace == 'Different race',
        paste0('Different =', round(pct, 1), '%'),
        ''
      ),
      y = ifelse(PRace == 'Different race', .5, .9)
    ),
    angle = 90
  ) +
  labs(
    fill = 'Parents',
    title = 'Percent of Children Living with Parents of Same Race and Different Races, 1900-1990',
    x = 'Region',
    y = 'Percent of children'
  ) +
  theme_bw(base_size = 10) +
  facet_wrap(~YEAR, nrow = 2) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = 'Set2')
printplot(graph2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment