Skip to content

Instantly share code, notes, and snippets.

@jailee
Last active November 16, 2016 21:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jailee/6fd518c685604eac9aa4613682db3aaa to your computer and use it in GitHub Desktop.
Save jailee/6fd518c685604eac9aa4613682db3aaa to your computer and use it in GitHub Desktop.
#Visualization 2: The Column graph of white women by number of children and education level
#load packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
#Read in IPUMS data; PERWT is explicitly parsed as a double column
a <- read_csv('usa_00075.csv', col_types = cols(PERWT = col_double()))
#Keep persons aged 20 to 40. Records from Alaska and Hawaii (STATEFIP 2 and 15)
#are kept only for YEAR >= 1960.
#The year/state test is passed as its own filter() condition on purpose:
#`&` binds tighter than `|`, so writing everything as one expression
#(AGE>=20 & AGE<=40 & YEAR>=1960 | !(...)) applied the age limits only to the
#YEAR>=1960 branch and let every age through for all other states.
b <- a %>%
  filter(AGE >= 20 & AGE <= 40,
         YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
#Build the table behind the plot: PERWT summed over women aged 21-40, by
#age group, education level, number of children and year.
#Every factor() below gets explicit levels so the recode does not fail with
#"invalid 'labels'" when one of the categories is absent from the data.
#NOTE(review): the header of this script mentions white women, but RACE is only
#recoded here; it is never filtered on and is dropped by select() - confirm
#whether a filter on RACE=='white' was intended.
#NOTE(review): the age groups start at 21, so persons aged exactly 20 are
#dropped here - confirm that this is intended.
e6 <- b %>%
  #Keep only the female population (SEX == 2)
  filter(SEX == 2) %>%
  mutate(
    #Label race: code 1 is 'white', every other code is 'nonwhite'
    RACE = factor(ifelse(RACE %in% c(1), 1, 2),
                  levels = 1:2,
                  labels = c('white', 'nonwhite')),
    #Label education: EDUC codes 10 and 11 form the college group
    EDUC = factor(ifelse(EDUC %in% c(10, 11), 1, 2),
                  levels = 1:2,
                  labels = c('4 years+ College Education',
                             'No 4-year college or higher education')),
    #Bin age into right-closed 5-year groups: (20,25], (25,30], (30,35], (35,40];
    #any age outside (20,40] becomes NA and is removed below
    AGE = cut(AGE,
              breaks = c(20, 25, 30, 35, 40),
              labels = c('21-25', '26-30', '31-35', '36-40')),
    #Label number of children; level order (1, 2, 3, 4, more than 4, none)
    #determines the stacking and legend order in the plot
    NCHILD = factor(ifelse(NCHILD == 1, 1,
                    ifelse(NCHILD == 2, 2,
                    ifelse(NCHILD == 3, 3,
                    ifelse(NCHILD == 4, 4,
                    ifelse(NCHILD > 4, 5, 6))))),
                    levels = 1:6,
                    labels = c('one child', 'two children', 'three children',
                               'four children', 'many children', 'no children'))
  ) %>%
  #Drop persons outside the four age groups
  filter(!is.na(AGE)) %>%
  #Keep the variables needed for the graph: SEX,AGE,EDUC,NCHILD,YEAR,AND PERWT
  select(SEX, AGE, EDUC, NCHILD, YEAR, PERWT) %>%
  #Group by SEX,AGE,EDUC,NCHILD,YEAR and sum the weights within each cell
  group_by(SEX, AGE, EDUC, NCHILD, YEAR) %>%
  summarise(Number = sum(PERWT)) %>%
  #Return a plain (ungrouped) table so later steps are not silently grouped
  ungroup()
#Build the column chart: for each year the bar is stacked by number of children
#and normalised to 100% (position='fill'), with one panel per education level
#(rows) and age group (columns).
#NOTE(review): the y axis shows the share of Number within each bar, while its
#label reads 'Number of Children' - confirm the intended wording.
fig2 <- ggplot(data = e6, aes(x = YEAR, y = Number, fill = NCHILD)) +
  labs(x = 'Year', y = 'Number of Children', fill = '',
       title = "Percent Distribution of the Number of Children by Women by Age and Education Level, 1940-2000") +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(1920, 2000, by = 10)) +
  scale_fill_brewer(palette = 'Set2', guide = guide_legend(reverse = TRUE)) +
  facet_grid(EDUC ~ AGE) +
  theme_bw() + theme(legend.position = 'bottom')
#Export figure to png. The device is opened exactly once and closed right
#after printing; calling dev.off() with no open device is an error, and
#opening png() twice would leave one device open.
png('ChildEdu.png', height = 500, width = 1000)
print(fig2)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment