Last active
November 16, 2016 21:11
-
-
Save jailee/6fd518c685604eac9aa4613682db3aaa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualization 2: column graph of women by number of children, age group and
# education level.
# NOTE(review): the original header says "white women" and an earlier version
# recoded RACE into white/nonwhite, but no step ever filtered on RACE and the
# column was dropped before aggregation. Confirm whether a white-only filter
# was intended; none is applied here.

# Load packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)

# Read in IPUMS data; PERWT (person weight) is parsed as a double.
a <- read_csv('usa_00075.csv', col_types = cols(PERWT = col_double()))

# Build the aggregated table used by the plot: one row per
# SEX x AGE group x EDUC group x NCHILD group x YEAR, with the summed weight.
e6 <- a %>%
  # Each argument of filter() is combined with AND, so the state/year test can
  # no longer be short-circuited by operator precedence:
  #   * women only (SEX == 2),
  #   * ages 21-40 (the age groups below start above 20),
  #   * Alaska (2) and Hawaii (15) rows are kept only from 1960 onwards.
  filter(
    SEX == 2,
    AGE > 20 & AGE <= 40,
    YEAR >= 1960 | !(STATEFIP %in% c(2, 15))
  ) %>%
  mutate(
    # EDUC codes 10 and 11 are treated as four or more years of college.
    # Explicit levels keep the labels aligned even if one group is absent.
    EDUC = factor(
      ifelse(EDUC %in% c(10, 11), 1, 2),
      levels = c(1, 2),
      labels = c('4 years+ College Education',
                 'No 4-year college or higher education')
    ),
    # Five-year age groups; intervals are open on the left, closed on the
    # right, i.e. (20,25], (25,30], (30,35], (35,40].
    AGE = cut(
      AGE,
      breaks = c(20, 25, 30, 35, 40),
      labels = c('21-25', '26-30', '31-35', '36-40')
    ),
    # Number of children: 1-4 individually, more than 4 lumped together,
    # everything else (i.e. 0) labelled 'no children'. The level order is kept
    # as in the original so stacking order and colours do not change.
    NCHILD = factor(
      ifelse(NCHILD == 1, 1,
      ifelse(NCHILD == 2, 2,
      ifelse(NCHILD == 3, 3,
      ifelse(NCHILD == 4, 4,
      ifelse(NCHILD > 4, 5, 6))))),
      levels = 1:6,
      labels = c('one child', 'two children', 'three children',
                 'four children', 'many children', 'no children')
    )
  ) %>%
  # Keep only the variables needed for the graph.
  select(SEX, AGE, EDUC, NCHILD, YEAR, PERWT) %>%
  # Sum the person weights within each cell, then drop the grouping so that
  # later steps do not silently operate per group.
  group_by(SEX, AGE, EDUC, NCHILD, YEAR) %>%
  summarise(Number = sum(PERWT)) %>%
  ungroup()
#e4<-e4[order(e1$NCHILD),] | |
# Stacked column chart: within each YEAR the bars are rescaled to 100%
# (position = 'fill'), so the y axis shows the share of the summed weights
# falling in each NCHILD category. Panels: EDUC in rows, AGE in columns.
fig2 <- ggplot(data = e6, aes(x = YEAR, y = Number, fill = NCHILD)) +
  geom_bar(stat = 'identity', position = 'fill') +
  # The y axis is a percentage of women, not a count of children.
  labs(x = 'Year', y = 'Percent of Women', fill = '',
       title = "Percent Distribution of the Number of Children by Women by Age and Education Level, 1940-2000") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(1920, 2000, by = 10)) +
  scale_fill_brewer(palette = 'Set2', guide = guide_legend(reverse = TRUE)) +
  facet_grid(EDUC ~ AGE) +
  theme_bw() +
  theme(legend.position = 'bottom')

# Export the figure to png. The device is opened exactly once and closed
# right after printing, so no graphics device is left open.
png('ChildEdu.png', height = 500, width = 1000)
print(fig2)
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment