Last active
November 16, 2016 17:41
-
-
Save helenaeitel/46e865647bdb427651b4fdb0c4d87589 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Helena Eitel
#Professor Merchant
#QSS 30.05
#Final Visualizations
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(maptools) | |
library(gtools) | |
library(devtools) | |
library(gganimate) | |
library(scales) | |
# Load the IPUMS extract, forcing PERWT to be parsed as a double
a <- read_csv(
  "./FinalExtract.csv",
  col_types = cols(PERWT = col_double())
)
# Drop Alaska (STATEFIP 2) and Hawaii (STATEFIP 15) records from before 1960
aa <- filter(a, !(YEAR < 1960 & STATEFIP %in% c(2, 15)))
# Build the analysis data frame `all` from `aa`. Columns added:
#   Sex    - SEX recoded to a Male/Female factor
#   Race   - single race/ethnicity factor; HISPAN codes 1-8 take precedence
#            over RACE, and RACE codes 4, 5 and 6 are pooled as 'Asian'
#   Labs   - binary labor force status ('Working' when LABFORCE > 1)
#   Inc    - INCWAGE as an integer
#   CP     - the CPI99 multiplier, kept fractional
#   IncAdj - wage income adjusted to 1999 US dollars (Inc * CP)
#   WT     - integer weight: PERWT, except SLWT (sample-line weight) in 1950
# Rows are then limited to AGE strictly between 16 and 65, and the 'Other'
# race category is dropped.
all <- aa %>%
  mutate(
    # Levels are spelled out for every factor so that labels stay aligned
    # with their codes even if a code is absent from the extract.
    Sex = factor(SEX, levels = 1:2, labels = c("Male", "Female")),
    Race = factor(
      ifelse(HISPAN > 0 & HISPAN < 9, 1,
        ifelse(RACE == 1, 2,
          ifelse(RACE == 2, 3,
            ifelse(RACE == 3, 4,
              ifelse(RACE %in% c(4, 5, 6), 5, 6)
            )
          )
        )
      ),
      levels = 1:6,
      labels = c(
        "Hispanic", "white", "black", "Native American", "Asian", "Other"
      )
    ),
    Labs = factor(
      ifelse(LABFORCE > 1, 1, 2),
      levels = 1:2,
      labels = c("Working", "Not Working")
    ),
    Inc = as.integer(INCWAGE),
    # CPI99 is a fractional multiplier; coercing it to integer would truncate
    # it (e.g. 1.344 -> 1) and understate every adjusted income.
    CP = CPI99,
    IncAdj = Inc * CP,
    # Integer weights are required later by rep(..., times = WT)
    WT = as.integer(ifelse(YEAR != 1950, PERWT, SLWT))
  ) %>%
  filter(AGE > 16 & AGE < 65 & Race != "Other")
# Female-only subset of `all` (working-age, every race except 'Other')
women <- filter(all, Sex == "Female")
# FIGURE 1
# Percent of working-age women in the labor force, by race and year.

# PERWT-weighted number of women in each year/race cell
women_by_race <- women %>%
  group_by(YEAR, Race) %>%
  summarise(TotRace = sum(PERWT)) %>%
  ungroup()
# PERWT-weighted number of women in each year/race/labor-force-status cell
women_by_race_labs <- women %>%
  group_by(YEAR, Race, Labs) %>%
  summarise(work = sum(PERWT)) %>%
  ungroup()
# Attach the cell totals and express each status as a percent of its
# year/race total
laborgraph <- women_by_race_labs %>%
  left_join(women_by_race, by = c("YEAR", "Race")) %>%
  mutate(PercW = work / TotRace * 100) %>%
  filter(Race != "Other")
# Bar graph of the 'Working' share only, one panel per race
Laborgraph1 <- ggplot(
  filter(laborgraph, Labs == "Working"),
  aes(x = YEAR, y = PercW / 100, fill = Race)
) +
  geom_bar(stat = "identity", position = "stack") +
  labs(
    fill = "Race",
    title = "Percent of Women in the Labor Force by Race, 1920-1990",
    x = "Year",
    y = "Percent of Women"
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~Race, ncol = 3) +
  # The facets already identify each race, so the fill legend is removed
  guides(fill = "none") +
  scale_fill_brewer(palette = "Set2")
# Export the figure as a png
png("Laborgraph1.png", width = 600, height = 400)
print(Laborgraph1)
dev.off()
# FIGURE 2
# Median adjusted wage income by year, race and sex.

# Keep only people with a reported positive wage income. In the IPUMS
# INCWAGE coding, 999998 (missing) and 999999 (N/A) are sentinel codes,
# not dollar amounts, so both are excluded.
income <- all %>% filter(INCWAGE > 0 & INCWAGE < 999998)
# Weighted median income and weighted head count per year/race/sex cell.
# The median is weighted by repeating each income WT times, which can
# allocate a large temporary vector for big cells.
e <- income %>%
  group_by(YEAR, Race, Sex) %>%
  summarise(
    MedInc = median(rep(IncAdj, times = WT)),
    numberw = sum(WT)
  )
# Line graph, one panel per sex; the title names both sexes because the
# data and the facets include men as well as women
Incomeline <- ggplot(data = e, aes(x = YEAR, y = MedInc, colour = Race)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Median Income by Race and Sex, 1940-1990",
    x = "Year",
    y = "Median Income, US Dollars"
  ) +
  scale_colour_brewer(palette = "Set2") +
  scale_y_continuous(
    breaks = c(10000, 20000, 30000),
    labels = scales::comma
  ) +
  facet_grid(Sex ~ .) +
  theme(legend.position = "bottom")
# Export the figure as a png
png("Incomeline.png", width = 500, height = 400)
print(Incomeline)
dev.off()
# FIGURE 3
# Women's median income as a share of men's, by race and year.

# Men's rows of `e`, with the value columns renamed so they can sit next
# to the women's columns after the join; Sex is dropped
meninc <- e %>%
  filter(Sex == "Male") %>%
  rename(MedIncM = MedInc, numberm = numberw) %>%
  select(YEAR, Race, MedIncM, numberm)
# Women's rows of `e`
womeninc <- e %>% filter(Sex == "Female")
# gap   = women's median income / men's median income
# ratio = weighted number of women earners / weighted number of men earners
j <- left_join(meninc, womeninc, by = c("YEAR", "Race")) %>%
  mutate(gap = MedInc / MedIncM, ratio = numberw / numberm)
# Bar graph, one panel per race. Bar width encodes `ratio`, rescaled over
# all rows to the range 1.05-7.35 in x-axis (year) units.
# NOTE(review): that range looks chosen for ten-year census spacing - confirm.
Gapgraph <- ggplot(j, aes(x = YEAR, y = gap, fill = Race)) +
  geom_bar(
    stat = "identity",
    position = "stack",
    aes(width = scales::rescale(ratio, c(1.05, 7.35)))
  ) +
  labs(
    fill = "Race",
    title = "Wage Gap by Race, 1940-1990",
    x = "Year",
    y = "Percent of Income of Men"
  ) +
  scale_y_continuous(labels = scales::percent) +
  # The facets already identify each race, so the fill legend is removed
  guides(fill = "none") +
  facet_wrap(~Race, ncol = 3) +
  scale_fill_brewer(palette = "Set2")
# Export the figure as a png
png("Gapgraph.png", width = 600, height = 400)
print(Gapgraph)
dev.off()
# FIGURE 6
# Median adjusted wage income of women by race and family structure.

# Add a family-structure factor `afam` to `women`:
#   SPLOC > 0 (spouse located in the household) -> married
#   NCHILD > 0                                  -> has children
# Levels are spelled out so labels stay aligned even if one of the four
# structures is absent from the data. `k` is reused by Figures 4 and 5.
k <- women %>%
  mutate(
    afam = factor(
      ifelse(SPLOC == 0 & NCHILD == 0, 4,
        ifelse(SPLOC > 0 & NCHILD > 0, 1,
          ifelse(SPLOC > 0, 2, 3)
        )
      ),
      levels = 1:4,
      labels = c(
        "Married with Children",
        "Married without Children",
        "Single Parent",
        "Single without Children"
      )
    )
  )
# Keep only women with a reported positive wage income. In the IPUMS
# INCWAGE coding, 999998 (missing) and 999999 (N/A) are sentinel codes,
# not dollar amounts, so both are excluded.
women_earners <- k %>% filter(INCWAGE > 0 & INCWAGE < 999998)
# Weighted median income per year/family-structure/race cell; the median
# is weighted by repeating each income WT times
fam_income <- women_earners %>%
  group_by(YEAR, afam, Race) %>%
  summarise(MedInc1 = median(rep(IncAdj, times = WT)))
# Line graph, one panel per race and one line per family structure
Famline1 <- ggplot(
  data = fam_income,
  aes(x = YEAR, y = MedInc1, colour = afam)
) +
  geom_line() +
  geom_point() +
  scale_colour_brewer(palette = "Paired") +
  scale_x_continuous(breaks = c(1940, 1960, 1980)) +
  scale_y_continuous(
    breaks = c(5000, 10000, 15000),
    labels = scales::comma
  ) +
  labs(
    title = "Median Income of Women by Race and Family Structure",
    x = "Year",
    y = "Median Income, US Dollars",
    colour = ""
  ) +
  facet_wrap(~Race, ncol = 3) +
  theme(legend.position = "bottom")
# Export the figure as a png
png("Famline1.png", width = 600, height = 400)
print(Famline1)
dev.off()
# FIGURE 4
# How women of each race are distributed across family structures, by year.

# WT-weighted count of women (regardless of income) per race and year
race_totals <- summarise(group_by(k, Race, YEAR), racenum = sum(WT))
# WT-weighted count of women per race, family structure and year
race_fam_totals <- summarise(group_by(k, Race, afam, YEAR), racea = sum(WT))
# Each family structure's percent share of its race/year total
family_share <- mutate(
  left_join(race_fam_totals, race_totals),
  percRA = racea / racenum * 100
)
# Stacked bar graph, one panel per race
RAgraph <- ggplot(family_share, aes(x = YEAR, y = percRA / 100, fill = afam)) +
  geom_bar(stat = "identity", position = "stack") +
  facet_wrap(~Race, ncol = 3) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Paired") +
  labs(
    fill = "",
    title = "Distribution of Women by Race and Family Structure, 1940-1990",
    x = "Year",
    y = "Percent of Women"
  ) +
  theme(legend.position = "bottom")
# Write the figure to a png file
png("RAgraph.png", width = 600, height = 400)
print(RAgraph)
dev.off()
# FIGURE 5
# Labor force participation of women by race and family structure, by year.

# PERWT-weighted count of women (regardless of income) per
# year/family-structure/race cell
fam_totals <- summarise(group_by(k, YEAR, afam, Race), TotFam = sum(PERWT))
# PERWT-weighted count of women per
# year/family-structure/race/labor-force-status cell
fam_labs_totals <- summarise(
  group_by(k, YEAR, afam, Race, Labs),
  FamLab = sum(PERWT)
)
# Percent of each year/family-structure/race cell in each labor force status
FamilyLabor <- mutate(
  left_join(fam_labs_totals, fam_totals),
  PercFamLab = FamLab / TotFam * 100
)
# Only the 'Working' rows are plotted
working_rows <- FamilyLabor[FamilyLabor$Labs == "Working", ]
# Line graph, one panel per race and one line per family structure
Laborgraph2 <- ggplot(
  data = working_rows,
  aes(x = YEAR, y = PercFamLab / 100, group = afam, colour = afam)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~Race, ncol = 3) +
  scale_x_continuous(breaks = c(1940, 1960, 1980)) +
  scale_y_continuous(labels = scales::percent) +
  scale_colour_brewer(palette = "Paired") +
  labs(
    title = "Percent of Women in the Labor Force by Family Structure",
    x = "Year",
    y = "Percent of Women",
    colour = ""
  ) +
  theme(legend.position = "bottom")
# Write the figure to a png file
png("Famline2.png", width = 600, height = 400)
print(Laborgraph2)
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment