Skip to content

Instantly share code, notes, and snippets.

@helenaeitel
Last active November 16, 2016 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save helenaeitel/46e865647bdb427651b4fdb0c4d87589 to your computer and use it in GitHub Desktop.
Save helenaeitel/46e865647bdb427651b4fdb0c4d87589 to your computer and use it in GitHub Desktop.
#Helena Eitel
#Professor Merchant
#QSS 30.05
#Final Visualizations
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(maptools)
library(gtools)
library(devtools)
library(gganimate)
library(scales)
#load the IPUMS extract, forcing PERWT to be parsed as a double
a <- read_csv(
  './FinalExtract.csv',
  col_types = cols(PERWT = col_double())
)
#drop Alaska (STATEFIP 2) and Hawaii (STATEFIP 15) rows from years before 1960
aa <- filter(a, !(YEAR < 1960 & STATEFIP %in% c(2, 15)))
#build the analysis data frame `all` from the filtered extract:
#  Sex    - SEX recoded to a Male/Female factor
#  Race   - single race/ethnicity factor; HISPAN codes 1-8 take precedence
#           over the RACE code
#  Labs   - binary labor force participation (LABFORCE > 1 is 'Working')
#  Inc    - INCWAGE as an integer
#  CP     - CPI99 multiplier, kept as a double
#  IncAdj - wage income adjusted to 1999 US Dollars
#  WT     - PERWT in all years except 1950, where the sample line weight is used
#factor levels are listed explicitly so every label stays attached to its code
#even when a code is absent from the extract; without `levels`, factor() fails
#on a label-count mismatch or silently shifts labels
all <- aa %>%
  mutate(
    Sex = factor(SEX, levels = c(1, 2), labels = c('Male', 'Female')),
    Race = factor(
      ifelse(HISPAN > 0 & HISPAN < 9, 1,
        ifelse(RACE == 1, 2,
          ifelse(RACE == 2, 3,
            ifelse(RACE == 3, 4,
              ifelse(RACE %in% c(4, 5, 6), 5, 6))))),
      levels = 1:6,
      labels = c('Hispanic', 'white', 'black', 'Native American', 'Asian', 'Other')
    ),
    Labs = factor(
      ifelse(LABFORCE > 1, 1, 2),
      levels = c(1, 2),
      labels = c('Working', 'Not Working')
    ),
    #INCWAGE is a whole-dollar amount, so an integer is enough
    Inc = as.integer(INCWAGE),
    #CPI99 is a fractional multiplier; truncating it to an integer would
    #distort every adjusted income, so it must stay a double
    CP = as.numeric(CPI99),
    #adjust incomes to 1999 US Dollars
    IncAdj = Inc * CP,
    #weight with PERWT in all years except 1950, which uses SLWT
    WT = as.integer(ifelse(YEAR != 1950, PERWT, SLWT))
  ) %>%
  #limit to working-age people and exclude people of 'Other' race
  filter(AGE > 16 & AGE < 65 & Race != 'Other')
#make a dataframe of working-age women of all races but 'Other'
women <- all %>% filter(Sex == 'Female')
#FIGURE 1
#weighted number of working-age women by race in each year
c <- women %>%
  group_by(YEAR, Race) %>%
  summarise(TotRace = sum(PERWT))
#weighted number of working-age women by race and labor force status in each year
d <- women %>%
  group_by(YEAR, Race, Labs) %>%
  summarise(work = sum(PERWT))
#attach the race totals to each labor force group and compute its percentage;
#the join keys are spelled out so the match does not depend on whichever
#column names the two summaries happen to share
#('Other' was already removed when `all` was built, so no further filter is needed)
laborgraph <- left_join(d, c, by = c('YEAR', 'Race')) %>%
  mutate(PercW = work / TotRace * 100)
#plot figure 1, bar graph
Laborgraph1 <- ggplot(laborgraph, aes(x = YEAR, y = PercW / 100, fill = Race)) +
  #draw only the rows holding the percentage of women working
  geom_bar(
    data = laborgraph[laborgraph$Labs == 'Working', ],
    stat = 'identity',
    position = 'stack'
  ) +
  labs(fill='Race',title='Percent of Women in the Labor Force by Race, 1920-1990',x='Year',y='Percent of Women') +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~Race, ncol = 3) +
  #each facet is already labelled by race, so the fill legend is hidden;
  #'none' replaces the deprecated guides(fill = FALSE) form
  guides(fill = 'none') +
  scale_fill_brewer(palette = 'Set2')
#export image as a png
png('Laborgraph1.png', width = 600, height = 400)
print(Laborgraph1)
dev.off()
#FIGURE 2
#create overall dataframe of people who received wage income in the past year;
#INCWAGE 999999 is the N/A code and 999998 is the missing code, so both are
#excluded rather than being treated as real wages
income <- all %>% filter(INCWAGE > 0 & INCWAGE < 999998)
#weighted median adjusted income and weighted count by year, race, and sex;
#each income is repeated WT times so the median reflects the person weights
e <- income %>%
  group_by(YEAR, Race, Sex) %>%
  summarise(
    MedInc = median(rep(IncAdj, times = WT)),
    numberw = sum(WT)
  )
#plot Figure 2, line graph with one panel per sex
Incomeline <- ggplot(data = e, aes(x = YEAR, y = MedInc, colour = Race)) +
  geom_line() +
  geom_point() +
  labs(title='Median Income of Women by Race, 1940-1990',x='Year',y='Median Income, US Dollars') +
  scale_colour_brewer(palette = 'Set2') +
  scale_y_continuous(
    breaks = c(10000, 20000, 30000),
    labels = c('10,000', '20,000', '30,000')
  ) +
  facet_grid(Sex ~ .) +
  theme(legend.position = 'bottom')
#export image as a png
png('Incomeline.png', width = 500, height = 400)
print(Incomeline)
dev.off()
#FIGURE 3
#median income and weighted count of men who received an income in the past year;
#MedInc and numberw are renamed so they can sit beside the women's values,
#and Sex is dropped
meninc <- e %>%
  filter(Sex == 'Male') %>%
  rename(MedIncM = MedInc, numberm = numberw) %>%
  select(YEAR, Race, MedIncM, numberm)
#median income and weighted count of women who received an income in the past year
womeninc <- e %>% filter(Sex == 'Female')
#join women's median income to men's median income by year and race (keys are
#explicit so the match does not depend on shared column names), then compute
#  gap   - women's median income as a fraction of men's median income
#  ratio - weighted number of women earners relative to men earners
j <- left_join(meninc, womeninc, by = c('YEAR', 'Race')) %>%
  mutate(gap = MedInc / MedIncM, ratio = numberw / numberm)
#plot figure 3, bar graph; bar width is the women-to-men earner ratio
#rescaled to the range 1.05-7.35
Gapgraph <- ggplot(j, aes(x = YEAR, y = gap, fill = Race)) +
  geom_bar(
    data = j,
    stat = 'identity',
    position = 'stack',
    aes(width = rescale(ratio, c(1.05, 7.35)))
  ) +
  labs(fill='Race',title='Wage Gap by Race, 1940-1990',x='Year',y='Percent of Income of Men') +
  scale_y_continuous(labels = scales::percent) +
  #each facet is already labelled by race, so the fill legend is hidden;
  #'none' replaces the deprecated guides(fill = FALSE) form
  guides(fill = 'none') +
  facet_wrap(~Race, ncol = 3) +
  scale_fill_brewer(palette = 'Set2') +
  theme(legend.position = 'bottom')
#export image as a png
png('Gapgraph.png', width = 600, height = 400)
print(Gapgraph)
dev.off()
#FIGURE 6
#create a new variable for family structure from SPLOC and NCHILD:
#  SPLOC > 0 and NCHILD > 0  -> 'Married with Children'
#  SPLOC > 0 otherwise       -> 'Married without Children'
#  SPLOC == 0 and NCHILD == 0 -> 'Single without Children'
#  everything else           -> 'Single Parent'
#levels are listed explicitly so each label stays attached to its code even
#if one family type is absent from the data
k <- women %>%
  mutate(afam = factor(
    ifelse(SPLOC == 0 & NCHILD == 0, 4,
      ifelse(SPLOC > 0 & NCHILD > 0, 1,
        ifelse(SPLOC > 0, 2, 3))),
    levels = 1:4,
    labels = c('Married with Children','Married without Children','Single Parent','Single without Children')
  ))
#filter for only women who received wage income in the previous year;
#INCWAGE 999999 is the N/A code and 999998 is the missing code, so both are
#excluded rather than being treated as real wages
l <- k %>% filter(INCWAGE > 0 & INCWAGE < 999998)
#weighted median adjusted income for women by race and family structure in each
#year; each income is repeated WT times so the median reflects the weights
n <- l %>%
  group_by(YEAR, afam, Race) %>%
  summarise(MedInc1 = median(rep(IncAdj, times = WT)))
#plot Figure 6, line graph
Famline1 <- ggplot(data = n, aes(x = YEAR, y = MedInc1, colour = afam)) +
  geom_line() +
  geom_point() +
  scale_colour_brewer(palette = 'Paired') +
  scale_x_continuous(breaks = c(1940, 1960, 1980)) +
  scale_y_continuous(
    breaks = c(5000, 10000, 15000),
    labels = c('5,000', '10,000', '15,000')
  ) +
  labs(title='Median Income of Women by Race and Family Structure',x='Year',y='Median Income, US Dollars',colour='') +
  facet_wrap(~Race, ncol = 3) +
  theme(legend.position = 'bottom')
#export image as a png
png('Famline1.png', width = 600, height = 400)
print(Famline1)
dev.off()
#FIGURE 4
#weighted count of women (regardless of income) in each race group per year
oo <- summarise(group_by(k, Race, YEAR), racenum = sum(WT))
#weighted count of women per race, family structure, and year
pp <- summarise(group_by(k, Race, afam, YEAR), racea = sum(WT))
#attach the race totals to the family-structure counts and express each
#family structure as a percentage of the women of that race in that year
q <- mutate(left_join(pp, oo), percRA = racea / racenum * 100)
#plot Figure 4, stacked bar graph with one panel per race
RAgraph <- ggplot(q, aes(x = YEAR, y = percRA / 100, fill = afam)) +
  geom_bar(
    data = q,
    stat = 'identity',
    position = 'stack'
  ) +
  labs(
    fill = '',
    title = 'Distribution of Women by Race and Family Structure, 1940-1990',
    x = 'Year',
    y = 'Percent of Women'
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~Race, ncol = 3) +
  scale_fill_brewer(palette = 'Paired') +
  theme(legend.position = 'bottom')
#write the figure to a png file
png('RAgraph.png', width = 600, height = 400)
print(RAgraph)
dev.off()
#FIGURE 5
#weighted count of women (regardless of income) per year, family structure,
#and race
mm <- k %>%
  group_by(YEAR, afam, Race) %>%
  summarise(TotFam = sum(PERWT))
#the same count split further by labor force status
nn <- k %>%
  group_by(YEAR, afam, Race, Labs) %>%
  summarise(FamLab = sum(PERWT))
#join the totals onto the split counts and compute, for each race and family
#structure, the percentage of women in each labor force status
FamilyLabor <- left_join(nn, mm) %>%
  mutate(PercFamLab = FamLab / TotFam * 100)
#plot Figure 5, line graph of the 'Working' rows only, one panel per race
Laborgraph2 <- ggplot(
  data = FamilyLabor[FamilyLabor$Labs == 'Working', ],
  aes(x = YEAR, y = PercFamLab / 100, group = afam, colour = afam)
) +
  geom_line() +
  geom_point() +
  scale_colour_brewer(palette = 'Paired') +
  scale_x_continuous(breaks = c(1940, 1960, 1980)) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = 'Percent of Women in the Labor Force by Family Structure',
    x = 'Year',
    y = 'Percent of Women',
    colour = ''
  ) +
  facet_wrap(~Race, ncol = 3) +
  theme(legend.position = 'bottom')
#write the figure to a png file
png('Famline2.png', width = 600, height = 400)
print(Laborgraph2)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment