Skip to content

Instantly share code, notes, and snippets.

@r-conway
Created October 31, 2016 17:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r-conway/ffb7d13e87141da425f89a0eeafe37c6 to your computer and use it in GitHub Desktop.
Save r-conway/ffb7d13e87141da425f89a0eeafe37c6 to your computer and use it in GitHub Desktop.
#load libraries
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(scales)
# Read the IPUMS extract. PERWT is parsed as a double explicitly; every other
# column type is left to readr's guesser.
ipums <- read_csv(
  'data/usa_00014.csv',
  col_types = cols(PERWT = col_double())
)
# Restrict the sample: GQ code 1, age 16 or older, and a positive wage income
# below 999999 (presumably the IPUMS "not applicable" code -- confirm against
# the codebook). Rows where any condition is NA are dropped by filter().
a <- filter(
  ipums,
  GQ == 1,
  AGE >= 16,
  INCWAGE > 0,
  INCWAGE < 999999
)
#BOX PLOT
# Collapse HISPAN and RACESING into a single six-category race variable.
# Any positive HISPAN code takes precedence over RACESING; RACESING codes 1-4
# map to White, Black, Native American and Asian, and everything else is Other.
# Levels and labels are paired explicitly so that a category absent from the
# data cannot shift the labels onto the wrong codes.
race_labels <- c('Hispanic', 'White', 'Black', 'Native American', 'Asian', 'Other')
b <- a %>%
  mutate(
    SingleRace = factor(
      ifelse(HISPAN > 0, 1,
        ifelse(RACESING == 1, 2,
          ifelse(RACESING == 2, 3,
            ifelse(RACESING == 3, 4,
              ifelse(RACESING == 4, 5, 6)
            )
          )
        )
      ),
      levels = 1:6,
      labels = race_labels
    )
  )
# Drop the residual 'Other' category (rows with a missing SingleRace are
# dropped as well, because filter() discards NA comparisons).
c <- b %>% filter(SingleRace != 'Other')
# Create the Sex variable. Levels are stated explicitly (assumes SEX is coded
# 1 = male, 2 = female, the order the labels have always relied on) so the
# labelling does not depend on both codes being present in the data.
d <- c %>%
  mutate(Sex = factor(SEX, levels = c(1, 2), labels = c('Male', 'Female')))
# Adjust for inflation by scaling wage income with the CPI99 multiplier.
e <- d %>% mutate(AdjInc = INCWAGE * CPI99)
# In 1950 this question was used on the long form, so for that year we use
# SLWT instead of PERWT as the weight.
ee <- e %>% mutate(Weight = ifelse(YEAR != 1950, PERWT, SLWT))
# Account for the income ceiling: cap adjusted income at a single top-code
# value. One constant is used for both the comparison and the replacement, so
# incomes just below the cap are no longer nudged up to it.
income_cap <- 59941.99
eee <- ee %>% mutate(AdjInc = pmin(AdjInc, income_cap))
# Quantiles of `x` in which each value counts `w` times.
#
# Returns the same numbers as quantile(rep(x, times = w), probs) (the default
# type-7 definition) but reads them off the cumulative weights instead of
# building the expanded vector, which can be orders of magnitude longer than
# the data.
#
# x      numeric vector of values (no NA allowed).
# w      non-negative replication counts, same length as `x`; fractional
#        counts are truncated toward zero, exactly as rep() would do.
# probs  numeric vector of probabilities in [0, 1].
# Value: unnamed numeric vector, one quantile per element of `probs`
#        (NA when the total weight is zero).
weighted_quantile <- function(x, w, probs) {
  w <- trunc(w)
  stopifnot(length(x) == length(w), !anyNA(x), !anyNA(w), all(w >= 0))
  keep <- w > 0
  x <- x[keep]
  w <- w[keep]
  ord <- order(x)
  x <- x[ord]
  cum <- cumsum(w[ord])
  n <- if (length(cum) > 0) cum[length(cum)] else 0
  if (n == 0) {
    return(rep(NA_real_, length(probs)))
  }
  # Position of each requested quantile within the (virtual) expanded sample.
  index <- 1 + (n - 1) * probs
  lo <- floor(index)
  hi <- ceiling(index)
  # Element k of the expanded, sorted sample is the first value whose
  # cumulative weight reaches k.
  value_at <- function(k) x[findInterval(k - 1, cum) + 1]
  q_lo <- value_at(lo)
  q_hi <- value_at(hi)
  frac <- index - lo
  # Interpolate only where the position falls strictly between two distinct
  # values, as quantile() does.
  interp <- frac > 0 & q_hi != q_lo
  q_lo[interp] <- (1 - frac[interp]) * q_lo[interp] + frac[interp] * q_hi[interp]
  q_lo
}
# Identify the weighted median, first and third quartiles, and the 10th and
# 90th percentiles of adjusted income for every race / year / sex cell.
f <- eee %>%
  group_by(SingleRace, YEAR, Sex) %>%
  summarize(
    MED = weighted_quantile(AdjInc, Weight, 0.5),
    MIN = weighted_quantile(AdjInc, Weight, 0.1),
    LOW = weighted_quantile(AdjInc, Weight, 0.25),
    HIGH = weighted_quantile(AdjInc, Weight, 0.75),
    MAX = weighted_quantile(AdjInc, Weight, 0.9)
  ) %>%
  ungroup()
# Box plot drawn from the precomputed summary in `f`: whiskers run from the
# 10th to the 90th percentile, the box spans the quartiles, and the centre
# line is the median. One panel per race, boxes dodged by sex.
box_plot <- ggplot(
  data = f,
  aes(
    x = YEAR,
    ymin = MIN,
    lower = LOW,
    middle = MED,
    upper = HIGH,
    ymax = MAX,
    fill = Sex
  )
) +
  geom_boxplot(stat = 'identity', position = 'dodge') +
  facet_wrap(~SingleRace) +
  labs(
    title = 'Income by Race and Sex for Those with Income, 1940-2000',
    y = 'Income, in U.S. Dollars',
    x = 'Year'
  )
# Display the plot, then write that same plot to disk.
box_plot
ggsave('Box.pdf', plot = box_plot, width = 10, height = 7.5)
# Line graph of the weighted median income over time: one coloured line per
# race, with a separate row of panels for each sex.
line_plot <- ggplot(f, aes(x = YEAR, y = MED, color = SingleRace)) +
  geom_line() +
  geom_point() +
  facet_grid(Sex ~ .) +
  labs(
    title = 'Median Income by Race and Sex for Those with Income, 1940-2000',
    x = 'Year',
    y = 'Median Income, in U.S. Dollars'
  )
# Display the plot, then write that same plot to disk.
line_plot
ggsave('Line.pdf', plot = line_plot, width = 10, height = 7.5)
# Create income groups from adjusted income.
# Bins are half-open [lower, upper) on round-dollar breaks, so a fractional
# inflation-adjusted income such as 9999.50 or 19500 lands in the bin whose
# label describes it. Labels are attached to the breaks directly, which keeps
# them correct even when a bin is empty.
income_breaks <- c(-Inf, 10000, 20000, 30000, 40000, 59000, Inf)
income_labels <- c(
  '$1-9,999',
  '$10,000-19,999',
  '$20,000-29,999',
  '$30,000-39,999',
  '$40,000-58,999',
  '$59,000+'
)
g <- eee %>%
  mutate(
    Group = cut(
      AdjInc,
      breaks = income_breaks,
      labels = income_labels,
      right = FALSE
    )
  )
# Create graphing groups: total weight in every year / sex / race / income
# group cell.
h <- g %>%
  group_by(YEAR, Sex, SingleRace, Group) %>%
  summarise(Number = sum(Weight)) %>%
  ungroup()
# Stacked bar graph of the income-group shares per year. position = 'fill'
# rescales every bar to sum to 1, so the y axis is shown as a percentage.
# Panels are laid out with sex in rows and race in columns, written in the
# documented `rows ~ cols` form.
income_columns <- ggplot(data = h, aes(x = YEAR, y = Number, fill = Group)) +
  geom_bar(stat = 'identity', position = 'fill') +
  facet_grid(Sex ~ SingleRace) +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = 'Income by Race and Sex for Those with Income, 1940-2000',
    y = 'Percent',
    x = 'Year'
  )
# Display the plot, then write that same plot to disk.
income_columns
ggsave('IncomeColumns.pdf', plot = income_columns, width = 10, height = 7.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment