# Source: GitHub gist r-conway/ffb7d13e87141da425f89a0eeafe37c6
# Created: October 31, 2016 17:31
# Load libraries ----
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(scales)

# Load data ----
# PERWT is parsed as a double explicitly; all other column types are left to
# readr's guessing.
ipums <- read_csv('data/usa_00014.csv', col_types=cols(PERWT=col_double()))

# Restrict the sample ----
# Keep records with GQ code 1, age 16 or older, and positive wage income.
# IPUMS reserves INCWAGE 999999 for "N/A" and 999998 for "missing"; the upper
# bound excludes both sentinels so neither is treated as a real income.
a <- ipums %>%
  filter(GQ == 1 & AGE >= 16 & INCWAGE > 0 & INCWAGE < 999998)
# Derived variables ----
# Builds the analysis table `eee` from the filtered sample `a`:
#   SingleRace - Hispanic origin first, then RACESING codes 1-4, else Other
#   Sex        - labelled version of SEX
#   AdjInc     - wage income multiplied by CPI99, capped at `income_ceiling`
#   Weight     - person weight used by every later summary

# Labels are bound to explicit codes 1..6, so a code that happens to be absent
# from the data cannot shift the remaining labels onto the wrong group.
race_labels <- c('Hispanic','White','Black','Native American','Asian','Other')

# Ceiling applied to adjusted income.
# NOTE(review): looks like the 1940 top-code expressed in 1999 dollars --
# confirm. The original compared against 59941.9 but assigned 59941.99; a
# single value is now used for both.
income_ceiling <- 59941.99

eee <- a %>%
  mutate(SingleRace=factor(ifelse(HISPAN>0,1,
                           ifelse(RACESING==1,2,
                           ifelse(RACESING==2,3,
                           ifelse(RACESING==3,4,
                           ifelse(RACESING==4,5,6))))),
                           levels=1:6, labels=race_labels)) %>%
  # Remove Other (rows whose SingleRace is NA are dropped here as well)
  filter(SingleRace != 'Other') %>%
  mutate(
    # Explicit levels: SEX code 1 -> 'Male', code 2 -> 'Female'
    Sex=factor(SEX, levels=1:2, labels=c('Male','Female')),
    # Adjust for inflation, then apply the income ceiling
    AdjInc=pmin(INCWAGE*CPI99, income_ceiling),
    # Per the original author's note, the 1950 question was asked on the long
    # form, so that year uses SLWT instead of PERWT
    Weight=ifelse(YEAR != 1950, PERWT, SLWT)
  )
# Weighted summary statistics ----

# Quantiles of `values` as if each value were repeated `counts` times.
#
# Intended to match quantile(rep(values, times = counts), probs) with R's
# default (type 7) definition, but works from cumulative counts instead of
# materialising the expanded vector.
#
# values: numeric vector.
# counts: non-negative replication counts, same length as `values`;
#         fractional parts are discarded, as rep() does.
# probs:  probabilities in [0, 1].
# Returns an unnamed numeric vector the same length as `probs`; NA when every
# count is zero.
replicated_quantile <- function(values, counts, probs) {
  counts <- trunc(counts)
  stopifnot(!anyNA(counts), all(counts >= 0))

  # Entries with a zero count contribute nothing to the expanded vector
  present <- counts > 0
  values <- values[present]
  counts <- counts[present]
  stopifnot(!anyNA(values))
  if (length(values) == 0) {
    return(rep(NA_real_, length(probs)))
  }

  by_value <- order(values)
  values <- values[by_value]
  # last_pos[i] is the last position occupied by values[i] in the expansion
  last_pos <- cumsum(counts[by_value])
  total <- last_pos[length(last_pos)]

  # Type-7 quantile: interpolate between the neighbours of a fractional position
  pos <- 1 + (total - 1) * probs
  below <- floor(pos)
  above <- ceiling(pos)
  value_at <- function(k) values[findInterval(k - 1, last_pos) + 1]
  share <- pos - below
  (1 - share) * value_at(below) + share * value_at(above)
}

# Median, quartiles, and 10th/90th percentiles of adjusted income for every
# race x year x sex cell. MIN and MAX are the 10% and 90% points, not the
# extremes. The result is ungrouped so later steps do not inherit the grouping.
f <- eee %>%
  group_by(SingleRace, YEAR, Sex) %>%
  summarize(MED=replicated_quantile(AdjInc, Weight, 0.5),
            MIN=replicated_quantile(AdjInc, Weight, 0.1),
            LOW=replicated_quantile(AdjInc, Weight, 0.25),
            HIGH=replicated_quantile(AdjInc, Weight, 0.75),
            MAX=replicated_quantile(AdjInc, Weight, 0.9)) %>%
  ungroup()
# Box plot ----
# One pre-computed box per year and sex (whiskers at MIN/MAX, hinges at
# LOW/HIGH, centre line at MED), with one panel per race group.
box_plot <- ggplot(
  data=f,
  mapping=aes(x=YEAR, ymin=MIN, lower=LOW, middle=MED, upper=HIGH, ymax=MAX,
              fill=Sex)
) +
  geom_boxplot(stat='identity', position='dodge') +
  facet_wrap(~SingleRace) +
  labs(
    title='Income by Race and Sex for Those with Income, 1940-2000',
    x='Year',
    y='Income, in U.S. Dollars'
  )
box_plot
ggsave('Box.pdf', plot=box_plot, width=10, height=7.5)
# Line graph ----
# Median adjusted income over time, one coloured line per race group and one
# panel row per sex.
median_lines <- ggplot(f, aes(x=YEAR, y=MED, color=SingleRace)) +
  geom_line() +
  geom_point() +
  facet_grid(Sex~.) +
  labs(
    title='Median Income by Race and Sex for Those with Income, 1940-2000',
    y='Median Income, in U.S. Dollars',
    x='Year'
  )
median_lines
ggsave('Line.pdf', plot=median_lines, width=10, height=7.5)
# Income groups ----
# Band edges are left-closed ([10000, 20000), ...), so fractional adjusted
# incomes such as 9999.50 stay in the band their label describes. The second
# band ends below 20,000; the original cut-off of 19000 pushed incomes of
# 19,001-19,999 into the '$20,000-29,999' band.
income_breaks <- c(-Inf, 10000, 20000, 30000, 40000, 59000, Inf)
income_labels <- c('$1-9,999','$10,000-19,999','$20,000-29,999',
                   '$30,000-39,999','$40,000-58,999','$59,000+')

# cut() attaches each label to its own interval, so a band with no
# observations cannot shift the labels of the other bands.
g <- eee %>%
  mutate(Group=cut(AdjInc, breaks=income_breaks, labels=income_labels,
                   right=FALSE))

# Weighted head-count per year x sex x race x income band, returned ungrouped.
h <- g %>%
  group_by(YEAR, Sex, SingleRace, Group) %>%
  summarise(Number=sum(Weight)) %>%
  ungroup()
# Bar graph ----
# Stacked bars normalised to 100% (position='fill') showing the share of each
# income band per year; panel rows are sexes and panel columns are race groups.
# The facet formula was previously written `Sex~.~SingleRace`, a double-tilde
# formula that only worked through lenient formula parsing; it is now the
# plain rows ~ columns form.
income_columns <- ggplot(data=h, aes(x=YEAR, y=Number, fill=Group)) +
  geom_bar(stat='identity', position='fill') +
  facet_grid(Sex~SingleRace) +
  scale_fill_brewer(palette='Set2') +
  scale_y_continuous(labels=scales::percent) +
  labs(title='Income by Race and Sex for Those with Income, 1940-2000',
       y='Percent', x='Year')
income_columns
ggsave('IncomeColumns.pdf', plot=income_columns, width=10, height=7.5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment