Skip to content

Instantly share code, notes, and snippets.

@lfortin-117
Created October 31, 2016 20:56
Show Gist options
  • Save lfortin-117/022078cd272d2e3c88368ddacc95ed4a to your computer and use it in GitHub Desktop.
Save lfortin-117/022078cd272d2e3c88368ddacc95ed4a to your computer and use it in GitHub Desktop.
Lab 7 LF
#Part A For Boxplot and Line
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(scales)
setwd('/users/emily/dropbox/hist90_01/fall2016/labs/10_27')
#Reading in the data and filtering out those below earning age, as well as those in general quarters,
#and those not earning any income
ipums <- read_csv('Lab7.csv')
a <- ipums %>% filter(GQ==1 & AGE>=16 & INCWAGE>0 & INCWAGE<999999)
#Recoding for race
b <- a %>% mutate(SingleRace=factor(ifelse(HISPAN>0,1,
ifelse(RACESING==1,2,
ifelse(RACESING==2,3,
ifelse(RACESING==3,4,
ifelse(RACESING==4,5,6)))))))
levels(b$SingleRace) <- c('Hispanic','White','Black','Native American','Asian','Other')
c <- b %>% filter(SingleRace!='Other')
#Recoding for sex
d <- c %>% mutate(Sex=factor(SEX,labels=c('Male','Female')))
#Adjusting for inflation with the CPI99
e <- d %>% mutate(AdjInc=INCWAGE*CPI99)
#Weighting by PERWT in all years other than 1950, in which we use SLWT
ee <- e %>% mutate(Weight=ifelse(YEAR!=1950,PERWT,SLWT))
#Removing the very high income entries from the set
eee <- ee %>% mutate(AdjInc=ifelse(AdjInc>59941.99,59941.99,AdjInc))
#Aggregating the data and splitting up the quartiles whilst summarizing
f <- eee %>% group_by(SingleRace,YEAR,Sex) %>%
summarise(MED=median(rep(AdjInc,times=Weight)),
MIN=quantile(rep(AdjInc,times=Weight),0.1),
LOW=quantile(rep(AdjInc,times=Weight),0.25),
HIGH=quantile(rep(AdjInc,times=Weight),0.75),
MAX=quantile(rep(AdjInc,times=Weight),0.9))
#Plotting both the box plot and the column graph
Boxgraph <- ggplot(f, aes(x=YEAR,ymin=MIN,lower=LOW,middle=MED,upper=HIGH,ymax=MAX,fill=Sex)) +
geom_boxplot(stat='identity',position='dodge') +
facet_wrap(~SingleRace) +
labs(y= 'Income, US Dollars', title='Figure 1: Income by Race and Sex for Those With Income, 1940-2000')
Linegraph <- ggplot(f,aes(x=YEAR,y=MED,color=SingleRace)) +
geom_line() + geom_point() +
facet_grid(Sex~.) +
labs(y='Median Income, US Dollars',color='Race/Ethnicity', title='Figure 2: Median Income by Race and Sex For Those With Income, 1940-2000')
#Part B for Column Graph
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(scales)
#Reading in the data and filtering out those below earning age, as well as those in general quarters,
#and those not earning any income
ipums <- read_csv('Lab7.csv')
a <- ipums %>% filter(GQ==1 & AGE>=16 & INCWAGE>0 & INCWAGE<999999)
#Recoding for race
b <- a %>% mutate(SingleRace=factor(ifelse(HISPAN>0,1,
ifelse(RACESING==1,2,
ifelse(RACESING==2,3,
ifelse(RACESING==3,4,
ifelse(RACESING==4,5,6)))))))
levels(b$SingleRace) <- c('Hispanic','White','Black','Native American','Asian','Other')
c <- b %>% filter(SingleRace!='Other')
#Recoding for sex
d <- c %>% mutate(Sex=factor(SEX,labels=c('Male','Female')))
#Adjusting for inflation with the CPI99
e <- d %>% mutate(AdjInc=INCWAGE*CPI99)
#Weighting by PERWT in all years other than 1950, in which we use SLWT
ee <- e %>% mutate(Weight=ifelse(YEAR!=1950,PERWT,SLWT))
#Removing the very high income entries from the set
eee <- ee %>% mutate(AdjInc=ifelse(AdjInc>59941.99,59941.99,AdjInc))
#Recode the IncWage variable for the column graph
f <- eee %>% mutate(IncomeF=factor(ifelse(AdjInc<10000, 1,
ifelse(AdjInc<20000, 2,
ifelse(AdjInc<30000, 3,
ifelse(AdjInc<40000, 4,
ifelse(AdjInc<59000, 5, 6)))))))
levels(f$IncomeF) <- c('1-9,999','10,000-19,999','20,000-29,999','30,000-39,999',
'40,000-58,999','59,000+')
#Aggregating and weighting
f1 <- f %>% group_by(YEAR, Sex, SingleRace, IncomeF) %>% summarize(Number=sum(Weight))
#Plotting
Columngraph <- ggplot(data=f1, aes(x=YEAR,y=Number,fill=IncomeF)) +
geom_bar(stat='identity', position='fill') +
labs(x='Year', y='Percent of Population', fill='Income',title='Figure 3: Income by Sex and Race for Those With Income 1940-2000') +
scale_y_continuous(labels=scales::percent) +
scale_x_continuous(breaks=c(1950,1970,1990))+
scale_fill_brewer(palette = 'Set1') +
facet_grid(Sex~.~SingleRace) +
theme_bw() + theme(legend.position='bottom')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment