Created
October 31, 2016 20:56
-
-
Save lfortin-117/022078cd272d2e3c88368ddacc95ed4a to your computer and use it in GitHub Desktop.
Lab 7 LF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Part A For Boxplot and Line | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(scales) | |
setwd('/users/emily/dropbox/hist90_01/fall2016/labs/10_27') | |
#Reading in the data and filtering out those below earning age, as well as those in general quarters, | |
#and those not earning any income | |
ipums <- read_csv('Lab7.csv') | |
a <- ipums %>% filter(GQ==1 & AGE>=16 & INCWAGE>0 & INCWAGE<999999) | |
#Recoding for race | |
b <- a %>% mutate(SingleRace=factor(ifelse(HISPAN>0,1, | |
ifelse(RACESING==1,2, | |
ifelse(RACESING==2,3, | |
ifelse(RACESING==3,4, | |
ifelse(RACESING==4,5,6))))))) | |
levels(b$SingleRace) <- c('Hispanic','White','Black','Native American','Asian','Other') | |
c <- b %>% filter(SingleRace!='Other') | |
#Recoding for sex | |
d <- c %>% mutate(Sex=factor(SEX,labels=c('Male','Female'))) | |
#Adjusting for inflation with the CPI99 | |
e <- d %>% mutate(AdjInc=INCWAGE*CPI99) | |
#Weighting by PERWT in all years other than 1950, in which we use SLWT | |
ee <- e %>% mutate(Weight=ifelse(YEAR!=1950,PERWT,SLWT)) | |
#Removing the very high income entries from the set | |
eee <- ee %>% mutate(AdjInc=ifelse(AdjInc>59941.99,59941.99,AdjInc)) | |
#Aggregating the data and splitting up the quartiles whilst summarizing | |
f <- eee %>% group_by(SingleRace,YEAR,Sex) %>% | |
summarise(MED=median(rep(AdjInc,times=Weight)), | |
MIN=quantile(rep(AdjInc,times=Weight),0.1), | |
LOW=quantile(rep(AdjInc,times=Weight),0.25), | |
HIGH=quantile(rep(AdjInc,times=Weight),0.75), | |
MAX=quantile(rep(AdjInc,times=Weight),0.9)) | |
#Plotting both the box plot and the column graph | |
Boxgraph <- ggplot(f, aes(x=YEAR,ymin=MIN,lower=LOW,middle=MED,upper=HIGH,ymax=MAX,fill=Sex)) + | |
geom_boxplot(stat='identity',position='dodge') + | |
facet_wrap(~SingleRace) + | |
labs(y= 'Income, US Dollars', title='Figure 1: Income by Race and Sex for Those With Income, 1940-2000') | |
Linegraph <- ggplot(f,aes(x=YEAR,y=MED,color=SingleRace)) + | |
geom_line() + geom_point() + | |
facet_grid(Sex~.) + | |
labs(y='Median Income, US Dollars',color='Race/Ethnicity', title='Figure 2: Median Income by Race and Sex For Those With Income, 1940-2000') | |
#Part B for Column Graph | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(scales) | |
#Reading in the data and filtering out those below earning age, as well as those in general quarters, | |
#and those not earning any income | |
ipums <- read_csv('Lab7.csv') | |
a <- ipums %>% filter(GQ==1 & AGE>=16 & INCWAGE>0 & INCWAGE<999999) | |
#Recoding for race | |
b <- a %>% mutate(SingleRace=factor(ifelse(HISPAN>0,1, | |
ifelse(RACESING==1,2, | |
ifelse(RACESING==2,3, | |
ifelse(RACESING==3,4, | |
ifelse(RACESING==4,5,6))))))) | |
levels(b$SingleRace) <- c('Hispanic','White','Black','Native American','Asian','Other') | |
c <- b %>% filter(SingleRace!='Other') | |
#Recoding for sex | |
d <- c %>% mutate(Sex=factor(SEX,labels=c('Male','Female'))) | |
#Adjusting for inflation with the CPI99 | |
e <- d %>% mutate(AdjInc=INCWAGE*CPI99) | |
#Weighting by PERWT in all years other than 1950, in which we use SLWT | |
ee <- e %>% mutate(Weight=ifelse(YEAR!=1950,PERWT,SLWT)) | |
#Removing the very high income entries from the set | |
eee <- ee %>% mutate(AdjInc=ifelse(AdjInc>59941.99,59941.99,AdjInc)) | |
#Recode the IncWage variable for the column graph | |
f <- eee %>% mutate(IncomeF=factor(ifelse(AdjInc<10000, 1, | |
ifelse(AdjInc<20000, 2, | |
ifelse(AdjInc<30000, 3, | |
ifelse(AdjInc<40000, 4, | |
ifelse(AdjInc<59000, 5, 6))))))) | |
levels(f$IncomeF) <- c('1-9,999','10,000-19,999','20,000-29,999','30,000-39,999', | |
'40,000-58,999','59,000+') | |
#Aggregating and weighting | |
f1 <- f %>% group_by(YEAR, Sex, SingleRace, IncomeF) %>% summarize(Number=sum(Weight)) | |
#Plotting | |
Columngraph <- ggplot(data=f1, aes(x=YEAR,y=Number,fill=IncomeF)) + | |
geom_bar(stat='identity', position='fill') + | |
labs(x='Year', y='Percent of Population', fill='Income',title='Figure 3: Income by Sex and Race for Those With Income 1940-2000') + | |
scale_y_continuous(labels=scales::percent) + | |
scale_x_continuous(breaks=c(1950,1970,1990))+ | |
scale_fill_brewer(palette = 'Set1') + | |
facet_grid(Sex~.~SingleRace) + | |
theme_bw() + theme(legend.position='bottom') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment