Skip to content

Instantly share code, notes, and snippets.

@helenaeitel
Created October 31, 2016 21:29
Show Gist options
  • Save helenaeitel/7e4a7e07289b50eac1de494070a3a81a to your computer and use it in GitHub Desktop.
Save helenaeitel/7e4a7e07289b50eac1de494070a3a81a to your computer and use it in GitHub Desktop.
#Helena Eitel
#Professor Merchant
#QSS 30.05
#Lab Assignment 7
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# Switch the session to the project folder. The path is relative, so it only
# resolves when R is started from the directory that contains 'Documents'.
# Every relative path used later in this script (the CSV read below and the
# PNG files written by png()) is resolved against this directory.
# NOTE(review): setwd() inside a script is fragile and machine-specific;
# consider building paths from a project root instead.
setwd('./Documents/Dartmouth/16F/QSS 30.5/QSS 30.05 Projects/')
# Load the extract (presumably an IPUMS extract, going by the variable name).
# The rest of the script reads the columns GQ, AGE, INCWAGE, HISPAN, RACESING,
# SEX, CPI99, YEAR, PERWT and SLWT from it.
ipums <- read_csv('./Extract 7.csv')
# Restrict the sample to wage earners of working age:
#   GQ == 1           not living in group quarters
#   AGE >= 16         working age
#   INCWAGE > 0       earned some wage income in the last year
#   INCWAGE < 999999  excludes the 999999 code (presumably "not applicable")
# Conditions separated by commas are combined with a logical AND by filter().
a <- filter(
  ipums,
  GQ == 1,
  AGE >= 16,
  INCWAGE > 0,
  INCWAGE < 999999
)
# Create a single race/ethnicity variable from ethnicity (HISPAN) and race
# (RACESING). Any HISPAN code above 0 is classed as Hispanic regardless of
# race; everyone else is classed by RACESING (1-4), and any remaining value
# becomes 'Other'.
#
# The codes 1-6 are mapped to labels through explicit `levels`/`labels`
# instead of assigning levels() afterwards. Assigning levels() relabels by
# position, so if any group were missing from the data the remaining groups
# would silently receive the wrong names.
b <- a %>%
  mutate(
    singRace = factor(
      ifelse(HISPAN > 0, 1,
        ifelse(RACESING == 1, 2,
          ifelse(RACESING == 2, 3,
            ifelse(RACESING == 3, 4,
              ifelse(RACESING == 4, 5, 6))))),
      levels = 1:6,
      labels = c('Hispanic', 'White', 'Black', 'Native American', 'Asian', 'Other')
    )
  )
# Drop the residual 'Other' group from the analysis.
# NOTE(review): this data frame is named `c`, which shadows base::c as a
# variable. Calls such as c('a', 'b') still work because R skips non-function
# objects when looking up a function, but the name is kept only because later
# steps of the script read it.
c <- b %>% filter(singRace != 'Other')
# Label the SEX codes: the lower code becomes 'Male', the higher 'Female'.
d <- mutate(c, Sex = factor(SEX, labels = c('Male', 'Female')))
# Adjust for inflation: express wage income in 1999-equivalent dollars by
# multiplying with the CPI99 factor.
e <- mutate(d, AdjInc = INCWAGE * CPI99)
# Pick the weight per row: 1950 rows use SLWT, all other years use PERWT.
ee <- mutate(e, Weight = ifelse(YEAR == 1950, SLWT, PERWT))
# Cap adjusted income at the topcode of 59,941.99.
eee <- mutate(ee, AdjInc = pmin(AdjInc, 59941.99))
# Integer copy of the adjusted income (fraction truncated), which makes it
# easier to bucket incomes into whole-dollar ranges later on.
eeee <- mutate(eee, AdjInc1 = as.integer(AdjInc))
# Weighted income distribution for each race x year x sex cell, used to draw
# the box plot and the median line graph.
# Weighting is done by repeating each income `Weight` times before taking the
# median / quantiles:
#   Min  = 10th percentile      Low  = 25th percentile
#   Med  = median               High = 75th percentile
#   Max  = 90th percentile
f <- summarise(
  group_by(eee, singRace, YEAR, Sex),
  Med  = median(rep(AdjInc, times = Weight)),
  Min  = quantile(rep(AdjInc, times = Weight), 0.1),
  Low  = quantile(rep(AdjInc, times = Weight), 0.25),
  High = quantile(rep(AdjInc, times = Weight), 0.75),
  Max  = quantile(rep(AdjInc, times = Weight), 0.9)
)
# Make boxplot: income distribution by year, dodged by sex, one panel per
# race/ethnicity group.
# The boxes are drawn directly from the precomputed columns of `f`
# (stat = 'identity'): whiskers run from Min (10th pct) to Max (90th pct),
# the box from Low (25th) to High (75th), with Med as the middle line.
# R is case-sensitive, so the aesthetics must use the column names exactly as
# created in `f` (Min/Low/Med/High/Max); upper-case names do not exist there.
boxplot <- ggplot(f, aes(x = YEAR, ymin = Min, lower = Low, middle = Med,
                         upper = High, ymax = Max, fill = Sex)) +
  scale_fill_brewer(palette = 'Set1') +
  geom_boxplot(stat = 'identity', position = 'dodge') +
  labs(title = 'Income by Race and Sex for those with Income, 1940-2000',
       x = 'Year', y = 'Income, US Dollars') +
  facet_wrap(~singRace, ncol = 3) +
  theme(legend.position = 'bottom')
# Write the plot to a PNG in the current working directory.
png('7Boxplot.png', width = 1000, height = 500)
print(boxplot)
dev.off()
# Make linegraph: weighted median income over time, one line per
# race/ethnicity group, with a separate panel row for each sex.
# The y aesthetic must be `Med`, matching the column created in `f`
# (R is case-sensitive; there is no `MED` column).
linegraph <- ggplot(f, aes(x = YEAR, y = Med, color = singRace)) +
  geom_line() + geom_point() +
  labs(title = 'Median Income by Race and Sex for Those with Income, 1940-2000',
       x = 'Year', y = 'Median Income, US Dollars', colour = 'Race/Ethnicity') +
  facet_grid(Sex ~ .)
# Write the plot to a PNG in the current working directory.
png('7Linegraph.png', width = 1000, height = 500)
print(linegraph)
dev.off()
# Create categories for income from the whole-dollar adjusted income.
# cut() with fixed breaks and labels replaces a ladder of `%in%` range tests
# followed by a positional levels() assignment. With the ladder, any value not
# matched by a range (including 0 and NA) fell through to the top bracket, and
# an empty bracket would have shifted every later label. Here:
#   - intervals are closed on the left (right = FALSE), so for integer
#     incomes [10000, 20000) is exactly 10000-19999, and so on;
#   - incomes below 10000 (including a sub-dollar amount truncated to 0) land
#     in the lowest bracket;
#   - a missing income stays NA instead of being counted as '$59,000+';
#   - all six levels always exist, in this order, even if a bracket is empty.
g <- eeee %>%
  mutate(
    Income = cut(
      AdjInc1,
      breaks = c(-Inf, 10000, 20000, 30000, 40000, 59000, Inf),
      labels = c('$1-9,999', '$10,000-19,999', '$20,000-29,999',
                 '$30,000-39,999', '$40,000-58,999', '$59,000+'),
      right = FALSE
    )
  )
# Collect the (weighted) number of people in each income category.
h <- g %>%
  group_by(singRace, YEAR, Sex, Income) %>%
  summarise(Inc = sum(Weight))
# Collect the (weighted) total number of people per race x year x sex cell.
i <- g %>%
  group_by(singRace, YEAR, Sex) %>%
  summarise(Totpeople = sum(Weight))
# Calculate the share of people in each category. The join keys are spelled
# out so the match does not depend on whichever column names happen to be
# shared between the two tables.
j <- left_join(h, i, by = c('singRace', 'YEAR', 'Sex')) %>%
  mutate(percinc = Inc / Totpeople) %>%
  ungroup()
# Make bargraph: stacked bars showing the share of each income category per
# year, in a grid with one row per sex and one column per race/ethnicity.
# position = 'fill' rescales each bar to a total of 1, so the y axis can be
# labelled as a percentage.
# The facet specification uses the documented `rows ~ cols` form; the earlier
# three-part formula (Sex ~ . ~ singRace) is not a documented layout and only
# worked through how the formula happened to be flattened.
bargraph <- ggplot(j, aes(x = YEAR, y = percinc, fill = Income)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Income by Race and Sex for those with Income, 1940-2000',
       x = 'Year', y = 'Percent') +
  theme(legend.position = 'bottom') +
  facet_grid(Sex ~ singRace)
# Write the plot to a PNG in the current working directory.
png('7Bargraph.png', width = 1000, height = 500)
print(bargraph)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment