Skip to content

Instantly share code, notes, and snippets.

@SirSamAlot280
Created October 29, 2016 20:44
Show Gist options
  • Save SirSamAlot280/abfd66d7727291f793d550278abc5bf2 to your computer and use it in GitHub Desktop.
Save SirSamAlot280/abfd66d7727291f793d550278abc5bf2 to your computer and use it in GitHub Desktop.
#Load packages
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
library(scales)
#Read in selected IPUMs data
ipums <- read_csv('./data/10_27.csv')
#Filter for only people not in group quarters, over the age of 16, and removing people
#who don't have an income
a <- ipums %>% filter(GQ==1 & AGE>16 & INCWAGE>0 & INCWAGE<999999)
#Condense race into five racial groups
b <- a %>% mutate(Race=factor(ifelse(HISPAN>0 & HISPAN<=4,1,
ifelse(RACESING==1,2,
ifelse(RACESING==2,3,
ifelse(RACESING==3,4,
ifelse(RACESING==4,5,6)))))))
levels(b$Race) <- c('Hispanic','White','Black','Native American','Asian','Other')
#Filter out individuals classified in the "Other" race category
c <- b %>% filter(Race!='Other')
#Calssify who is male and female
d <- c %>% mutate(Sex=factor(SEX,labels=c('Male','Female')))
#Weigh the income for indvidiuals
e <- d %>% mutate(AdjInc=INCWAGE*CPI99)
#Weigh the years accordingly
f <- e %>% mutate(Weight=ifelse(YEAR!=1950,PERWT,SLWT))
#Top code for all years to match the 1940 topcode
g <- f %>% mutate(AdjInc=ifelse(AdjInc>59941.99,59941.99,AdjInc))
#Group by necessary variables and summarise for median income
h <- g %>% group_by(Race,YEAR,Sex) %>%
summarise(MED=median(rep(AdjInc,times=Weight)),
MIN=quantile(rep(AdjInc,times=Weight),0.1),
LOW=quantile(rep(AdjInc,times=Weight),0.25),
HIGH=quantile(rep(AdjInc,times=Weight),0.75),
MAX=quantile(rep(AdjInc,times=Weight),0.9))
#Factor out the income levels/breaks
gg <- g %>% mutate(IncGr=factor(ifelse(AdjInc>=1 & AdjInc<10000,1,
ifelse(AdjInc>10000 & AdjInc<20000,2,
ifelse(AdjInc>=20000 & AdjInc<30000,3,
ifelse(AdjInc>=30000 & AdjInc<40000,4,
ifelse(AdjInc>=40000 & AdjInc<59000,5,6))))),
labels=c('$1-9,9999','$10,000-19,000','$20,000-29,999','$30,000-39,999','$40,000-58,999','$59,000+')))
#Calculate the percentage for each race in terms of the newly created income breaks
i <- gg %>% group_by(YEAR,IncGr,Sex,Race) %>% summarise(Number=sum(Weight))
j <- i %>% group_by(YEAR,Sex,Race) %>% summarise(Total=sum(Number))
k <- left_join(i,j) %>% mutate(pct=Number/Total*100)
#Create box plot for this data
png('Race_Box_Plot.png',height=500,width=1000)
ggplot(h,aes(x=YEAR,ymin=MIN,lower=LOW,middle=MED,
upper=HIGH,ymax=MAX,fill=Sex))+
geom_boxplot(stat='identity',position='dodge')+
labs(fill='Sex',title='Income by Sex and Race for Those with Income,1940-2000',x='Year',y='Income in U.S. Dollars')+
facet_wrap(~Race)+
theme_bw() + theme(legend.position='bottom')+
scale_y_continuous(labels=scales::comma)+
scale_fill_brewer(palette='Set1')
dev.off()
#Create line graph with markers for the data
png('Race_AdjInc_Line_Plot.png',height=500,width=1000)
ggplot(h,aes(x=YEAR,y=MED,color=Race)) +
geom_line() + geom_point() +
labs(color='Race',
title='Median Income by Sex and Race for Those with Income,1940-2000',
x='Year',y='Income in U.S. Dollars')+
scale_y_continuous(labels=scales::comma)+
scale_fill_brewer(palette='Set2')+
facet_grid(Sex~.)
dev.off()
#Create bar graph for this data
png('Race_IncGr_Bar_Graph.png',height=500,width=1000)
ggplot(k, aes(x=YEAR,y=pct/100,fill=IncGr)) +
geom_bar(stat='identity') +
labs(fill='Sex',title='Income by Race and Sex for Those with Income,1940-2000',
x='Year',y='Income in U.S. Dollars') +
scale_y_continuous(labels=scales::percent) +
facet_grid(Sex~.~Race) +
scale_fill_brewer(palette ='Set2')
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment