Skip to content

Instantly share code, notes, and snippets.

@morganmelon
Created October 29, 2016 17:47
Show Gist options
  • Save morganmelon/4553c7f8f6ac7c537f9dfe772efcb217 to your computer and use it in GitHub Desktop.
Save morganmelon/4553c7f8f6ac7c537f9dfe772efcb217 to your computer and use it in GitHub Desktop.
Income by Race and Sex 1940-2000: Box-plot, Line graph, Column Graph
#Morgan Waterm
#Lab 7
#US History Through Census Data
#Income Analysis
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(scales)
#read in data
# Load the census extract and build the analysis sample `eee`.
ipums <- read_csv("data/Lab7dataYUS.csv")

# Steps, in order:
#   1. Keep people aged 16+ with GQ == 1 and a positive wage income below
#      999999 (presumably the "not applicable" code -- TODO confirm against
#      the codebook).
#   2. Recode race/ethnicity: any HISPAN > 0 is 'Hispanic'; otherwise the
#      RACESING code decides. Explicit `levels` keep the labels attached to the
#      right codes even if some code is absent from the extract.
#   3. Drop the residual 'Other' category.
#   4. Label SEX (1 = Male, 2 = Female).
#   5. Convert wages with CPI99 and cap them at 59941.99. The earlier version
#      compared against 599941.99, so values between the two were never capped.
#   6. Use SLWT as the weight for 1950 and PERWT for every other year.
eee <- ipums %>%
  filter(AGE >= 16 & GQ == 1 & INCWAGE > 0 & INCWAGE < 999999) %>%
  mutate(
    Race = factor(
      ifelse(HISPAN > 0, 1,
        ifelse(RACESING == 1, 2,
          ifelse(RACESING == 2, 3,
            ifelse(RACESING == 3, 4,
              ifelse(RACESING == 4, 5, 6)
            )
          )
        )
      ),
      levels = 1:6,
      labels = c(
        "Hispanic", "White", "Black", "Native American", "Asian", "Other"
      )
    )
  ) %>%
  filter(Race != "Other") %>%
  mutate(
    Sex = factor(SEX, levels = c(1, 2), labels = c("Male", "Female")),
    AdjInc = pmin(INCWAGE * CPI99, 59941.99),
    Weight = ifelse(YEAR != 1950, PERWT, SLWT)
  )
# Bin adjusted income into six brackets and compute each bracket's weighted
# share within every YEAR x Race x Sex cell.
#
# Intervals are closed on the left (`right = FALSE`), so the first bracket is
# everything below 10,000, matching its '$1-9,999' label. The earlier cut-off of
# 9999 pushed incomes from 9,999.00 to 9,999.99 into the '$10,000-19,999'
# bracket. `cut()` also keeps all six labels even when a bracket is empty.
inc <- eee %>%
  mutate(
    Income = cut(
      AdjInc,
      breaks = c(-Inf, 10000, 20000, 30000, 40000, 59000, Inf),
      labels = c(
        "$1-9,999", "$10,000-19,999", "$20,000-29,999",
        "$30,000-39,999", "$40,000-58,999", "$59,000+"
      ),
      right = FALSE
    )
  )

# Weighted count per bracket (numerator).
inc2 <- inc %>%
  group_by(YEAR, Race, Sex, Income) %>%
  summarise(ptotal = sum(Weight))

# Weighted count per YEAR x Race x Sex cell (denominator).
inc3 <- inc %>%
  group_by(YEAR, Race, Sex) %>%
  summarise(total = sum(Weight))

# Join on explicit keys so the match does not depend on whichever column
# names the two tables happen to share.
incjoin <- left_join(inc2, inc3, by = c("YEAR", "Race", "Sex")) %>%
  mutate(Percent = ptotal / total)
# Repeat every value `weights` times so that plain order statistics on the
# result act as weighted ones.
# NOTE(review): assumes Weight is a non-negative whole number -- confirm.
expand_weighted <- function(values, weights) {
  rep(values, times = weights)
}

# Weighted income summary per Race x YEAR x Sex: the median plus the 10th,
# 25th, 75th and 90th percentiles (stored as MIN, LOW, HIGH and MAX).
f <- summarise(
  group_by(eee, Race, YEAR, Sex),
  MED = median(expand_weighted(AdjInc, Weight)),
  MIN = quantile(expand_weighted(AdjInc, Weight), 0.1),
  LOW = quantile(expand_weighted(AdjInc, Weight), 0.25),
  HIGH = quantile(expand_weighted(AdjInc, Weight), 0.75),
  MAX = quantile(expand_weighted(AdjInc, Weight), 0.9)
)
# Box plot of the precomputed income summary in `f`, one panel per race with
# the sexes dodged side by side. The boxes are drawn from the summary columns
# directly (stat = "identity"): whiskers are MIN/MAX, hinges are LOW/HIGH and
# the centre line is MED.
boxplot <- ggplot(
  f,
  aes(
    x = YEAR, ymin = MIN, lower = LOW, middle = MED, upper = HIGH, ymax = MAX,
    fill = Sex
  )
) +
  geom_boxplot(stat = "identity", position = "dodge") +
  facet_wrap(~Race) +
  # The legend title must be given under the lower-case aesthetic name `fill`;
  # the earlier `Fill = 'Sex'` did not match any aesthetic.
  labs(
    title = "Income by Race and Sex for Those with Income, 1940-2000",
    x = "Year",
    y = "Income, US Dollars",
    fill = "Sex"
  ) +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = "bottom")

# Write the figure to disk.
png("boxplotLab7.png", width = 1000, height = 500)
print(boxplot)
dev.off()
# Line chart of median income (MED in `f`) over time: one coloured line per
# race, with a separate row of panels for each sex.
linegraph <- ggplot(f, aes(x = YEAR, y = MED, color = Race)) +
  geom_line(size = 1.5) +
  geom_point(size = 2) +
  facet_grid(Sex ~ .) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Median Income by Race and Sex for Those with Income, 1940-2000",
    x = "Year",
    y = "Median Income, US Dollars",
    color = "Race/Ethnicity"
  )

# Write the figure to disk.
png("linegraphLab7.png", width = 1000, height = 500)
print(linegraph)
dev.off()
# Stacked column chart of the income-bracket shares in `incjoin`: each bar is
# one year, split by bracket; panels are laid out with sex in rows and race
# in columns.
columngraph <- ggplot(incjoin, aes(x = YEAR, y = Percent, fill = Income)) +
  geom_bar(stat = "identity") +
  # Documented `rows ~ cols` form. The earlier `Sex~.~Race` was a double-tilde
  # formula that produced this layout only through how it was flattened.
  facet_grid(Sex ~ Race) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Income by Race and Sex for Those with Income, 1940-2000",
    x = "Year",
    y = "Percent",
    fill = "Income"
  ) +
  theme(legend.position = "bottom")

# Write the figure to disk.
png("columngraphlab7.png", width = 1000, height = 500)
print(columngraph)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment