Skip to content

Instantly share code, notes, and snippets.

@jailee jailee/JaiCode
Created Nov 15, 2016

Embed
What would you like to do?
Final Project Visualization Data Code
#Visualization 1: Wage income by race (White/Nonwhite) and sex, shown as box plots per census year
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(scales)
# Read the IPUMS extract. PERWT is pinned to double so the person weight is
# never guessed as another column type.
ipums <- read_csv('usa_00070.csv', col_types = cols(PERWT = col_double()))

# Build the analysis sample in one pipeline. Only `eee` is consumed further
# down, so no throw-away intermediates are kept (one of them used to be named
# `c`, masking base::c).
eee <- ipums %>%
  # Keep positive wage income strictly below the 999999 code.
  # NOTE(review): IPUMS documents 999998 as "missing" for INCWAGE as well;
  # confirm whether the cutoff should be 999998 for this extract.
  filter(INCWAGE > 0 & INCWAGE < 999999) %>%
  # Working-age people (16-65, both sexes), census years 1960 onward,
  # excluding Alaska (STATEFIP 2) and Hawaii (STATEFIP 15).
  filter(AGE >= 16 & AGE <= 65 & !(STATEFIP %in% c(2, 15)) & YEAR >= 1960) %>%
  mutate(
    # Collapse race to White (code 1) versus everyone else. `levels` is given
    # explicitly so the labels still line up when only one of the two groups
    # is present in the filtered data (factor() would otherwise error).
    RACE = factor(ifelse(RACE %in% c(1), 1, 2),
                  levels = c(1, 2),
                  labels = c('White', 'Nonwhite')),
    # Sex code 1 is labelled Male, every other code Female.
    SEX = ifelse(SEX == 1, 'Male', 'Female'),
    # Adjust wage income with the CPI99 multiplier
    # (presumably converts to 1999 dollars; confirm against the codebook).
    AdjInc = INCWAGE * CPI99,
    # Use the sample-line weight (SLWT) for 1950 and the person weight (PERWT)
    # otherwise. With the YEAR >= 1960 filter above the 1950 branch is never
    # taken, but it is kept so that relaxing the filter stays correct.
    Weight = ifelse(YEAR != 1950, PERWT, SLWT),
    # Top-code adjusted income at 59941.99.
    AdjInc = ifelse(AdjInc > 59941.99, 59941.99, AdjInc)
  )
# Weighted box-plot statistics for every RACE x YEAR x SEX cell.
# Each income is repeated `Weight` times (rep() truncates fractional weights
# toward zero) and the 10th/25th/50th/75th/90th percentiles are taken from the
# expanded vector. The expansion is done once per cell instead of five times;
# the 0.5 quantile under quantile()'s default rule equals median().
box_probs <- c(0.1, 0.25, 0.5, 0.75, 0.9)
f <- eee %>%
  group_by(RACE, YEAR, SEX) %>%
  summarise(STATS = list(quantile(rep(AdjInc, times = Weight),
                                  box_probs, names = FALSE))) %>%
  # Drop the leftover RACE/YEAR grouping so later verbs are not silently
  # applied per group.
  ungroup() %>%
  # Unpack the five percentiles into the columns the plot expects,
  # in the same order as before: MED, MIN, LOW, HIGH, MAX.
  mutate(MED = vapply(STATS, function(q) q[3], numeric(1)),
         MIN = vapply(STATS, function(q) q[1], numeric(1)),
         LOW = vapply(STATS, function(q) q[2], numeric(1)),
         HIGH = vapply(STATS, function(q) q[4], numeric(1)),
         MAX = vapply(STATS, function(q) q[5], numeric(1))) %>%
  select(-STATS)
# Draw one box per YEAR and SEX from the precomputed statistics
# (stat = 'identity'): whiskers span MIN..MAX, the box spans LOW..HIGH and the
# centre line is MED. Panels are split by RACE and boxes are dodged by SEX.
# The year span in the title is taken from the data so it cannot drift from
# whatever YEAR filter was applied upstream (it used to claim 1940-2000
# regardless of the filter).
year_span <- paste(range(f$YEAR), collapse = "-")
outputbox <- ggplot(f, aes(x = YEAR, ymin = MIN, lower = LOW, middle = MED,
                           upper = HIGH, ymax = MAX, fill = SEX)) +
  geom_boxplot(stat = 'identity', position = 'dodge') +
  facet_wrap(~RACE) +
  labs(x = 'YEAR', y = 'Income, U.S. Dollars', fill = 'SEX',
       title = paste0("Income by Race and Sex for Those with Income, ",
                      year_span)) +
  theme(legend.position = 'bottom')
# Render the finished plot into a 1000 x 500 pixel PNG file.
png(filename = "IncomeBox.png", width = 1000, height = 500)
# ggplot objects are only drawn when printed explicitly in a script.
print(outputbox)
# Close the device so the file is flushed to disk.
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.