# jakeesse/work_project.R

## work_project.R
# Make the project folder the working directory so that the relative file
# name passed to read_csv() further down resolves.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs as-is where this folder exists; consider starting R from the project
# directory instead.
setwd('/users/JakeEsse/Desktop/HIST 90')

library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)

# Load the extract and derive the analysis variables shared by every summary
# and plot in this script. The result, `h`, has one row per retained person
# plus the derived columns sex, race, weight, income and urban.
work_master_data <- read_csv('usa_00009.csv')

# The steps run as a single pipeline so no throwaway tables are left behind;
# the previous version stored one of them under the name `c`, which shadowed
# base::c().
h <- work_master_data %>%
  # Keep EMPSTAT code 1 only.
  # NOTE(review): presumably the "employed" code -- confirm against the codebook.
  filter(EMPSTAT == 1) %>%
  # Keep working-age people: 18 up to, but not including, 65.
  filter(AGE >= 18 & AGE < 65) %>%
  mutate(
    # Levels are spelled out so every label stays tied to its code. Without
    # `levels`, factor() pairs the labels with whichever distinct codes happen
    # to be present and stops with an error if one of them is missing.
    # NOTE(review): assumes SEX is coded 1 = male, 2 = female -- confirm.
    sex = factor(SEX, levels = c(1, 2), labels = c('male', 'female')),
    # RACE code 1 is treated as white; every other code is nonwhite.
    race = factor(ifelse(RACE == 1, 1, 0), levels = c(0, 1),
                  labels = c('nonwhite', 'white')),
    # 1950 records use SLWT as their weight, all other years use PERWT.
    weight = ifelse(YEAR == 1950, SLWT, PERWT),
    # INCWAGE 999999 is recoded to an income of 0.
    # NOTE(review): presumably the N/A code; any other special codes are left
    # untouched -- confirm against the codebook.
    income = ifelse(INCWAGE == 999999, 0, INCWAGE)
  ) %>%
  # Keep only people with a known urban/rural status (URBAN code above 0).
  filter(URBAN > 0) %>%
  # NOTE(review): assumes URBAN is coded 1 = rural, 2 = urban -- confirm.
  mutate(urban = factor(URBAN, levels = c(1, 2), labels = c('rural', 'urban')))
head(h)

# Race cohorts, reused by the cohort-specific summaries later in the script.
hh <- h %>% filter(race == 'white')

hhh <- h %>% filter(race == 'nonwhite')

# Median income for every year / urban status / race cell. rep() repeats each
# income `weight` times so the median is taken over the weighted population.
i <- h %>%
  group_by(YEAR, urban, race) %>%
  summarize(inc = median(rep(income, times = weight)))
head(i)

# Lookup table of per-year multipliers. The income plots multiply each median
# by `index` (their titles call the result inflation adjusted) and `years` is
# the key the summaries are merged on.
years <- c(1940, 1950, 1960, 1970, 1980, 1990)
index <- c(11.986, 7, 5.725, 4.54, 2.295, 1.344)

cpi <- data.frame(years = years, index = index)
print(cpi)

# Median-income charts. The same merge / summarize / plot steps were repeated
# for each of the four charts; they now live in three small helpers so the
# charts cannot drift apart. Every table and plot keeps its original name.

# Attach the per-year multiplier to a table of medians. merge() performs an
# inner join, so years that are absent from `cpi` are dropped.
add_cpi <- function(medians) {
  merge(medians, cpi, by.x = 'YEAR', by.y = 'years')
}

# Median income per year / urban status / sex cell. rep() repeats each income
# `weight` times so the median is taken over the weighted population.
median_income_by_sex <- function(people) {
  summarize(group_by(people, YEAR, urban, sex),
            inc = median(rep(income, times = weight)))
}

# Line-and-point chart with one panel per urban status. `mapping` is the
# aes() specification, which also chooses the variable that colours the lines.
income_plot <- function(medians, mapping, title) {
  ggplot(medians, mapping) +
    geom_line() +
    geom_point() +
    facet_grid(. ~ urban) +
    labs(title = title, x = 'year', y = 'dollars')
}

# By race, everyone.
j <- add_cpi(i)
head(j)

k <- income_plot(j, aes(x = YEAR, y = inc * index, color = race),
                 'Median Income, inflation adjusted')
print(k)

# By sex, everyone.
ii <- median_income_by_sex(h)
head(ii)

jj <- add_cpi(ii)
head(jj)

kk <- income_plot(jj, aes(x = YEAR, y = inc * index, color = sex),
                  'Median Income, inflation adjusted')
print(kk)

# By sex, white cohort.
iii <- median_income_by_sex(hh)
head(iii)

jjj <- add_cpi(iii)
head(jjj)

kkk <- income_plot(jjj, aes(x = YEAR, y = inc * index, color = sex),
                   'White Cohort Median Income, inflation adjusted')
print(kkk)

# By sex, nonwhite cohort.
iiii <- median_income_by_sex(hhh)
head(iiii)

jjjj <- add_cpi(iiii)
head(jjjj)

kkkk <- income_plot(jjjj, aes(x = YEAR, y = inc * index, color = sex),
                    'Non-White Cohort Median Income, inflation adjusted')
print(kkkk)

# Label the HRSWORK2 codes. The levels 0-8 are given explicitly so each label
# stays tied to its code: without `levels`, factor() pairs the nine labels
# with whichever distinct codes happen to be present and stops with an error
# when fewer than nine appear in the data.
# NOTE(review): the code-to-label mapping (0 = 'N/A', 1 = '1-14', ...,
# 8 = '60+') is assumed from the label order -- confirm against the codebook.
l <- mutate(
  h,
  hrswork = factor(
    HRSWORK2,
    levels = 0:8,
    labels = c('N/A', '1-14', '15-29', '30-34', '35-39', '40', '41-48', '49-59', '60+')
  )
)
head(l)

# Drop the records whose hours code is 0 (the 'N/A' label).
ll <- filter(l, HRSWORK2 > 0)

# Hours-worked charts. The plot and the by-sex count were repeated for each
# chart; they now live in helpers so the four charts stay consistent. Every
# table and plot keeps its original name.

# Stacked bars of weighted counts per hours category. position = 'fill'
# rescales each bar to 1, which is why the y axis is printed as percentages.
# `facets` is the facet_grid() formula in the documented rows ~ columns form;
# the earlier three-part formulas (rows ~ . ~ columns) described the same
# layout.
# NOTE(review): the y axis is still labelled 'Number' although it shows shares.
plot_hours_share <- function(counts, facets, title) {
  ggplot(counts, aes(x = YEAR, y = NUMBER, fill = hrswork)) +
    geom_bar(stat = 'identity', position = 'fill') +
    facet_grid(facets) +
    scale_fill_brewer(palette = 'Set3') +
    guides(fill = guide_legend(reverse = TRUE, title = 'Hours')) +
    labs(title = title, x = 'Year', y = 'Number') +
    scale_y_continuous(labels = scales::percent)
}

# Sum of person weights in every year / sex / urban status / hours cell.
count_hours_by_sex <- function(people) {
  summarize(group_by(people, YEAR, sex, urban, hrswork), NUMBER = sum(weight))
}

# Inner join on census year: rows whose YEAR is absent from `cpi` are dropped.
# The `index` column it adds is not used by these charts.
keep_cpi_years <- function(people) {
  merge(people, cpi, by.x = 'YEAR', by.y = 'years')
}

# Race by urban status.
m <- select(ll, YEAR, race, urban, hrswork, weight)

n <- keep_cpi_years(m)

head(n)

o <- summarize(group_by(n, YEAR, race, urban, hrswork), NUMBER = sum(weight))
head(o)

p <- plot_hours_share(o, race ~ urban, 'Hours Worked by Urban Status and Race')
print(p)

# Sex by urban status, everyone.
mm <- select(ll, YEAR, sex, urban, hrswork, weight)

nn <- keep_cpi_years(mm)
head(nn)

oo <- count_hours_by_sex(nn)
head(oo)

pp <- plot_hours_share(oo, sex ~ urban, 'Hours Worked by Urban Status and Sex')
print(pp)

# Sex by urban status, white cohort.
z <- filter(ll, race == 'white')

mmm <- select(z, YEAR, sex, urban, hrswork, weight)

nnn <- keep_cpi_years(mmm)
head(nnn)

ooo <- count_hours_by_sex(nnn)
head(ooo)

ppp <- plot_hours_share(ooo, sex ~ urban,
                        'White Cohort Hours Worked by Urban Status and Sex')
print(ppp)

# Sex by urban status, nonwhite cohort.
zz <- filter(ll, race == 'nonwhite')

mmmm <- select(zz, YEAR, sex, urban, hrswork, weight)

nnnn <- keep_cpi_years(mmmm)
head(nnnn)

oooo <- count_hours_by_sex(nnnn)
head(oooo)

pppp <- plot_hours_share(oooo, sex ~ urban,
                         'Non-White Cohort Hours Worked by Urban Status and Sex')
print(pppp)
	# NOTE(review): from this line through the visible end of the script, the
	# preceding analysis is repeated statement for statement (tab-indented), so
	# sourcing the file reads the CSV and prints every plot a second time --
	# confirm whether this second copy is intentional.
	# Make the project folder the working directory so that the relative file
	# name passed to read_csv() further down resolves.
	# NOTE(review): this is an absolute, machine-specific path.
	setwd('/users/JakeEsse/Desktop/HIST 90')

	library(readr)
	library(dplyr)
	library(ggplot2)
	library(scales)
	library(grid)

	# Load the extract and derive the analysis variables shared by every summary
	# and plot that follows. The result, `h`, has one row per retained person
	# plus the derived columns sex, race, weight, income and urban.
	work_master_data <- read_csv('usa_00009.csv')

	# The steps run as a single pipeline so no throwaway tables are left behind;
	# the previous version stored one of them under the name `c`, which shadowed
	# base::c().
	h <- work_master_data %>%
	  # Keep EMPSTAT code 1 only.
	  # NOTE(review): presumably the "employed" code -- confirm against the codebook.
	  filter(EMPSTAT == 1) %>%
	  # Keep working-age people: 18 up to, but not including, 65.
	  filter(AGE >= 18 & AGE < 65) %>%
	  mutate(
	    # Levels are spelled out so every label stays tied to its code. Without
	    # `levels`, factor() pairs the labels with whichever distinct codes happen
	    # to be present and stops with an error if one of them is missing.
	    # NOTE(review): assumes SEX is coded 1 = male, 2 = female -- confirm.
	    sex = factor(SEX, levels = c(1, 2), labels = c('male', 'female')),
	    # RACE code 1 is treated as white; every other code is nonwhite.
	    race = factor(ifelse(RACE == 1, 1, 0), levels = c(0, 1),
	                  labels = c('nonwhite', 'white')),
	    # 1950 records use SLWT as their weight, all other years use PERWT.
	    weight = ifelse(YEAR == 1950, SLWT, PERWT),
	    # INCWAGE 999999 is recoded to an income of 0.
	    # NOTE(review): presumably the N/A code; any other special codes are left
	    # untouched -- confirm against the codebook.
	    income = ifelse(INCWAGE == 999999, 0, INCWAGE)
	  ) %>%
	  # Keep only people with a known urban/rural status (URBAN code above 0).
	  filter(URBAN > 0) %>%
	  # NOTE(review): assumes URBAN is coded 1 = rural, 2 = urban -- confirm.
	  mutate(urban = factor(URBAN, levels = c(1, 2), labels = c('rural', 'urban')))
	head(h)

	# Race cohorts, reused by the cohort-specific summaries further down.
	hh <- h %>% filter(race == 'white')

	hhh <- h %>% filter(race == 'nonwhite')

	# Median income for every year / urban status / race cell. rep() repeats each
	# income `weight` times so the median is taken over the weighted population.
	i <- h %>%
	  group_by(YEAR, urban, race) %>%
	  summarize(inc = median(rep(income, times = weight)))
	head(i)

	# Lookup table of per-year multipliers. The income plots multiply each median
	# by `index` (their titles call the result inflation adjusted) and `years` is
	# the key the summaries are merged on.
	years <- c(1940, 1950, 1960, 1970, 1980, 1990)
	index <- c(11.986, 7, 5.725, 4.54, 2.295, 1.344)

	cpi <- data.frame(years = years, index = index)
	print(cpi)

	# Median-income charts. The same merge / summarize / plot steps were repeated
	# for each of the four charts; they now live in three small helpers so the
	# charts cannot drift apart. Every table and plot keeps its original name.

	# Attach the per-year multiplier to a table of medians. merge() performs an
	# inner join, so years that are absent from `cpi` are dropped.
	add_cpi <- function(medians) {
	  merge(medians, cpi, by.x = 'YEAR', by.y = 'years')
	}

	# Median income per year / urban status / sex cell. rep() repeats each income
	# `weight` times so the median is taken over the weighted population.
	median_income_by_sex <- function(people) {
	  summarize(group_by(people, YEAR, urban, sex),
	            inc = median(rep(income, times = weight)))
	}

	# Line-and-point chart with one panel per urban status. `mapping` is the
	# aes() specification, which also chooses the variable that colours the lines.
	income_plot <- function(medians, mapping, title) {
	  ggplot(medians, mapping) +
	    geom_line() +
	    geom_point() +
	    facet_grid(. ~ urban) +
	    labs(title = title, x = 'year', y = 'dollars')
	}

	# By race, everyone.
	j <- add_cpi(i)
	head(j)

	k <- income_plot(j, aes(x = YEAR, y = inc * index, color = race),
	                 'Median Income, inflation adjusted')
	print(k)

	# By sex, everyone.
	ii <- median_income_by_sex(h)
	head(ii)

	jj <- add_cpi(ii)
	head(jj)

	kk <- income_plot(jj, aes(x = YEAR, y = inc * index, color = sex),
	                  'Median Income, inflation adjusted')
	print(kk)

	# By sex, white cohort.
	iii <- median_income_by_sex(hh)
	head(iii)

	jjj <- add_cpi(iii)
	head(jjj)

	kkk <- income_plot(jjj, aes(x = YEAR, y = inc * index, color = sex),
	                   'White Cohort Median Income, inflation adjusted')
	print(kkk)

	# By sex, nonwhite cohort.
	iiii <- median_income_by_sex(hhh)
	head(iiii)

	jjjj <- add_cpi(iiii)
	head(jjjj)

	kkkk <- income_plot(jjjj, aes(x = YEAR, y = inc * index, color = sex),
	                    'Non-White Cohort Median Income, inflation adjusted')
	print(kkkk)

	# Label the HRSWORK2 codes. The levels 0-8 are given explicitly so each label
	# stays tied to its code: without `levels`, factor() pairs the nine labels
	# with whichever distinct codes happen to be present and stops with an error
	# when fewer than nine appear in the data.
	# NOTE(review): the code-to-label mapping (0 = 'N/A', 1 = '1-14', ...,
	# 8 = '60+') is assumed from the label order -- confirm against the codebook.
	l <- mutate(
	  h,
	  hrswork = factor(
	    HRSWORK2,
	    levels = 0:8,
	    labels = c('N/A', '1-14', '15-29', '30-34', '35-39', '40', '41-48', '49-59', '60+')
	  )
	)
	head(l)

	# Drop the records whose hours code is 0 (the 'N/A' label).
	ll <- filter(l, HRSWORK2 > 0)

	# Hours-worked charts. The plot and the by-sex count were repeated for each
	# chart; they now live in helpers so the four charts stay consistent. Every
	# table and plot keeps its original name.

	# Stacked bars of weighted counts per hours category. position = 'fill'
	# rescales each bar to 1, which is why the y axis is printed as percentages.
	# `facets` is the facet_grid() formula in the documented rows ~ columns form;
	# the earlier three-part formulas (rows ~ . ~ columns) described the same
	# layout.
	# NOTE(review): the y axis is still labelled 'Number' although it shows shares.
	plot_hours_share <- function(counts, facets, title) {
	  ggplot(counts, aes(x = YEAR, y = NUMBER, fill = hrswork)) +
	    geom_bar(stat = 'identity', position = 'fill') +
	    facet_grid(facets) +
	    scale_fill_brewer(palette = 'Set3') +
	    guides(fill = guide_legend(reverse = TRUE, title = 'Hours')) +
	    labs(title = title, x = 'Year', y = 'Number') +
	    scale_y_continuous(labels = scales::percent)
	}

	# Sum of person weights in every year / sex / urban status / hours cell.
	count_hours_by_sex <- function(people) {
	  summarize(group_by(people, YEAR, sex, urban, hrswork), NUMBER = sum(weight))
	}

	# Inner join on census year: rows whose YEAR is absent from `cpi` are dropped.
	# The `index` column it adds is not used by these charts.
	keep_cpi_years <- function(people) {
	  merge(people, cpi, by.x = 'YEAR', by.y = 'years')
	}

	# Race by urban status.
	m <- select(ll, YEAR, race, urban, hrswork, weight)

	n <- keep_cpi_years(m)

	head(n)

	o <- summarize(group_by(n, YEAR, race, urban, hrswork), NUMBER = sum(weight))
	head(o)

	p <- plot_hours_share(o, race ~ urban, 'Hours Worked by Urban Status and Race')
	print(p)

	# Sex by urban status, everyone.
	mm <- select(ll, YEAR, sex, urban, hrswork, weight)

	nn <- keep_cpi_years(mm)
	head(nn)

	oo <- count_hours_by_sex(nn)
	head(oo)

	pp <- plot_hours_share(oo, sex ~ urban, 'Hours Worked by Urban Status and Sex')
	print(pp)

	# Sex by urban status, white cohort.
	z <- filter(ll, race == 'white')

	mmm <- select(z, YEAR, sex, urban, hrswork, weight)

	nnn <- keep_cpi_years(mmm)
	head(nnn)

	ooo <- count_hours_by_sex(nnn)
	head(ooo)

	ppp <- plot_hours_share(ooo, sex ~ urban,
	                        'White Cohort Hours Worked by Urban Status and Sex')
	print(ppp)

	# Sex by urban status, nonwhite cohort.
	zz <- filter(ll, race == 'nonwhite')

	mmmm <- select(zz, YEAR, sex, urban, hrswork, weight)

	nnnn <- keep_cpi_years(mmmm)
	head(nnnn)

	oooo <- count_hours_by_sex(nnnn)
	head(oooo)

	pppp <- plot_hours_share(oooo, sex ~ urban,
	                         'Non-White Cohort Hours Worked by Urban Status and Sex')
	print(pppp)