Last active
February 25, 2016 00:31
-
-
Save carolineallan/0c1af2fd204a42fe28b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Caroline Allan
## U.S. History Through Census Data
## Project 3
## Female Labor Force Participation 1880-2000
## February 11, 2016
# Clear the global environment so objects left over from earlier work cannot
# leak into this analysis.
# NOTE(review): this also deletes whatever the caller had in their workspace
# and does not unload packages or reset options; restarting R is the cleaner
# way to get a fresh session.
rm(list = ls())

# load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(ggthemes)
library(tidyr)

## Set Working Directory
# The CSV read below and every ggsave() call later in the script use relative
# paths, so they all resolve against this directory.
# NOTE(review): the path is specific to one machine; consider running the
# script from the project directory instead of calling setwd().
setwd("~/Desktop/HistThroughCensusData/Project3")

## Read In Data
# Columns used later: SEX, AGE, YEAR, PERWT, SLWT, RACE, LABFORCE, INCWAGE,
# MARST (presumably an IPUMS USA extract -- confirm).
data_raw <- read_csv("usa_00008.csv")
## Clean Data
# Keep women (SEX == 2) aged 18 to 64 and derive the analysis variables:
#   weight   - SLWT for 1950, PERWT for every other year
#   race     - "white" when RACE == 1, "nonwhite" otherwise
#   income   - INCWAGE with the 999999 code replaced by 0
#   married  - "Married" when MARST is 1 or 2, "Not Married" otherwise
#   InLab    - 1 when LABFORCE == 2, else 0
#   NotInLab - 1 when LABFORCE == 1, else 0
# The factor levels are given explicitly so the labels stay attached to the
# right code (and factor() does not error) even when a subset happens to
# contain only one of the two categories.
# NOTE(review): IPUMS documents 999998 as a separate "missing" code for
# INCWAGE; it is left untouched here and would enter the medians as a real
# value -- confirm whether it occurs in this extract.
a <- data_raw %>%
  filter(SEX == 2, AGE >= 18 & AGE < 65) %>%
  select(YEAR, PERWT, SLWT, AGE, RACE, LABFORCE, INCWAGE, MARST) %>%
  mutate(
    weight = ifelse(YEAR == 1950, SLWT, PERWT),
    race = factor(
      ifelse(RACE == 1, 1, 0),
      levels = c(0, 1),
      labels = c("nonwhite", "white")
    ),
    income = ifelse(INCWAGE == 999999, 0, INCWAGE),
    married = factor(
      ifelse(MARST == 1 | MARST == 2, 1, 0),
      levels = c(0, 1),
      labels = c("Not Married", "Married")
    ),
    InLab = ifelse(LABFORCE == 2, 1, 0),
    NotInLab = ifelse(LABFORCE == 1, 1, 0)
  )
## Labor force participation over time by race and marital status
# For each YEAR x race x married cell, sum the person weights of women flagged
# InLab and of women flagged NotInLab; rate is the InLab share of the two.
# Rows with neither flag set add zero to both sums and so drop out of the rate.
# PERWT is used for every year here, unlike the income medians, which use
# `weight`.
# NOTE(review): presumably intentional because LABFORCE is not a 1950
# sample-line item -- confirm.
b <- a %>%
  group_by(YEAR, race, married) %>%
  summarise(
    InLab = sum(InLab * PERWT),
    NotInLab = sum(NotInLab * PERWT)
  ) %>%
  mutate(rate = InLab / (InLab + NotInLab)) %>%
  # Drop the leftover YEAR/race grouping so later verbs are not silently
  # applied per group.
  ungroup()
## Plot 1
# Participation rate over time, one line per race, one facet row per marital
# status.
plot1 <- ggplot(b, aes(x = YEAR, y = rate, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Labor Force Participation Rate",
    title = "Female Labor Force Participation Rate by Race 1880-2000"
  ) +
  # One tick per decade from 1880 to 2000.
  scale_x_continuous(breaks = seq(1880, 2000, by = 10)) +
  scale_y_continuous(labels = percent_format()) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("darkmagenta", "dodgerblue"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  )
# Name the plot explicitly instead of relying on last_plot(), and spell out
# `filename` rather than depending on partial argument matching of `file`.
ggsave(filename = "participation.png", plot = plot1, dpi = 300)
plot1
## Median income by race and marital status in each year (1940 onward)
# NOTE(review): the name `c` shadows base::c as a data object. Calls such as
# c(1, 2) still resolve to the function, but a different name would be safer;
# it is kept here in case other code refers to it.
c <- a %>%
  filter(YEAR >= 1940) %>%
  select(YEAR, race, married, income, weight, InLab)
# Weighted median: each income is repeated `weight` times and the median of
# the expanded vector is taken. rep() truncates non-integer `times`, and the
# expanded vector has sum(weight) elements per group, which can be very large.
d <- c %>%
  group_by(YEAR, race, married) %>%
  summarise(inc = median(rep(as.numeric(income), times = weight))) %>%
  ungroup()
## Prices not adjusted
# Median income in nominal dollars, one line per race, one facet row per
# marital status.
plot2 <- ggplot(d, aes(x = YEAR, y = inc, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Median Income",
    title = "Median Female Income by Race 1940-2000"
  ) +
  # One tick per decade from 1940 to 2000.
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("deeppink", "lightseagreen"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  ) +
  scale_y_continuous(labels = comma)
# Name the plot explicitly instead of relying on last_plot(), and spell out
# `filename` rather than depending on partial argument matching of `file`.
ggsave(filename = "income.png", plot = plot2, dpi = 300)
plot2
## Price adjustment
# Per-year multipliers applied to the medians in the adjusted plots; the 2000
# row has index 1, so other years are rescaled relative to it.
# NOTE(review): the values look like the IPUMS CPI99 multipliers (1999
# dollars) -- confirm the source.
YEAR <- c(1940, 1950, 1960, 1970, 1980, 1990, 2000)
index <- c(11.986, 7, 5.725, 4.54, 2.295, 1.344, 1)
cpi <- data.frame(YEAR = YEAR, index = index)
# Left join: keep every row of d and attach that year's multiplier.
e <- merge(d, cpi, by = "YEAR", all.x = TRUE)
## Plot 3
# Median income multiplied by the year's price index, one line per race, one
# facet row per marital status.
plot3 <- ggplot(e, aes(x = YEAR, y = inc * index, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Adjusted Median Income",
    title = "Price-Adjusted Median Female Income by Race 1940-2000"
  ) +
  # One tick per decade from 1940 to 2000.
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("deeppink", "lightseagreen"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  ) +
  scale_y_continuous(labels = comma)
# Name the plot explicitly instead of relying on last_plot(), and spell out
# `filename` rather than depending on partial argument matching of `file`.
ggsave(filename = "adjincome.png", plot = plot3, dpi = 300)
plot3
## Median income by race and marital status in each year (1940 onward),
## restricted to women in the labor force (InLab == 1)
f <- a %>%
  filter(YEAR >= 1940) %>%
  select(YEAR, race, married, income, weight, InLab) %>%
  filter(InLab == 1)
# Weighted median: each income is repeated `weight` times and the median of
# the expanded vector is taken. rep() truncates non-integer `times`, and the
# expanded vector has sum(weight) elements per group, which can be very large.
g <- f %>%
  group_by(YEAR, race, married) %>%
  summarise(inc = median(rep(as.numeric(income), times = weight))) %>%
  ungroup()
## Price adjustment
# `cpi` (YEAR -> index multiplier) is already built in the earlier price
# adjustment section with exactly these values, so it is reused here rather
# than rebuilt.
# Left join: keep every row of g and attach that year's multiplier.
h <- merge(g, cpi, by = "YEAR", all.x = TRUE)
## Plot 4
# Median income of women in the labor force multiplied by the year's price
# index, one line per race, one facet row per marital status.
plot4 <- ggplot(h, aes(x = YEAR, y = inc * index, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Adjusted Median Income",
    title = "Price-Adjusted Median Female Income In Labor Force by Race 1940-2000"
  ) +
  # One tick per decade from 1940 to 2000.
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("deeppink", "lightseagreen"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  ) +
  scale_y_continuous(labels = comma)
# Name the plot explicitly instead of relying on last_plot(), and spell out
# `filename` rather than depending on partial argument matching of `file`.
ggsave(filename = "inLabadjincome.png", plot = plot4, dpi = 300)
plot4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment