Skip to content

Instantly share code, notes, and snippets.

@carolineallan
Last active February 25, 2016 00:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carolineallan/0c1af2fd204a42fe28b9 to your computer and use it in GitHub Desktop.
Save carolineallan/0c1af2fd204a42fe28b9 to your computer and use it in GitHub Desktop.
## Caroline Allan
## U.S. History Through Census Data
## Project 3
## Female Labor Force Participation 1880-2000
## February 11, 2016
# The global environment is deliberately left alone: every object this script
# reads (data_raw, a, b, d, cpi, ...) is assigned by the script itself before
# use, so clearing the workspace is unnecessary and would silently delete the
# caller's objects whenever the file is sourced. Run the script in a fresh R
# session when a clean state is needed.
# load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(ggthemes)
library(tidyr)
## Set Working Directory
# All relative paths below (the input CSV and the PNGs written by ggsave)
# resolve against this directory.
# NOTE(review): the path is specific to one user's machine; anyone else has to
# edit it (or run from their own project directory) before the script works.
setwd("~/Desktop/HistThroughCensusData/Project3")
## Read In Data
# Raw extract; the columns used later are SEX, AGE, YEAR, PERWT, SLWT, RACE,
# LABFORCE, INCWAGE and MARST.
data_raw <- read_csv(file = "usa_00008.csv")
## Clean Data
# Restrict the extract to women (SEX == 2) aged 18-64 and derive the analysis
# variables used by every table and plot below.
a <- data_raw %>%
  filter(SEX == 2, AGE >= 18, AGE < 65) %>%
  select(YEAR, PERWT, SLWT, AGE, RACE, LABFORCE, INCWAGE, MARST) %>%
  mutate(
    # 1950 rows are weighted by SLWT, every other year by PERWT.
    weight = ifelse(YEAR == 1950, SLWT, PERWT),
    # RACE == 1 is labelled "white", everything else "nonwhite". The explicit
    # `levels` pin the 0/1 -> label mapping so the call neither errors nor
    # mislabels when only one of the two categories occurs in the data.
    race = factor(
      ifelse(RACE == 1, 1, 0),
      levels = c(0, 1),
      labels = c("nonwhite", "white")
    ),
    # INCWAGE == 999999 is recoded to an income of zero.
    # NOTE(review): presumably the "not applicable" code -- confirm against the
    # codebook, and check whether other sentinel codes need the same handling.
    income = ifelse(INCWAGE == 999999, 0, INCWAGE),
    # MARST 1 or 2 counts as married; all other codes as not married.
    married = factor(
      ifelse(MARST == 1 | MARST == 2, 1, 0),
      levels = c(0, 1),
      labels = c("Not Married", "Married")
    ),
    # 0/1 indicators derived from LABFORCE (2 -> InLab, 1 -> NotInLab); rows
    # with any other LABFORCE code are 0 in both and so drop out of the rate.
    InLab = ifelse(LABFORCE == 2, 1, 0),
    NotInLab = ifelse(LABFORCE == 1, 1, 0)
  )
## Labor force participation over time by race
# For every year x race x marital-status cell: weighted counts of women in and
# out of the labor force, and the share of the two that is in the labor force.
# NOTE(review): the sums are weighted by PERWT, not by the `weight` column
# built during cleaning -- confirm that this is intended for 1950.
b <- a %>%
  group_by(YEAR, race, married) %>%
  summarise(
    InLab = sum(InLab * PERWT),
    NotInLab = sum(NotInLab * PERWT),
    rate = InLab / (InLab + NotInLab)
  )
## Plot 1
# Participation rate over time: one line per race, one facet row per marital
# status, y axis formatted as a percentage.
plot1 <- ggplot(b, aes(x = YEAR, y = rate, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Labor Force Participation Rate",
    title = "Female Labor Force Participation Rate by Race 1880-2000"
  ) +
  scale_x_continuous(breaks = seq(1880, 2000, by = 10)) +
  scale_y_continuous(labels = percent_format()) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("darkmagenta", "dodgerblue"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  )
# Pass the plot explicitly and spell out `filename`: the saved figure no
# longer depends on whichever plot was built last, nor on partial matching
# of the argument name.
ggsave(filename = "participation.png", plot = plot1, dpi = 300)
plot1
## Median Income in each race in each year
# Weighted median income per year x race x marital status, 1940 onward.
# Each income value is repeated `weight` times, so the ordinary median of the
# expanded vector serves as the weighted median.
# No intermediate data frame is stored (in particular nothing named `c`,
# which would mask base::c() in the global environment).
d <- a %>%
  filter(YEAR >= 1940) %>%
  select(YEAR, race, married, income, weight, InLab) %>%
  group_by(YEAR, race, married) %>%
  summarise(inc = median(rep(as.numeric(income), times = weight)))
## Prices not adjusted
# Median income over time in nominal dollars: one line per race, one facet row
# per marital status.
plot2 <- ggplot(d, aes(x = YEAR, y = inc, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Median Income",
    title = "Median Female Income by Race 1940-2000"
  ) +
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("deeppink", "lightseagreen"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  ) +
  scale_y_continuous(labels = comma)
# Pass the plot explicitly and spell out `filename`: the saved figure no
# longer depends on whichever plot was built last, nor on partial matching
# of the argument name.
ggsave(filename = "income.png", plot = plot2, dpi = 300)
plot2
## Price adjustment
# One multiplier per census year; the plots multiply each year's median income
# by its `index` (the year 2000 has index 1).
# NOTE(review): presumably CPI-based factors converting to 2000 dollars --
# confirm the source of these numbers.
YEAR <- seq(1940, 2000, by = 10)
index <- c(11.986, 7, 5.725, 4.54, 2.295, 1.344, 1)
cpi <- data.frame(YEAR = YEAR, index = index)
# Left join on YEAR: every row of `d` is kept and gains its year's multiplier.
e <- merge(x = d, y = cpi, by = "YEAR", all.x = TRUE)
## Plot 3
# Median income over time after multiplying by the yearly price index: one
# line per race, one facet row per marital status.
plot3 <- ggplot(e, aes(x = YEAR, y = inc * index, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Adjusted Median Income",
    title = "Price-Adjusted Median Female Income by Race 1940-2000"
  ) +
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("deeppink", "lightseagreen"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  ) +
  scale_y_continuous(labels = comma)
# Pass the plot explicitly and spell out `filename`: the saved figure no
# longer depends on whichever plot was built last, nor on partial matching
# of the argument name.
ggsave(filename = "adjincome.png", plot = plot3, dpi = 300)
plot3
## Median income of women in the labor force, by race, in each year
# Same weighted median as the all-women table, restricted to rows with
# InLab == 1. Each income value is repeated `weight` times, so the ordinary
# median of the expanded vector serves as the weighted median.
g <- a %>%
  filter(YEAR >= 1940, InLab == 1) %>%
  select(YEAR, race, married, income, weight, InLab) %>%
  group_by(YEAR, race, married) %>%
  summarise(inc = median(rep(as.numeric(income), times = weight)))
## Price adjustment
# Reuse the `cpi` table built for the all-women series instead of redefining
# it, so both adjusted series are guaranteed to share the same multipliers.
# Left join on YEAR: every row of `g` is kept and gains its year's multiplier.
h <- merge(g, cpi, by = "YEAR", all.x = TRUE)
## Plot 4
# Price-adjusted median income of women in the labor force: one line per race,
# one facet row per marital status.
plot4 <- ggplot(h, aes(x = YEAR, y = inc * index, color = race)) +
  geom_line() +
  geom_point() +
  theme_few() +
  labs(
    x = "Year",
    y = "Adjusted Median Income",
    title = "Price-Adjusted Median Female Income In Labor Force by Race 1940-2000"
  ) +
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  facet_grid(married ~ .) +
  scale_color_manual(
    values = c("deeppink", "lightseagreen"),
    name = "Race",
    breaks = c("nonwhite", "white"),
    labels = c("Non-White", "White")
  ) +
  scale_y_continuous(labels = comma)
# Pass the plot explicitly and spell out `filename`: the saved figure no
# longer depends on whichever plot was built last, nor on partial matching
# of the argument name.
ggsave(filename = "inLabadjincome.png", plot = plot4, dpi = 300)
plot4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment