Skip to content

Instantly share code, notes, and snippets.

@carolineallan
Created March 2, 2016 22:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carolineallan/cedcfe0beaa6c64fa85a to your computer and use it in GitHub Desktop.
Save carolineallan/cedcfe0beaa6c64fa85a to your computer and use it in GitHub Desktop.
## Caroline Allan
## U.S. History Through Census Data
## Project 4
## Children with Divorced Parents 1940-2000
## March 1, 2016
## Remove all visible objects from the current workspace before the
## analysis starts.
## NOTE(review): this also wipes the workspace of anyone who source()s this
## script from an interactive session; running it in a fresh R session makes
## this line unnecessary.
rm(list = ls())
# load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(ggthemes)
library(tidyr)
## Point R at the project folder so the relative file names used below
## (input CSV, cached CSV, PNG output) resolve.
## NOTE(review): this path is machine-specific; adjust it when running the
## script on another computer.
setwd("~/Desktop/HistThroughCensusData/Project4")

## Load the raw census extract
data_raw <- read_csv("usa_00016.csv")

## Keep only rows that satisfy all of the following:
##   * STATEFIP below 60
##   * not Alaska (2) or Hawaii (15), unless the year is 1960 or later
##   * GQ equal to 1
a <- filter(
  data_raw,
  STATEFIP < 60,
  YEAR >= 1960 | !(STATEFIP %in% c(2, 15)),
  GQ == 1
)
## Children: every person under 18, keeping the identifiers, the person
## weight, the pointers to mother and father (MOMLOC / POPLOC) and the
## demographic columns used later on
children <- select(
  filter(a, AGE < 18),
  YEAR, SERIAL, PERNUM, PERWT, MOMLOC, POPLOC, AGE, RACE, HISPAN
)

## Parent lookup table: marital status (MARST) of every person in the
## filtered data, keyed by YEAR, SERIAL and PERNUM
parent <- select(a, YEAR, SERIAL, PERNUM, MARST)
## Mother's marital status for each child who lives with their mother.
## The child's MOMLOC is matched to the PERNUM of a person with the same
## YEAR and SERIAL. merge() keeps matched rows only, so children without a
## matching mother do not appear in this table. The PERNUM kept here is the
## child's own.
mom <- merge(
  children, parent,
  by.x = c("YEAR", "SERIAL", "MOMLOC"),
  by.y = c("YEAR", "SERIAL", "PERNUM")
) %>%
  select(YEAR, SERIAL, PERNUM, MOM_MARST = MARST)
head(mom)
## Father's marital status for each child who lives with their father.
## The child's POPLOC is matched to the PERNUM of a person with the same
## YEAR and SERIAL. merge() keeps matched rows only, so children without a
## matching father do not appear in this table. The PERNUM kept here is the
## child's own.
dad <- merge(
  children, parent,
  by.x = c("YEAR", "SERIAL", "POPLOC"),
  by.y = c("YEAR", "SERIAL", "PERNUM")
) %>%
  select(YEAR, SERIAL, PERNUM, DAD_MARST = MARST)
head(dad)
## Attach the mother's marital status to every child. all.x = TRUE keeps
## children with no matched mother; their MOM_MARST is NA.
data_final1 <- children %>%
  merge(mom, by = c("YEAR", "SERIAL", "PERNUM"), all.x = TRUE)

## Attach the father's marital status the same way; children with no
## matched father get NA in DAD_MARST.
data_final2 <- data_final1 %>%
  merge(dad, by = c("YEAR", "SERIAL", "PERNUM"), all.x = TRUE)
## Flag each child by the marital status of the parent(s) they live with.
## MARST code 4 is the value this script treats as "divorced".
##   "divorced"     - at least one co-resident parent has MARST 4
##   "not divorced" - at least one parent is present and none has MARST 4
##   NA             - neither parent was matched, so the status is unknown
##
## %in% is used instead of == so that a missing parent compares as FALSE
## rather than NA. With ==, `FALSE | NA` evaluates to NA, which turned every
## child living with a single non-divorced parent into NA; those children
## were then discarded by the later NA filter, shrinking the denominator of
## the divorced share.
data_final3 <- data_final2 %>%
  mutate(
    divorced = ifelse(
      MOM_MARST %in% 4 | DAD_MARST %in% 4,
      "divorced",
      ifelse(
        is.na(MOM_MARST) & is.na(DAD_MARST),
        NA_character_,
        "not divorced"
      )
    )
  )
## Collapse race into two labels, then keep only the analysis columns.
## A child is labelled "White" only when HISPAN is not 1 and RACE is 1;
## every other combination is labelled "Non-White".
## NOTE(review): only HISPAN == 1 is treated as Hispanic here - confirm
## against the extract's codebook whether other non-zero HISPAN codes should
## be included as well.
data_final4 <- data_final3 %>%
  mutate(
    RACE = ifelse(
      HISPAN == 1,
      "Non-White",
      ifelse(RACE == 1, "White", "Non-White")
    )
  ) %>%
  select(YEAR, PERWT, AGE, RACE, HISPAN, divorced)
## Split the person weight into two columns so that summing them later gives
## weighted counts: `divorce` carries PERWT for children flagged "divorced"
## and `notdivorce` carries it for children flagged "not divorced"; the
## other column is 0. Rows whose divorced flag is NA are then dropped.
data_final5 <- data_final4 %>%
  mutate(
    divorce = ifelse(divorced == "divorced", PERWT, 0),
    notdivorce = ifelse(divorced == "not divorced", PERWT, 0)
  ) %>%
  filter(!is.na(divorced))

## Cache the prepared data on disk to save compile time in future runs
write.csv(data_final5, "finaldata.csv")
## Data for plots 1 and 2: weighted totals per year and the share of
## children flagged "divorced" among all children kept in the data
byyear <- data_final5 %>%
  group_by(YEAR) %>%
  summarise(
    NumDivorced = sum(divorce),
    NumNot = sum(notdivorce)
  ) %>%
  mutate(Percent = NumDivorced / (NumDivorced + NumNot))
## Data for plots 3 and 4: the same totals and share, broken out by year,
## race label and age band. With right = FALSE the bands are left-closed:
## [0,6), [6,12) and [12,18).
byyearraceage <- data_final5 %>%
  mutate(
    AGECAT = factor(cut(AGE, breaks = c(0, 6, 12, 18), right = FALSE))
  ) %>%
  group_by(YEAR, RACE, AGECAT) %>%
  summarise(
    NumDivorced = sum(divorce),
    NumNot = sum(notdivorce)
  ) %>%
  mutate(Percent = NumDivorced / (NumDivorced + NumNot))
## Plot 1
## Bar chart of the estimated number of children living with a divorced
## parent, one bar per year
plot1 <- ggplot(byyear, aes(x = YEAR, y = NumDivorced)) +
  geom_bar(stat = "identity", fill = "#33CCCC", color = "black") +
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Year",
    y = "Number of Children Living With a Divorced Parent",
    title = "Estimated Number of Children 0-17 Living with a\nDivorced Parent 1940-2000"
  ) +
  theme_few()
print(plot1)
## Save this specific plot object to disk at print resolution
ggsave(filename = "NumKids.png", plot = plot1, dpi = 300)
## Plot 2
## Bar chart of the estimated share of children living with a divorced
## parent, one bar per year, with the y axis formatted as percentages
plot2 <- ggplot(byyear, aes(x = YEAR, y = Percent)) +
  geom_bar(stat = "identity", fill = "#33CCCC", color = "black") +
  scale_x_continuous(breaks = seq(1940, 2000, by = 10)) +
  scale_y_continuous(labels = percent) +
  labs(
    x = "Year",
    y = "Percent of Children Living With a Divorced Parent",
    title = "Estimated Percent of Children 0-17 Living with a\nDivorced Parent 1940-2000"
  ) +
  theme_few()
print(plot2)
## Save this specific plot object to disk at print resolution
ggsave(filename = "PercKids.png", plot = plot2, dpi = 300)
## Plot 3
## Line chart of the estimated number of children living with a divorced
## parent by year, one line per age band, one facet row per race label.
plot3 <- ggplot(
  byyearraceage,
  aes(x = YEAR, y = NumDivorced, colour = AGECAT)
) +
  geom_line() +
  theme_few() +
  facet_grid(RACE ~ .) +
  ## Legend labels follow the order of the AGECAT levels, which come from
  ## the left-closed bins [0,6), [6,12), [12,18). The oldest band therefore
  ## starts at 12 (it was previously mislabelled "11-17").
  scale_color_brewer(
    name = "Age\nCategory",
    labels = c("0-5", "6-11", "12-17"),
    palette = "Set2",
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    x = "Year",
    y = "Number of Children Living With a Divorced Parent",
    title = "Estimated Number of Children 0-17 Living with a\nDivorced Parent 1940-2000 by Age and Race"
  ) +
  scale_x_continuous(breaks = c(1940, 1950, 1960, 1970, 1980, 1990, 2000)) +
  scale_y_continuous(labels = comma)
print(plot3)
## Pass the plot explicitly and spell out `filename` rather than relying on
## partial argument matching and on whichever plot was displayed last
ggsave(filename = "NumKidsRaceAge.png", plot = plot3, dpi = 300)
## Plot 4
## Line chart of the estimated share of children living with a divorced
## parent by year, one line per age band, one facet row per race label.
plot4 <- ggplot(
  byyearraceage,
  aes(x = YEAR, y = Percent, colour = AGECAT)
) +
  geom_line() +
  theme_few() +
  ## Legend labels follow the order of the AGECAT levels, which come from
  ## the left-closed bins [0,6), [6,12), [12,18). The oldest band therefore
  ## starts at 12 (it was previously mislabelled "11-17").
  scale_color_brewer(
    name = "Age\nCategory",
    labels = c("0-5", "6-11", "12-17"),
    palette = "Set2",
    guide = guide_legend(reverse = TRUE)
  ) +
  facet_grid(RACE ~ .) +
  labs(
    x = "Year",
    y = "Percent of Children Living With a Divorced Parent",
    title = "Estimated Percent of Children 0-17 Living with a\nDivorced Parent 1940-2000 by Age and Race"
  ) +
  scale_x_continuous(breaks = c(1940, 1950, 1960, 1970, 1980, 1990, 2000)) +
  scale_y_continuous(labels = percent)
print(plot4)
## Pass the plot explicitly and spell out `filename` rather than relying on
## partial argument matching and on whichever plot was displayed last
ggsave(filename = "PercKidsRaceAge.png", plot = plot4, dpi = 300)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment