-
-
Save jpearl1395/7d90ced9289eb83d81bd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages ----
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(maptools)
library(ggmap)
library(gtools)

# Data ----
# Folder that holds the census extract. The file is read through an explicit
# path instead of setwd(), so running the script leaves the session's working
# directory untouched.
data_dir <- '/Users/joshpearl/Documents/Dartmouth/Junior Year 16W-16X/16W/HIST 90.01- US Hist Through Census/Family Assignment'
a <- read_csv(file.path(data_dir, 'usa_00017.csv'))
# Remove Alaska and Hawaii, women only and born in the USA. Create Race categories
# - STATEFIP must be below 60; codes 2 and 15 (Alaska, Hawaii) are only kept
#   for YEAR >= 1960.
# - Rows are limited to YEAR >= 1940, SEX == 2 and BPL between 1 and 56.
# - `weight` is SLWT for 1950 rows and PERWT for every other year.
# - RACE codes 1-9 are mapped to labels with explicit `levels`, so a code that
#   happens to be absent from the extract can neither shift the labels nor make
#   factor() fail on a label-count mismatch.
d <- a %>%
  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
  filter(YEAR >= 1940 & SEX == 2) %>%
  filter(BPL >= 1 & BPL <= 56) %>%
  mutate(weight = ifelse(YEAR == 1950, SLWT, PERWT)) %>%
  mutate(RACE = factor(RACE, levels = 1:9,
                       labels = c('White', 'Black', 'Native American',
                                  'Chinese', 'Japanese', 'Other Asian',
                                  'Other', '2 Races', '3+ Races')))
# Add hispanic to race
# HISPAN is collapsed to a two-level flag (0 -> 'Not Hispanic', any other code
# -> 'Hispanic') and combined with RACE into RPlus. Rows that match none of the
# six named groups fall into a catch-all level (labelled '2 Races') and are
# removed by the final filter, as are rows with a missing RACE or HISPAN.
# Levels are spelled out so the labels cannot shift if a group is empty.
# NOTE(review): every non-zero HISPAN code counts as Hispanic, including any
# "not reported" code the extract may carry - confirm against the codebook.
# NOTE(review): Hispanic Native Americans match none of the named groups, so
# they end up in the catch-all level and are dropped - confirm this is intended.
d6 <- d %>%
  mutate(
    HISPAN = factor(ifelse(HISPAN == 0, 0, 1), levels = c(0, 1),
                    labels = c('Not Hispanic', 'Hispanic')),
    RPlus = case_when(
      HISPAN == 'Hispanic' &
        RACE %in% c('White', 'Black', 'Chinese', 'Japanese',
                    'Other Asian', 'Other') ~ 'Hispanic',
      HISPAN == 'Not Hispanic' & RACE == 'White' ~ 'White',
      HISPAN == 'Not Hispanic' & RACE == 'Black' ~ 'Black',
      HISPAN == 'Not Hispanic' & RACE == 'Native American' ~ 'Native American',
      HISPAN == 'Not Hispanic' &
        RACE %in% c('Chinese', 'Japanese', 'Other Asian') ~ 'All Asian',
      HISPAN == 'Not Hispanic' & RACE == 'Other' ~ 'Other',
      TRUE ~ '2 Races'
    ),
    RPlus = factor(RPlus, levels = c('White', 'Hispanic', 'Black',
                                     'Native American', 'All Asian',
                                     'Other', '2 Races'))
  ) %>%
  filter(RPlus != '2 Races')
# Create income variable and categories
# INCWAGE reserves 999999 for "N/A" and 999998 for "missing". N/A is counted as
# zero wage income; rows with the missing code are dropped here, because
# multiplying 999998 by CPI99 would place them in the top bracket. Note that
# these rows therefore also leave every later table built from `g`.
# All other wages are scaled by CPI99.
# Brackets: 0 | below 20,000 | below 40,000 | below 60,000 | 60,000 and up.
# The top cutoff is 60,000 (not 59,999) so that it lines up with the
# '$40,000 - $59,999' label; a missing income stays NA instead of falling
# through to the top bracket.
g <- d6 %>%
  filter(INCWAGE != 999998) %>%
  mutate(
    income = ifelse(INCWAGE == 999999, 0, INCWAGE * CPI99),
    incomecat = case_when(
      is.na(income) ~ NA_real_,
      income == 0 ~ 1,
      income < 20000 ~ 2,
      income < 40000 ~ 3,
      income < 60000 ~ 4,
      TRUE ~ 5
    ),
    incomecat = factor(incomecat, levels = 1:5,
                       labels = c('No Income', '$1 - $19,999',
                                  '$20,000 - $39,999', '$40,000 - $59,999',
                                  '$60,000+'))
  )
# Remove never married and widowed
# Only MARST codes 1-4 are kept. Code 1 becomes 'Married'; codes 2-4 become
# 'Divorced/ Separated/ Absent Spouse'. Both levels are declared explicitly so
# the labels stay attached to the right group even if one of them is empty.
j <- g %>%
  filter(MARST >= 1 & MARST <= 4) %>%
  mutate(together = factor(ifelse(MARST == 1, 0, 1), levels = c(0, 1),
                           labels = c('Married',
                                      'Divorced/ Separated/ Absent Spouse')))
# Graph divorced by income category, bar filled percent

# Weighted number of women who are not 'Married', per year and income bracket.
divorced_income <- j %>%
  filter(together != 'Married') %>%
  group_by(YEAR, incomecat) %>%
  summarize(divorced = sum(weight)) %>%
  ungroup()

# Weighted number of all women in `j`, per year.
women_per_year <- j %>%
  group_by(YEAR) %>%
  summarize(Pop = sum(weight))

# Each year/bracket cell as a share of that year's women.
income_share <- divorced_income %>%
  inner_join(women_per_year, by = 'YEAR') %>%
  transmute(YEAR, incomecat, TOTAL = divorced / Pop)

# position = 'fill' rescales every bar to 100%, so the bars show how the
# women in this group split across income brackets within each year.
l <- ggplot(income_share, aes(x = YEAR, y = TOTAL, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = 'Set2') +
  labs(title = 'Income for Divorced Women', x = 'Year', y = 'Percent',
       fill = 'Income Category')
print(l)
# Graph married by income category, bar filled percent

# Attach each year's weighted total of all women in `j` to every row, then
# keep the 'Married' rows and express each year/bracket cell as a share of
# that yearly total.
s3 <- j %>%
  group_by(YEAR) %>%
  mutate(Pop = sum(weight)) %>%
  filter(together == 'Married') %>%
  group_by(YEAR, incomecat) %>%
  summarize(Total = sum(weight) / Pop[1])

# Bars are stacked with position = 'fill', i.e. rescaled to 100% per year.
t <- ggplot(s3, aes(x = YEAR, y = Total, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Income for Married Women', x = 'Year', y = 'Percent',
       fill = 'Income Category')
print(t)
# Graph divorced and married women by RPlus by percent

# Weighted number of women who are not 'Married', per year and race group.
# `m3` keeps its name because the race/income breakdown below reads it.
m3 <- j %>%
  filter(together != 'Married') %>%
  group_by(YEAR, RPlus) %>%
  summarize(divorced = sum(weight))

# Weighted number of all women in `j`, per year and race group.
women_by_race <- j %>%
  group_by(YEAR, RPlus) %>%
  summarize(POP = sum(weight)) %>%
  ungroup()

# Share of each year/race group that is not 'Married'. The inner join keeps
# only year/race combinations that appear in both tables.
n3 <- women_by_race %>%
  inner_join(ungroup(m3), by = c('YEAR', 'RPlus')) %>%
  transmute(YEAR, RPlus, total = divorced / POP)

n4 <- ggplot(n3, aes(x = YEAR, y = total, color = RPlus)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Women Divorced by Race', x = 'Year', y = 'Percent Divorced',
       color = 'Race')
print(n4)
# Graph income breakdown of divorced women as percent of divorced women

# The breakdown is limited to these three race groups.
focus_races <- c('White', 'Black', 'Hispanic')

# Denominator: not-'Married' women per year and race group (from `m3`).
m4 <- m3 %>%
  ungroup() %>%
  filter(RPlus %in% focus_races)

# Numerator: the same women, split further by income bracket.
p4 <- j %>%
  filter(RPlus %in% focus_races, together != 'Married') %>%
  group_by(YEAR, RPlus, incomecat) %>%
  summarize(divorced2 = sum(weight)) %>%
  ungroup()

# Share of each income bracket within its year/race group.
q2 <- p4 %>%
  inner_join(m4, by = c('YEAR', 'RPlus')) %>%
  transmute(YEAR, RPlus, incomecat, total2 = divorced2 / divorced)

# One facet row per race group; bars are rescaled to 100% per year.
q3 <- ggplot(q2, aes(x = YEAR, y = total2, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  facet_grid(RPlus ~ .) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = 'Set2') +
  labs(title = 'Breakdown of Divorced Women by Income and Race', x = 'Year',
       y = 'Percent of Divorced Women in each Income Category',
       fill = 'Income Category')
print(q3)

# Shut down the current graphics device.
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment