Skip to content

Instantly share code, notes, and snippets.

@jpearl1395
Last active March 9, 2016 05:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpearl1395/7d90ced9289eb83d81bd to your computer and use it in GitHub Desktop.
Save jpearl1395/7d90ced9289eb83d81bd to your computer and use it in GitHub Desktop.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(maptools)
library(ggmap)
library(gtools)
# Locate and read the IPUMS extract.
# The folder can be overridden with the FAMILY_ASSIGNMENT_DIR environment
# variable; when that is unset or empty the original author's folder is used.
# The file is read through an explicit path instead of setwd(), so the
# session's working directory is no longer changed as a side effect.
data_dir <- Sys.getenv('FAMILY_ASSIGNMENT_DIR')
if (!nzchar(data_dir)) {
  data_dir <- '/Users/joshpearl/Documents/Dartmouth/Junior Year 16W-16X/16W/HIST 90.01- US Hist Through Census/Family Assignment'
}
a <- read_csv(file.path(data_dir, 'usa_00017.csv'))
# Restrict the sample and build the race/ethnicity grouping (RPlus).
# - STATEFIP codes of 60 and above are always dropped; Alaska (2) and
#   Hawaii (15) are dropped only for years before 1960.
# - Only women (SEX == 2) from 1940 onward with a birthplace code of 1-56
#   are kept.
# - The result, d6, excludes everyone whose RPlus group is '2 Races'.
race_labels <- c('White', 'Black', 'Native American',
                 'Chinese', 'Japanese', 'Other Asian',
                 'Other', '2 Races', '3+ Races')
rplus_labels <- c('White', 'Hispanic', 'Black',
                  'Native American', 'All Asian',
                  'Other', '2 Races')
asian_races <- c('Chinese', 'Japanese', 'Other Asian')
d6 <- a %>%
  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
  filter(YEAR >= 1940 & SEX == 2) %>%
  filter(BPL >= 1 & BPL <= 56) %>%
  # 1950 rows are weighted with SLWT, every other year with PERWT.
  mutate(weight = ifelse(YEAR == 1950, SLWT, PERWT)) %>%
  # Explicit levels tie each code to its label. Without them factor() pairs
  # labels with whatever codes happen to be present and stops with an error
  # as soon as one code is missing from the extract.
  # Assumes RACE uses the integer codes 1-9 in the label order above -- TODO
  # confirm against the extract's codebook.
  mutate(RACE = factor(RACE, levels = 1:9, labels = race_labels)) %>%
  # Add hispanic to race: any non-zero HISPAN code counts as Hispanic.
  # NOTE(review): this also sweeps any "not reported" code into 'Hispanic' --
  # confirm that is intended.
  mutate(HISPAN = factor(ifelse(HISPAN == 0, 0, 1), levels = c(0, 1),
                         labels = c('Not Hispanic', 'Hispanic'))) %>%
  # NOTE(review): Hispanic women whose RACE is 'Native American' match none
  # of the branches below, so they fall into code 7 ('2 Races') and are
  # removed by the final filter, exactly as in the original logic.
  mutate(RPlus = ifelse(RACE == 'White' & HISPAN == 'Not Hispanic', 1,
                 ifelse(RACE %in% c('White', 'Black', asian_races, 'Other') & HISPAN == 'Hispanic', 2,
                 ifelse(RACE == 'Black' & HISPAN == 'Not Hispanic', 3,
                 ifelse(RACE == 'Native American' & HISPAN == 'Not Hispanic', 4,
                 ifelse(RACE %in% asian_races & HISPAN == 'Not Hispanic', 5,
                 ifelse(RACE == 'Other' & HISPAN == 'Not Hispanic', 6, 7))))))) %>%
  mutate(RPlus = factor(RPlus, levels = 1:7, labels = rplus_labels)) %>%
  # Keeps White, Hispanic, Black, Native American, All Asian and Other.
  filter(RPlus != '2 Races')
# Create the CPI-adjusted wage income variable and its bracket factor.
# Fixes relative to the earlier version:
# - INCWAGE 999998 is the IPUMS "missing" code. It used to be multiplied by
#   CPI99 and counted as a very high income; those rows are now dropped
#   (rows with an NA INCWAGE are dropped by the same filter).
# - The top bracket starts at 60000, matching the '$40,000 - $59,999' label
#   below it (the old test was income < 59999).
# - Factor levels are explicit, so a bracket with no observations no longer
#   makes factor() fail.
wage_missing <- 999998
wage_not_applicable <- 999999
income_labels <- c('No Income', '$1 - $19,999',
                   '$20,000 - $39,999', '$40,000 - $59,999',
                   '$60,000+')
g <- d6 %>%
  filter(INCWAGE != wage_missing) %>%
  # Not-applicable wages count as zero income; everything else is scaled
  # by CPI99.
  mutate(income = ifelse(INCWAGE == wage_not_applicable, 0, INCWAGE * CPI99)) %>%
  # Bracket 1 is exactly zero income. findInterval() returns 0, 1, 2 or 3
  # for the ranges below 20000, [20000, 40000), [40000, 60000) and 60000+,
  # which map to brackets 2-5.
  mutate(incomecat = ifelse(income == 0, 1,
                            findInterval(income, c(20000, 40000, 60000)) + 2)) %>%
  mutate(incomecat = factor(incomecat, levels = 1:5, labels = income_labels))
# Marital status: keep MARST codes 1-4 (this removes the never married and
# widowed), then label code 1 as 'Married' and codes 2-4 as
# 'Divorced/ Separated/ Absent Spouse'.
j <- g %>%
  filter(between(MARST, 1, 4)) %>%
  mutate(together = ifelse(MARST == 1, 0, 1)) %>%
  mutate(together = factor(together,
                           labels = c('Married', 'Divorced/ Separated/ Absent Spouse')))
# Income mix of divorced/separated women: one bar per year, filled by
# income category.
# Weighted count of non-married women per year and income category.
divorced_income <- j %>%
  filter(together != 'Married') %>%
  group_by(YEAR, together, incomecat) %>%
  summarize(divorced = sum(weight)) %>%
  ungroup() %>%
  select(-together)
# Weighted count of every woman in j per year.
yearly_pop <- j %>%
  group_by(YEAR) %>%
  summarize(Pop = sum(weight))
# merge() joins on the shared YEAR column.
divorced_share <- merge(divorced_income, yearly_pop) %>%
  group_by(YEAR, incomecat) %>%
  summarize(TOTAL = divorced / Pop)
# position='fill' stacks each year's bar to 100%.
divorced_income_plot <- ggplot(divorced_share,
                               aes(x = YEAR, y = TOTAL, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Income for Divorced Women', x = 'Year', y = 'Percent',
       fill = 'Income Category') +
  scale_fill_brewer(palette = 'Set2')
print(divorced_income_plot)
# Income mix of married women: one bar per year, filled by income category.
# Weighted count of married women per year and income category.
married_income <- j %>%
  filter(together == 'Married') %>%
  group_by(YEAR, together, incomecat) %>%
  summarize(married = sum(weight)) %>%
  ungroup() %>%
  select(-together)
# Weighted count of every woman in j per year.
married_yearly_pop <- j %>%
  group_by(YEAR) %>%
  summarize(Pop = sum(weight))
# merge() joins on the shared YEAR column.
married_share <- merge(married_income, married_yearly_pop) %>%
  group_by(YEAR, incomecat) %>%
  summarize(Total = married / Pop)
# position='fill' stacks each year's bar to 100%.
married_income_plot <- ggplot(married_share,
                              aes(x = YEAR, y = Total, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Income for Married Women', x = 'Year', y = 'Percent',
       fill = 'Income Category') +
  scale_fill_brewer(palette = 'Set2')
print(married_income_plot)
# Share of women in each RPlus group who are divorced/separated, by year,
# drawn as one line per group.
# m3: weighted count of non-married women per year and RPlus group. The name
# is kept because the income-by-race breakdown further down reads m3.
m3 <- j %>%
  filter(together != 'Married') %>%
  group_by(YEAR, together, RPlus) %>%
  summarize(divorced = sum(weight)) %>%
  ungroup() %>%
  select(-together)
# Weighted count of all women in j per year and RPlus group.
race_pop <- j %>%
  group_by(YEAR, RPlus) %>%
  summarize(POP = sum(weight))
# merge() joins on the shared YEAR and RPlus columns.
n3 <- merge(race_pop, m3) %>%
  group_by(YEAR, RPlus) %>%
  summarize(total = divorced / POP)
n4 <- ggplot(n3, aes(x = YEAR, y = total, color = RPlus)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Women Divorced by Race', x = 'Year', y = 'Percent Divorced',
       color = 'Race')
print(n4)
# Income breakdown of divorced/separated women, as a share of the divorced
# women in the same year and race group, for White, Black and Hispanic only.
focus_races <- c('White', 'Black', 'Hispanic')
# Per-year, per-race divorced totals; m3 comes from the divorce-by-race step.
m4 <- filter(m3, RPlus %in% focus_races)
# Weighted count of divorced women per year, race group and income category.
p4 <- j %>%
  filter(RPlus %in% focus_races) %>%
  filter(together != 'Married') %>%
  group_by(YEAR, RPlus, incomecat) %>%
  summarize(divorced2 = sum(weight))
# merge() joins on the shared YEAR and RPlus columns.
q2 <- merge(p4, m4) %>%
  group_by(YEAR, RPlus, incomecat) %>%
  summarize(total2 = divorced2 / divorced)
# One facet row per race group; position='fill' stacks each bar to 100%.
q3 <- ggplot(q2, aes(x = YEAR, y = total2, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  facet_grid(RPlus ~ .) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Breakdown of Divorced Women by Income and Race',
       x = 'Year',
       y = 'Percent of Divorced Women in each Income Category',
       fill = 'Income Category') +
  scale_fill_brewer(palette = 'Set2')
print(q3)
# Close the graphics device the plots were drawn on.
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment