# jpearl1395/DivorceFamilyAssignment.R Secret

## DivorceFamilyAssignment.R
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(maptools)
library(ggmap)
library(gtools)

# NOTE(review): hard-coded, machine-specific working directory; the script
# only runs where this path exists and holds the extract read below.
setwd('/Users/joshpearl/Documents/Dartmouth/Junior Year 16W-16X/16W/HIST 90.01- US Hist Through Census/Family Assignment')

# Raw extract. The columns used below are STATEFIP, YEAR, SEX, BPL, SLWT,
# PERWT, RACE, HISPAN, INCWAGE, CPI99 and MARST.
a <- read_csv('usa_00017.csv')

# Build `j`, the analysis table that every plotting section starts from.
# The whole preparation is one pipeline: the plotting sections only read `j`,
# and the former single-letter intermediates included `c`, which shadowed
# base::c().
#
# Every factor() call names its `levels` explicitly. With `labels` alone,
# factor() fails as soon as one code is absent from the data, because the
# number of labels no longer matches the number of distinct values.
j <- a %>%
  # State codes below 60; codes 2 and 15 (Alaska and Hawaii) are kept only
  # from 1960 onward.
  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
  # 1940 onward, women only (SEX == 2), born in the USA (BPL 1-56).
  filter(YEAR >= 1940 & SEX == 2) %>%
  filter(BPL >= 1 & BPL <= 56) %>%
  mutate(
    # 1950 rows are weighted with SLWT, all other years with PERWT.
    weight = ifelse(YEAR == 1950, SLWT, PERWT),
    # Assumes RACE holds the integer codes 1-9 in this order -- TODO confirm
    # against the extract's codebook.
    RACE = factor(RACE, levels = 1:9,
                  labels = c('White', 'Black', 'Native American',
                             'Chinese', 'Japanese', 'Other Asian',
                             'Other', '2 Races', '3+ Races')),
    # Any non-zero HISPAN code counts as Hispanic.
    # NOTE(review): that includes a "not reported" code if the extract has one.
    HISPAN = factor(ifelse(HISPAN == 0, 0, 1), levels = c(0, 1),
                    labels = c('Not Hispanic', 'Hispanic')),
    # Combined race / ethnicity. The first matching rule wins. Everything
    # that matches no rule (multi-race rows, Hispanic Native Americans, rows
    # with a missing RACE or HISPAN) lands in the catch-all '2 Races' bucket,
    # which is dropped right after this mutate().
    RPlus = case_when(
      RACE == 'White' & HISPAN == 'Not Hispanic' ~ 'White',
      RACE %in% c('White', 'Black', 'Chinese', 'Japanese',
                  'Other Asian', 'Other') & HISPAN == 'Hispanic' ~ 'Hispanic',
      RACE == 'Black' & HISPAN == 'Not Hispanic' ~ 'Black',
      RACE == 'Native American' & HISPAN == 'Not Hispanic' ~ 'Native American',
      RACE %in% c('Chinese', 'Japanese', 'Other Asian') &
        HISPAN == 'Not Hispanic' ~ 'All Asian',
      RACE == 'Other' & HISPAN == 'Not Hispanic' ~ 'Other',
      TRUE ~ '2 Races'
    ),
    RPlus = factor(RPlus, levels = c('White', 'Hispanic', 'Black',
                                     'Native American', 'All Asian',
                                     'Other', '2 Races')),
    # Wage income scaled by CPI99; the 999999 code is treated as no income.
    # NOTE(review): confirm whether other sentinel codes (e.g. 999998) occur.
    income = ifelse(INCWAGE == 999999, 0, INCWAGE * CPI99),
    # Income bins. The top cut-off is 60000 so that the bins match their
    # labels; the old `< 59999` test put 59,999-59,999.99 in the top bin
    # although the '$40,000 - $59,999' label claims that range.
    incomecat = case_when(
      is.na(income) ~ NA_character_,  # keep missing income missing
      income == 0 ~ 'No Income',
      income < 20000 ~ '$1 - $19,999',
      income < 40000 ~ '$20,000 - $39,999',
      income < 60000 ~ '$40,000 - $59,999',
      TRUE ~ '$60,000+'
    ),
    incomecat = factor(incomecat,
                       levels = c('No Income', '$1 - $19,999',
                                  '$20,000 - $39,999', '$40,000 - $59,999',
                                  '$60,000+'))
  ) %>%
  # Keep the six single-category groups.
  filter(RPlus != '2 Races') %>%
  # Remove never married and widowed: keep MARST 1-4 only.
  filter(MARST >= 1 & MARST <= 4) %>%
  # MARST 1 is 'Married'; 2-4 are pooled. The old third ifelse() branch could
  # never be reached after the filter above.
  mutate(together = factor(ifelse(MARST == 1, 0, 1), levels = c(0, 1),
                           labels = c('Married',
                                      'Divorced/ Separated/ Absent Spouse')))

# Stacked 100% bars: income-category mix of women who are not 'Married'
# (divorced, separated or spouse absent), one bar per census year.

# Weighted count of those women per year / status / income category.
j2 <- j %>% filter(together != 'Married')
j3 <- j2 %>%
  group_by(YEAR, together, incomecat) %>%
  summarize(divorced = sum(weight))
# Drop column 2 (`together`), which holds a single value after the filter.
j4 <- j3[-2]

# Weighted total of all women in `j` per year, used as the denominator.
k <- j %>%
  group_by(YEAR) %>%
  summarize(Pop = sum(weight))
# merge() joins on the one shared column, YEAR.
k2 <- merge(j4, k)
k3 <- k2 %>%
  group_by(YEAR, incomecat) %>%
  summarize(TOTAL = divorced / Pop)

# position = 'fill' rescales every bar to 100%.
l <- ggplot(k3, aes(x = YEAR, y = TOTAL, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Income for Divorced Women', x = 'Year', y = 'Percent',
       fill = 'Income Category')
print(l)

# Stacked 100% bars: income-category mix of 'Married' women, one bar per
# census year.

# Weighted count of married women per year / status / income category.
r <- j %>% filter(together == 'Married')
r2 <- r %>%
  group_by(YEAR, together, incomecat) %>%
  summarize(married = sum(weight))
# Drop column 2 (`together`), which holds a single value after the filter.
r3 <- r2[-2]

# Weighted total of all women in `j` per year, used as the denominator.
s <- j %>%
  group_by(YEAR) %>%
  summarize(Pop = sum(weight))
# merge() joins on the one shared column, YEAR.
s2 <- merge(r3, s)
s3 <- s2 %>%
  group_by(YEAR, incomecat) %>%
  summarize(Total = married / Pop)

# position = 'fill' rescales every bar to 100%.
t <- ggplot(s3, aes(x = YEAR, y = Total, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Income for Married Women', x = 'Year', y = 'Percent',
       fill = 'Income Category')
print(t)

# Line chart: for each RPlus group and year, the weighted share of women in
# `j` who are not 'Married' (divorced, separated or spouse absent).

# Numerator: weighted count of those women per year and RPlus group.
m <- j %>% filter(together != 'Married')
m2 <- m %>%
  group_by(YEAR, together, RPlus) %>%
  summarize(divorced = sum(weight))
# Drop column 2 (`together`); `m3` is read again by the faceted plot below.
m3 <- m2[-2]

# Denominator: weighted count of all women in `j` per year and RPlus group.
n <- j %>%
  group_by(YEAR, RPlus) %>%
  summarize(POP = sum(weight))
# merge() joins on the shared columns YEAR and RPlus.
n2 <- merge(n, m3)
n3 <- n2 %>%
  group_by(YEAR, RPlus) %>%
  summarize(total = divorced / POP)

n4 <- ggplot(n3, aes(x = YEAR, y = total, color = RPlus)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Women Divorced by Race', x = 'Year',
       y = 'Percent Divorced', color = 'Race')
print(n4)

# Faceted 100% bars: income-category mix of women who are not 'Married',
# one facet per race group (White, Black, Hispanic).

# Per-year, per-race totals of those women, taken from `m3`, which the
# preceding section builds.
m4 <- m3 %>% filter(RPlus %in% c('White', 'Black', 'Hispanic'))

# Same three groups, split further by income category.
o <- j %>% select(YEAR, RPlus, together, incomecat, weight)
p <- o %>% filter(RPlus %in% c('White', 'Black', 'Hispanic'))
p2 <- p %>% filter(together != 'Married')
# Drop column 3 (`together`), which holds a single value after the filter.
p3 <- p2[-3]
p4 <- p3 %>%
  group_by(YEAR, RPlus, incomecat) %>%
  summarize(divorced2 = sum(weight))

# merge() joins on the shared columns YEAR and RPlus.
q <- merge(p4, m4)
q2 <- q %>%
  group_by(YEAR, RPlus, incomecat) %>%
  summarize(total2 = divorced2 / divorced)

# position = 'fill' rescales every bar to 100%.
q3 <- ggplot(q2, aes(x = YEAR, y = total2, fill = incomecat)) +
  geom_bar(stat = 'identity', position = 'fill') +
  facet_grid(RPlus ~ .) +
  scale_fill_brewer(palette = 'Set2') +
  scale_y_continuous(labels = scales::percent) +
  labs(title = 'Breakdown of Divorced Women by Income and Race', x = 'Year',
       y = 'Percent of Divorced Women in each Income Category',
       fill = 'Income Category')
print(q3)

# Shut down the current graphics device.
dev.off()
	library(readr)
	library(dplyr)
	library(ggplot2)
	library(RColorBrewer)
	library(maptools)
	library(ggmap)
	library(gtools)

	# NOTE(review): hard-coded, machine-specific working directory; the script
	# only runs where this path exists and holds the extract read below.
	setwd('/Users/joshpearl/Documents/Dartmouth/Junior Year 16W-16X/16W/HIST 90.01- US Hist Through Census/Family Assignment')

	# Raw extract. The columns used below are STATEFIP, YEAR, SEX, BPL, SLWT,
	# PERWT, RACE, HISPAN, INCWAGE, CPI99 and MARST.
	a <- read_csv('usa_00017.csv')

	# Build `j`, the analysis table that every plotting section starts from.
	# The old code used the token `\|` where `|` was meant; that is not valid R
	# and stopped the script from parsing. Set tests now use `|` or %in%.
	#
	# The whole preparation is one pipeline: the plotting sections only read
	# `j`, and the former single-letter intermediates included `c`, which
	# shadowed base::c().
	#
	# Every factor() call names its `levels` explicitly. With `labels` alone,
	# factor() fails as soon as one code is absent from the data, because the
	# number of labels no longer matches the number of distinct values.
	j <- a %>%
	  # State codes below 60; codes 2 and 15 (Alaska and Hawaii) are kept only
	  # from 1960 onward.
	  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
	  # 1940 onward, women only (SEX == 2), born in the USA (BPL 1-56).
	  filter(YEAR >= 1940 & SEX == 2) %>%
	  filter(BPL >= 1 & BPL <= 56) %>%
	  mutate(
	    # 1950 rows are weighted with SLWT, all other years with PERWT.
	    weight = ifelse(YEAR == 1950, SLWT, PERWT),
	    # Assumes RACE holds the integer codes 1-9 in this order -- TODO confirm
	    # against the extract's codebook.
	    RACE = factor(RACE, levels = 1:9,
	                  labels = c('White', 'Black', 'Native American',
	                             'Chinese', 'Japanese', 'Other Asian',
	                             'Other', '2 Races', '3+ Races')),
	    # Any non-zero HISPAN code counts as Hispanic.
	    # NOTE(review): that includes a "not reported" code if the extract has one.
	    HISPAN = factor(ifelse(HISPAN == 0, 0, 1), levels = c(0, 1),
	                    labels = c('Not Hispanic', 'Hispanic')),
	    # Combined race / ethnicity. The first matching rule wins. Everything
	    # that matches no rule (multi-race rows, Hispanic Native Americans, rows
	    # with a missing RACE or HISPAN) lands in the catch-all '2 Races' bucket,
	    # which is dropped right after this mutate().
	    RPlus = case_when(
	      RACE == 'White' & HISPAN == 'Not Hispanic' ~ 'White',
	      RACE %in% c('White', 'Black', 'Chinese', 'Japanese',
	                  'Other Asian', 'Other') & HISPAN == 'Hispanic' ~ 'Hispanic',
	      RACE == 'Black' & HISPAN == 'Not Hispanic' ~ 'Black',
	      RACE == 'Native American' & HISPAN == 'Not Hispanic' ~ 'Native American',
	      RACE %in% c('Chinese', 'Japanese', 'Other Asian') &
	        HISPAN == 'Not Hispanic' ~ 'All Asian',
	      RACE == 'Other' & HISPAN == 'Not Hispanic' ~ 'Other',
	      TRUE ~ '2 Races'
	    ),
	    RPlus = factor(RPlus, levels = c('White', 'Hispanic', 'Black',
	                                     'Native American', 'All Asian',
	                                     'Other', '2 Races')),
	    # Wage income scaled by CPI99; the 999999 code is treated as no income.
	    # NOTE(review): confirm whether other sentinel codes (e.g. 999998) occur.
	    income = ifelse(INCWAGE == 999999, 0, INCWAGE * CPI99),
	    # Income bins. The top cut-off is 60000 so that the bins match their
	    # labels; the old `< 59999` test put 59,999-59,999.99 in the top bin
	    # although the '$40,000 - $59,999' label claims that range.
	    incomecat = case_when(
	      is.na(income) ~ NA_character_,  # keep missing income missing
	      income == 0 ~ 'No Income',
	      income < 20000 ~ '$1 - $19,999',
	      income < 40000 ~ '$20,000 - $39,999',
	      income < 60000 ~ '$40,000 - $59,999',
	      TRUE ~ '$60,000+'
	    ),
	    incomecat = factor(incomecat,
	                       levels = c('No Income', '$1 - $19,999',
	                                  '$20,000 - $39,999', '$40,000 - $59,999',
	                                  '$60,000+'))
	  ) %>%
	  # Keep the six single-category groups.
	  filter(RPlus != '2 Races') %>%
	  # Remove never married and widowed: keep MARST 1-4 only.
	  filter(MARST >= 1 & MARST <= 4) %>%
	  # MARST 1 is 'Married'; 2-4 are pooled. The old third ifelse() branch could
	  # never be reached after the filter above.
	  mutate(together = factor(ifelse(MARST == 1, 0, 1), levels = c(0, 1),
	                           labels = c('Married',
	                                      'Divorced/ Separated/ Absent Spouse')))

	# Stacked 100% bars: income-category mix of women who are not 'Married'
	# (divorced, separated or spouse absent), one bar per census year.

	# Weighted count of those women per year / status / income category.
	j2 <- j %>% filter(together != 'Married')
	j3 <- j2 %>%
	  group_by(YEAR, together, incomecat) %>%
	  summarize(divorced = sum(weight))
	# Drop column 2 (`together`), which holds a single value after the filter.
	j4 <- j3[-2]

	# Weighted total of all women in `j` per year, used as the denominator.
	k <- j %>%
	  group_by(YEAR) %>%
	  summarize(Pop = sum(weight))
	# merge() joins on the one shared column, YEAR.
	k2 <- merge(j4, k)
	k3 <- k2 %>%
	  group_by(YEAR, incomecat) %>%
	  summarize(TOTAL = divorced / Pop)

	# position = 'fill' rescales every bar to 100%.
	l <- ggplot(k3, aes(x = YEAR, y = TOTAL, fill = incomecat)) +
	  geom_bar(stat = 'identity', position = 'fill') +
	  scale_fill_brewer(palette = 'Set2') +
	  scale_y_continuous(labels = scales::percent) +
	  labs(title = 'Income for Divorced Women', x = 'Year', y = 'Percent',
	       fill = 'Income Category')
	print(l)

	# Stacked 100% bars: income-category mix of 'Married' women, one bar per
	# census year.

	# Weighted count of married women per year / status / income category.
	r <- j %>% filter(together == 'Married')
	r2 <- r %>%
	  group_by(YEAR, together, incomecat) %>%
	  summarize(married = sum(weight))
	# Drop column 2 (`together`), which holds a single value after the filter.
	r3 <- r2[-2]

	# Weighted total of all women in `j` per year, used as the denominator.
	s <- j %>%
	  group_by(YEAR) %>%
	  summarize(Pop = sum(weight))
	# merge() joins on the one shared column, YEAR.
	s2 <- merge(r3, s)
	s3 <- s2 %>%
	  group_by(YEAR, incomecat) %>%
	  summarize(Total = married / Pop)

	# position = 'fill' rescales every bar to 100%.
	t <- ggplot(s3, aes(x = YEAR, y = Total, fill = incomecat)) +
	  geom_bar(stat = 'identity', position = 'fill') +
	  scale_fill_brewer(palette = 'Set2') +
	  scale_y_continuous(labels = scales::percent) +
	  labs(title = 'Income for Married Women', x = 'Year', y = 'Percent',
	       fill = 'Income Category')
	print(t)

	# Line chart: for each RPlus group and year, the weighted share of women in
	# `j` who are not 'Married' (divorced, separated or spouse absent).

	# Numerator: weighted count of those women per year and RPlus group.
	m <- j %>% filter(together != 'Married')
	m2 <- m %>%
	  group_by(YEAR, together, RPlus) %>%
	  summarize(divorced = sum(weight))
	# Drop column 2 (`together`); `m3` is read again by the faceted plot below.
	m3 <- m2[-2]

	# Denominator: weighted count of all women in `j` per year and RPlus group.
	n <- j %>%
	  group_by(YEAR, RPlus) %>%
	  summarize(POP = sum(weight))
	# merge() joins on the shared columns YEAR and RPlus.
	n2 <- merge(n, m3)
	n3 <- n2 %>%
	  group_by(YEAR, RPlus) %>%
	  summarize(total = divorced / POP)

	n4 <- ggplot(n3, aes(x = YEAR, y = total, color = RPlus)) +
	  geom_line() +
	  geom_point() +
	  scale_y_continuous(labels = scales::percent) +
	  labs(title = 'Women Divorced by Race', x = 'Year',
	       y = 'Percent Divorced', color = 'Race')
	print(n4)

	# Faceted 100% bars: income-category mix of women who are not 'Married',
	# one facet per race group (White, Black, Hispanic).
	#
	# The race filters used to chain `==` tests with the token `\|`, which is
	# not valid R and stopped the script from parsing. They now use %in%, which
	# keeps exactly the same rows.

	# Per-year, per-race totals of those women, taken from `m3`, which the
	# preceding section builds.
	m4 <- filter(m3, RPlus %in% c('White', 'Black', 'Hispanic'))

	# Same three groups, split further by income category.
	o <- select(j, YEAR, RPlus, together, incomecat, weight)
	p <- filter(o, RPlus %in% c('White', 'Black', 'Hispanic'))
	p2 <- filter(p, together != 'Married')
	# Drop column 3 (`together`), which holds a single value after the filter.
	p3 <- p2[-3]
	p4 <- summarize(group_by(p3, YEAR, RPlus, incomecat), divorced2 = sum(weight))

	# merge() joins on the shared columns YEAR and RPlus.
	q <- merge(p4, m4)
	q2 <- summarize(group_by(q, YEAR, RPlus, incomecat),
	                total2 = divorced2 / divorced)

	# position = 'fill' rescales every bar to 100%.
	q3 <- ggplot(q2, aes(x = YEAR, y = total2, fill = incomecat)) +
	  geom_bar(stat = 'identity', position = 'fill') +
	  facet_grid(RPlus ~ .) +
	  scale_y_continuous(labels = scales::percent) +
	  labs(title = 'Breakdown of Divorced Women by Income and Race', x = 'Year',
	       y = 'Percent of Divorced Women in each Income Category',
	       fill = 'Income Category') +
	  scale_fill_brewer(palette = 'Set2')
	print(q3)

	# Shut down the current graphics device.
	dev.off()