## carolineallan/AsianRaces.R

## AsianRaces.R
## Caroline Allan
## U.S. History Through Census Data
## Project 1
## Asian Races in the U.S. 1900-1960
## January 18, 2016

# Remove every non-hidden object from the current environment before running.
# NOTE(review): when this file is source()d this also wipes the caller's
# objects, and it does not reset attached packages or options; restarting R
# is the more reliable way to get a clean session.
rm(list = ls())

# load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
library(ggthemes)
library(RColorBrewer)

## Set working directory
# NOTE(review): hard-coded, machine-specific path; every relative file name
# used by this script (CSV inputs, PNG outputs) is resolved against it.
setwd("~/Desktop/HistThroughCensusData/Project1")

## Read in data
# data_raw:  census extract; STATEFIP, YEAR, PERWT, RACED and RACE are used
# raced_raw: lookup table keyed on RACED (supplies the `label` column)
data_raw <- read.csv("usa_00003.csv", stringsAsFactors = FALSE)
raced_raw <- read.csv("raced_data.csv", stringsAsFactors = FALSE)

## Build the labelled person-level table `d`
d <- data_raw %>%
  # Remove Alaska, Hawaii, and overseas military: keep STATEFIP below 60 and,
  # for years before 1960, drop STATEFIP 2 and 15
  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
  # Select necessary variables
  select(YEAR, PERWT, RACED, RACE) %>%
  # Add race labels. Levels are spelled out so that each label stays tied to
  # its code even when a code is absent from the extract; without `levels`,
  # factor() derives them from the data and fails on a length mismatch.
  # NOTE(review): assumes RACE is coded 1-7 in this order - confirm against
  # the extract's codebook.
  mutate(RACE = factor(RACE,
                       levels = 1:7,
                       labels = c("white", "black", "AIAN", "Chinese",
                                  "Japanese", "OAPI", "other"))) %>%
  # Merge with RACED labels (all.x = TRUE keeps rows without a match)
  merge(raced_raw, by = "RACED", all.x = TRUE)


## Weighted count (sum of PERWT) for every YEAR / label / RACE combination
race_counts <- d %>%
  group_by(YEAR, label, RACE) %>%
  summarise(NUMBER = sum(PERWT))

## Total weighted population per year, summed over all races
year_totals <- race_counts %>%
  group_by(YEAR) %>%
  summarise(TOTALPOP = sum(NUMBER))

## Attach each year's total to the per-race rows
counts_with_totals <- merge(race_counts, year_totals, by = 'YEAR', all.x = TRUE)

## Keep the Asian race categories only; add the count in thousands (People)
## and the share of that year's total population (Percent)
asian_data_final <- counts_with_totals %>%
  filter(RACE %in% c('Chinese', 'Japanese', 'OAPI')) %>%
  mutate(People = NUMBER / 1000,
         Percent = NUMBER / TOTALPOP * 100)

## Create graph of Asian race breakdown in U.S.
# Bars are normalised to 100% within each year (position = "fill") and
# coloured by the detailed race label.
# NOTE(review): the legend labels are hard-coded and applied to the values of
# `label` in scale order - confirm they match the data.
plot1 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_few() +
  labs(x = "Year",
       y = "Percent",
       title = "Breakdown of Asian Races 1910-1960") +
  scale_fill_brewer(name = "Race",
                    labels = c("Asian Indian", "Chinese", "Filipino",
                               "Hawaiian", "Japanese", "Korean"),
                    palette = "Set2",
                    guide = guide_legend(reverse = TRUE)) +
  scale_y_continuous(labels = percent_format())

print(plot1)

# Pass the plot explicitly instead of relying on last_plot(), and use the full
# argument name `filename` rather than a partial match.
ggsave(filename = "breakdowns.png", plot = plot1, dpi = 300)

## Asian races as a % of total population: sum the per-race percentages
## within each year
asian_share <- asian_data_final %>%
  group_by(YEAR) %>%
  summarise(percpop = sum(Percent))

## Create graph of % of the U.S. population that is Asian over time
plot2 <- ggplot(asian_share, aes(x = YEAR, y = percpop)) +
  geom_bar(stat = "identity") +
  labs(x = "Year",
       y = "Percent Asian",
       title = "Asian Population as a Percent\nof U.S. Population, by Year") +
  theme_few()

print(plot2)

# Pass the plot explicitly instead of relying on last_plot(), and use the full
# argument name `filename` rather than a partial match.
ggsave(filename = "trends.png", plot = plot2, dpi = 300)


## Create graph of the number of Asian people (in thousands) by year
# `fill = label` is mapped so the bars are split by race and the fill scale
# configured below (name, labels, palette) takes effect; without a fill
# mapping that scale is silently ignored.
# NOTE(review): the legend labels are hard-coded and applied to the values of
# `label` in scale order - confirm they match the data.
plot3 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
  geom_bar(stat = "identity") +
  theme_few() +
  labs(x = "Year",
       y = "Number of Asian People (in thousands)",
       title = "Number of People in U.S. Census\nAsian Races 1910-1960") +
  scale_fill_brewer(name = "Race",
                    labels = c("Asian Indian", "Chinese", "Filipino",
                               "Hawaiian", "Japanese", "Korean"),
                    palette = "Set2",
                    guide = guide_legend(reverse = TRUE))

print(plot3)

# Pass the plot explicitly instead of relying on last_plot(), and use the full
# argument name `filename` rather than a partial match.
ggsave(filename = "numbertrends.png", plot = plot3, dpi = 300)

# Close the device opened by print(); skip when only the null device (1) is
# active, where dev.off() would raise an error.
if (dev.cur() > 1L) {
  dev.off()
}
	## Caroline Allan
	## U.S. History Through Census Data
	## Project 1
	## Asian Races in the U.S. 1900-1960
	## January 18, 2016

	# Remove every non-hidden object from the current environment before running.
	# NOTE(review): when this file is source()d this also wipes the caller's
	# objects, and it does not reset attached packages or options; restarting R
	# is the more reliable way to get a clean session.
	rm(list = ls())

	# load packages
	library(readr)
	library(dplyr)
	library(ggplot2)
	library(scales)
	library(grid)
	library(ggthemes)
	library(RColorBrewer)

	## Set working directory
	# NOTE(review): hard-coded, machine-specific path; every relative file name
	# used by this script (CSV inputs, PNG outputs) is resolved against it.
	setwd("~/Desktop/HistThroughCensusData/Project1")

	## Read in data
	# data_raw:  census extract; STATEFIP, YEAR, PERWT, RACED and RACE are used
	# raced_raw: lookup table keyed on RACED (supplies the `label` column)
	data_raw <- read.csv("usa_00003.csv", stringsAsFactors = FALSE)
	raced_raw <- read.csv("raced_data.csv", stringsAsFactors = FALSE)

	## Build the labelled person-level table `d`
	d <- data_raw %>%
	  # Remove Alaska, Hawaii, and overseas military: keep STATEFIP below 60 and,
	  # for years before 1960, drop STATEFIP 2 and 15. The OR must be a plain `|`;
	  # a backslash-escaped bar is not valid R and prevents the file from parsing.
	  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
	  # Select necessary variables
	  select(YEAR, PERWT, RACED, RACE) %>%
	  # Add race labels. Levels are spelled out so that each label stays tied to
	  # its code even when a code is absent from the extract; without `levels`,
	  # factor() derives them from the data and fails on a length mismatch.
	  # NOTE(review): assumes RACE is coded 1-7 in this order - confirm against
	  # the extract's codebook.
	  mutate(RACE = factor(RACE,
	                       levels = 1:7,
	                       labels = c("white", "black", "AIAN", "Chinese",
	                                  "Japanese", "OAPI", "other"))) %>%
	  # Merge with RACED labels (all.x = TRUE keeps rows without a match)
	  merge(raced_raw, by = "RACED", all.x = TRUE)


	## Weighted count (sum of PERWT) for every YEAR / label / RACE combination
	race_counts <- d %>%
	  group_by(YEAR, label, RACE) %>%
	  summarise(NUMBER = sum(PERWT))

	## Total weighted population per year, summed over all races
	year_totals <- race_counts %>%
	  group_by(YEAR) %>%
	  summarise(TOTALPOP = sum(NUMBER))

	## Attach each year's total to the per-race rows
	counts_with_totals <- merge(race_counts, year_totals, by = 'YEAR', all.x = TRUE)

	## Keep the Asian race categories only; add the count in thousands (People)
	## and the share of that year's total population (Percent)
	asian_data_final <- counts_with_totals %>%
	  filter(RACE %in% c('Chinese', 'Japanese', 'OAPI')) %>%
	  mutate(People = NUMBER / 1000,
	         Percent = NUMBER / TOTALPOP * 100)

	## Create graph of Asian race breakdown in U.S.
	# Bars are normalised to 100% within each year (position = "fill") and
	# coloured by the detailed race label.
	# NOTE(review): the legend labels are hard-coded and applied to the values of
	# `label` in scale order - confirm they match the data.
	plot1 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
	  geom_bar(stat = "identity", position = "fill") +
	  theme_few() +
	  labs(x = "Year",
	       y = "Percent",
	       title = "Breakdown of Asian Races 1910-1960") +
	  scale_fill_brewer(name = "Race",
	                    labels = c("Asian Indian", "Chinese", "Filipino",
	                               "Hawaiian", "Japanese", "Korean"),
	                    palette = "Set2",
	                    guide = guide_legend(reverse = TRUE)) +
	  scale_y_continuous(labels = percent_format())

	print(plot1)

	# Pass the plot explicitly instead of relying on last_plot(), and use the full
	# argument name `filename` rather than a partial match.
	ggsave(filename = "breakdowns.png", plot = plot1, dpi = 300)

	## Asian races as a % of total population: sum the per-race percentages
	## within each year
	asian_share <- asian_data_final %>%
	  group_by(YEAR) %>%
	  summarise(percpop = sum(Percent))

	## Create graph of % of the U.S. population that is Asian over time
	plot2 <- ggplot(asian_share, aes(x = YEAR, y = percpop)) +
	  geom_bar(stat = "identity") +
	  labs(x = "Year",
	       y = "Percent Asian",
	       title = "Asian Population as a Percent\nof U.S. Population, by Year") +
	  theme_few()

	print(plot2)

	# Pass the plot explicitly instead of relying on last_plot(), and use the full
	# argument name `filename` rather than a partial match.
	ggsave(filename = "trends.png", plot = plot2, dpi = 300)


	## Create graph of the number of Asian people (in thousands) by year
	# `fill = label` is mapped so the bars are split by race and the fill scale
	# configured below (name, labels, palette) takes effect; without a fill
	# mapping that scale is silently ignored.
	# NOTE(review): the legend labels are hard-coded and applied to the values of
	# `label` in scale order - confirm they match the data.
	plot3 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
	  geom_bar(stat = "identity") +
	  theme_few() +
	  labs(x = "Year",
	       y = "Number of Asian People (in thousands)",
	       title = "Number of People in U.S. Census\nAsian Races 1910-1960") +
	  scale_fill_brewer(name = "Race",
	                    labels = c("Asian Indian", "Chinese", "Filipino",
	                               "Hawaiian", "Japanese", "Korean"),
	                    palette = "Set2",
	                    guide = guide_legend(reverse = TRUE))

	print(plot3)

	# Pass the plot explicitly instead of relying on last_plot(), and use the full
	# argument name `filename` rather than a partial match.
	ggsave(filename = "numbertrends.png", plot = plot3, dpi = 300)

	# Close the device opened by print(); skip when only the null device (1) is
	# active, where dev.off() would raise an error.
	if (dev.cur() > 1L) {
	  dev.off()
	}