Last active
January 21, 2016 20:55
-
-
Save carolineallan/8936e87616ac881e9854 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Caroline Allan
## U.S. History Through Census Data
## Project 1
## Asian Races in the U.S. 1900-1960
## January 18, 2016
# Deliberately no rm(list = ls()) here: clearing the global environment would
# silently destroy the caller's objects whenever this file is sourced, and it
# does not reset loaded packages or options anyway. Run the script in a fresh
# R session to get a clean state.

# Load packages ----
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
library(ggthemes)
library(RColorBrewer)

# Set working directory ----
# The input CSVs are read from, and the output PNGs written to, the working
# directory. Only switch to the author's project folder when it exists, so the
# script can also be launched from inside the project folder on another
# machine instead of aborting at setwd().
project_dir <- "~/Desktop/HistThroughCensusData/Project1"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Project directory not found; using working directory ", getwd())
}
## Read in data ----
# Person-level census extract (presumably an IPUMS USA extract -- confirm).
# The code below uses the columns YEAR, STATEFIP, PERWT, RACE and RACED.
data_raw <- read.csv("usa_00003.csv", stringsAsFactors = FALSE)
# Lookup table keyed by RACED; it must supply a `label` column, which is used
# for grouping here and for the fill aesthetic in the plots.
raced_raw <- read.csv("raced_data.csv", stringsAsFactors = FALSE)

## Labels for the general RACE codes ----
# Assumes RACE is coded 1..7 in this order (IPUMS general race codes) --
# TODO confirm against the extract's codebook.
race_labels <- c("white", "black", "AIAN", "Chinese", "Japanese", "OAPI", "other")

## Remove Alaska, Hawaii, and overseas military; select necessary variables;
## add race labels
census_people <- data_raw %>%
  # STATEFIP 2 (Alaska) and 15 (Hawaii) are kept only from 1960 onwards;
  # STATEFIP >= 60 is dropped in every year.
  filter(STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))) %>%
  select(YEAR, PERWT, RACED, RACE) %>%
  # Explicit levels pin each code to its label. Without them the mapping
  # depends on which codes happen to be present in the extract, and a missing
  # or extra code makes factor() fail on a label-count mismatch.
  mutate(RACE = factor(RACE, levels = seq_along(race_labels), labels = race_labels))

## Merge with RACED labels (left join: keep people whose RACED has no label)
people_labelled <- merge(census_people, raced_raw, by = "RACED", all.x = TRUE)

## Dataset with number in each race each year (sum of person weights)
race_counts <- people_labelled %>%
  group_by(YEAR, label, RACE) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

## Dataset with total number in each year (all races)
year_totals <- race_counts %>%
  group_by(YEAR) %>%
  summarise(TOTALPOP = sum(NUMBER))

## Merged dataset: every race/year row carries that year's total population
counts_with_totals <- merge(race_counts, year_totals, by = "YEAR", all.x = TRUE)

## Dataset with only Asian races
asian_data_final <- counts_with_totals %>%
  filter(RACE %in% c("Chinese", "Japanese", "OAPI")) %>%
  mutate(
    People = NUMBER / 1000,            # weighted count, in thousands
    Percent = NUMBER / TOTALPOP * 100  # share of that year's total population
  )
## Create graph of Asian race breakdown in U.S.
# Legend labels are matched to the fill groups by position.
# NOTE(review): this assumes the `label` values from raced_data.csv sort into
# exactly this order (six groups) -- confirm against the lookup table.
asian_group_labels <- c("Asian Indian", "Chinese", "Filipino",
                        "Hawaiian", "Japanese", "Korean")

plot1 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
  # position = "fill" rescales every year's stack to 1, so the y-axis shows
  # each group's share of that year's Asian population, not a head count.
  geom_bar(stat = "identity", position = "fill") +
  theme_few() +
  labs(x = "Year",
       y = "Percent",
       title = "Breakdown of Asian Races 1910-1960") +
  scale_fill_brewer(name = "Race",
                    labels = asian_group_labels,
                    palette = "Set2",
                    guide = guide_legend(reverse = TRUE)) +
  scale_y_continuous(labels = percent_format())
print(plot1)
# Spell out `filename` (no reliance on partial argument matching) and pass the
# plot explicitly instead of depending on last_plot().
ggsave(filename = "breakdowns.png", plot = plot1, dpi = 300)
## Asian races as a % total population
# Percent is each group's share of the year's total population, so summing it
# over the Asian groups gives the overall Asian share for that year.
asian_share_by_year <- asian_data_final %>%
  group_by(YEAR) %>%
  summarise(percpop = sum(Percent))

## Create graph of % of the U.S. population that is Asian over time
plot2 <- ggplot(asian_share_by_year, aes(x = YEAR, y = percpop)) +
  geom_bar(stat = "identity") +
  labs(x = "Year",
       y = "Percent Asian",
       title = "Asian Population as a Percent\nof U.S. Population, by Year") +
  theme_few()
print(plot2)
# Spell out `filename` (no reliance on partial argument matching) and pass the
# plot explicitly instead of depending on last_plot().
ggsave(filename = "trends.png", plot = plot2, dpi = 300)
## Create graph of the number of people in each Asian group over time
# `fill = label` is required here: without a fill mapping the
# scale_fill_brewer() layer below has nothing to act on, and the bars are
# drawn in a single colour with no legend.
plot3 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
  # Default stacking: bar height is the total Asian population (thousands).
  geom_bar(stat = "identity") +
  theme_few() +
  labs(x = "Year",
       y = "Number of Asian People (in thousands)",
       title = "Number of People in U.S. Census\nAsian Races 1910-1960") +
  # Legend labels are matched to the fill groups by position.
  # NOTE(review): assumes the `label` values sort into this order -- confirm.
  scale_fill_brewer(name = "Race",
                    labels = c("Asian Indian", "Chinese", "Filipino",
                               "Hawaiian", "Japanese", "Korean"),
                    palette = "Set2",
                    guide = guide_legend(reverse = TRUE))
print(plot3)
# Spell out `filename` (no reliance on partial argument matching) and pass the
# plot explicitly instead of depending on last_plot().
ggsave(filename = "numbertrends.png", plot = plot3, dpi = 300)
# Close the graphics device opened by the print() calls above; ggsave() opens
# and closes its own file device.
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment