Skip to content

Instantly share code, notes, and snippets.

@carolineallan
Last active January 21, 2016 20:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carolineallan/8936e87616ac881e9854 to your computer and use it in GitHub Desktop.
Save carolineallan/8936e87616ac881e9854 to your computer and use it in GitHub Desktop.
## Caroline Allan
## U.S. History Through Census Data
## Project 1
## Asian Races in the U.S. 1900-1960
## January 18, 2016
## Remove every object that ls() lists in the current environment so the
## script starts without leftover variables.
## NOTE(review): this also wipes the workspace of anyone who source()s this
## file, and it does not detach packages or reset options; restarting R is
## the more reliable way to get a clean session.
rm(list = ls())
# load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
library(ggthemes)
library(RColorBrewer)
## Point the session at the project folder. The relative file names used for
## the input CSVs here and for the PNGs saved further down resolve against it.
setwd("~/Desktop/HistThroughCensusData/Project1")
## Load the two input tables, keeping text columns as character vectors:
## the census extract and the lookup table that is later joined on RACED.
data_raw <- read.csv(file = "usa_00003.csv", stringsAsFactors = FALSE)
raced_raw <- read.csv(file = "raced_data.csv", stringsAsFactors = FALSE)
## Build asian_data_final: weighted counts per YEAR and detailed race label
## for the Chinese, Japanese and OAPI groups, plus each count expressed in
## thousands (People) and as a percent of that year's total (Percent).

## Remove Alaska, Hawaii, and overseas military: keep STATEFIP below 60, and
## keep STATEFIP 2 and 15 only for years from 1960 onwards.
a <- filter(
  data_raw,
  STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
)
## Select necessary variables
b <- select(a, YEAR, PERWT, RACED, RACE)
## Add race labels.
## The levels are pinned to codes 1-7 so each label is always attached to the
## same code. Without explicit levels, factor() derives them from the values
## present in the data, and the call fails unless the extract happens to hold
## exactly seven distinct codes. Codes outside 1-7 become NA.
## NOTE(review): label order assumes the IPUMS general RACE codes
## (1 = white ... 7 = other) -- confirm against the extract's codebook.
c <- mutate(
  b,
  RACE = factor(
    RACE,
    levels = 1:7,
    labels = c('white', 'black', 'AIAN', 'Chinese', 'Japanese', 'OAPI', 'other')
  )
)
## Merge with RACED labels (left join: rows without a matching RACED are kept)
d <- merge(c, raced_raw, by = 'RACED', all.x = TRUE)
## Dataset with number in each race each year (sum of person weights)
e <- summarise(group_by(d, YEAR, label, RACE), NUMBER = sum(PERWT))
## Dataset with total number in each year, across all races
f <- summarise(group_by(e, YEAR), TOTALPOP = sum(NUMBER))
## Merged dataset: every race/year row now carries that year's total
g <- merge(e, f, by = 'YEAR', all.x = TRUE)
## Dataset with only Asian races
asian_data_final <- filter(g, RACE %in% c('Chinese', 'Japanese', 'OAPI')) %>%
  mutate(People = NUMBER / 1000) %>%
  mutate(Percent = NUMBER / TOTALPOP * 100)
## Create graph of Asian race breakdown in U.S.
## position = "fill" rescales each year's stacked bar to a total of 1, so the
## y axis shows each group's share of that year's bar and is formatted as a
## percentage.
## NOTE(review): the legend labels below are applied positionally to the
## values of `label`; confirm raced_data.csv yields exactly these six groups
## in this order.
plot1 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
  geom_bar(stat = 'identity', position = "fill") +
  theme_few() +
  labs(
    x = "Year",
    y = "Percent",
    title = "Breakdown of Asian Races 1910-1960"
  ) +
  scale_fill_brewer(
    name = "Race",
    labels = c("Asian Indian", "Chinese", "Filipino", "Hawaiian", "Japanese", "Korean"),
    palette = 'Set2',
    guide = guide_legend(reverse = TRUE)
  ) +
  scale_y_continuous(labels = percent_format())
print(plot1)
## Name the plot explicitly instead of relying on last_plot(), and spell out
## `filename` rather than depending on partial matching of `file`.
ggsave(filename = "breakdowns.png", plot = plot1, dpi = 300)
## Asian races as a % total population: add up the per-group Percent values
## within each year.
h <- summarise(group_by(asian_data_final, YEAR), percpop = sum(Percent))
## Create graph of % of the U.S. population that is Asian over time
plot2 <- ggplot(h, aes(x = YEAR, y = percpop)) +
  geom_bar(stat = 'identity') +
  labs(
    x = "Year",
    y = "Percent Asian",
    title = "Asian Population as a Percent\nof U.S. Population, by Year"
  ) +
  theme_few()
print(plot2)
## Name the plot explicitly instead of relying on last_plot(), and spell out
## `filename` rather than depending on partial matching of `file`.
ggsave(filename = "trends.png", plot = plot2, dpi = 300)
## Create graph of the number of people (in thousands) in each Asian race
## group, stacked per year.
## `fill = label` is mapped so that the fill scale configured below actually
## applies; without a fill aesthetic the bars are drawn in one colour and the
## "Race" legend never appears.
## NOTE(review): the legend labels below are applied positionally to the
## values of `label`; confirm raced_data.csv yields exactly these six groups
## in this order.
plot3 <- ggplot(asian_data_final, aes(x = YEAR, y = People, fill = label)) +
  geom_bar(stat = 'identity') +
  theme_few() +
  labs(
    x = "Year",
    y = "Number of Asian People (in thousands)",
    title = "Number of People in U.S. Census\nAsian Races 1910-1960"
  ) +
  scale_fill_brewer(
    name = "Race",
    labels = c("Asian Indian", "Chinese", "Filipino", "Hawaiian", "Japanese", "Korean"),
    palette = 'Set2',
    guide = guide_legend(reverse = TRUE)
  )
print(plot3)
## Name the plot explicitly instead of relying on last_plot(), and spell out
## `filename` rather than depending on partial matching of `file`.
ggsave(filename = "numbertrends.png", plot = plot3, dpi = 300)
## Close the graphics device that the print() calls drew on.
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment