Last active
February 13, 2016 23:02
-
-
Save jmcinerney14/7ad55f1be385d71b7300 to your computer and use it in GitHub Desktop.
Asian Population
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load packages | |
library(ggplot2) | |
library(RColorBrewer) | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(scales) | |
# Load the IPUMS extract and the RACED.csv table from the working directory.
# NOTE(review): `raced` is not referenced again in the visible part of this
# script -- confirm whether it is still needed.
a <- read_csv(file = 'Asian4.csv')
raced <- read_csv(file = "RACED.csv")
# Keep rows with STATEFIP below 60, and drop the Alaska and Hawaii rows
# (STATEFIP 2 and 15) that are dated before 1960.
b <- a %>%
  filter(STATEFIP < 60, !(YEAR < 1960 & STATEFIP %in% c(2, 15)))
# Narrow the data to the three columns used by the later steps.
c <- b %>%
  select(YEAR, PERWT, RACED)
# Weighted count (sum of PERWT) for every YEAR x RACED combination.
d <- c %>%
  group_by(YEAR, RACED) %>%
  summarise(NUMBER = sum(PERWT))
# Weighted count of all rows in `c` for every YEAR.
e <- c %>%
  group_by(YEAR) %>%
  summarise(TOTAL = sum(PERWT))
# Attach each year's TOTAL to the per-RACED rows of that year.
f <- merge(x = d, y = e, by = 'YEAR')
# Keep only rows whose RACED code lies in 400..699, both ends inclusive.
# The upper bound must include 699: the Pacific_Islanders category defined
# later in this script spans 680:699, and a strict `< 699` would silently
# drop that last code before it can be classified.
g <- filter(f, RACED >= 400 & RACED <= 699)
# RACED codes that make up each plotted category.
Chinese <- c(400)
A1990 <- c(410, 640, 660:670)
Japanese <- c(500)
Hawaiian <- c(630:632)
Pacific_Islanders <- c(680:699)
A1930 <- c(600:620)
# Add RCAT, a single-letter category label derived from RACED. Conditions are
# tested top to bottom and the first match wins; any code that matches none
# of the vectors above falls through to "B".
# NOTE(review): the letters presumably exist to control the alphabetical
# ordering of the categories in the plots -- confirm.
h <- mutate(
  g,
  RCAT = case_when(
    RACED %in% Chinese ~ "E",
    RACED %in% A1990 ~ "F",
    RACED %in% Japanese ~ "C",
    RACED %in% Hawaiian ~ "D",
    RACED %in% Pacific_Islanders ~ "A",
    RACED %in% A1930 ~ "G",
    TRUE ~ "B"
  )
)
head(h)
# Sort the rows by YEAR.
h <- arrange(h, YEAR)
head(h)
# Collapse the per-RACED counts into one NUMBER per YEAR x RCAT combination.
i <- h %>%
  group_by(YEAR, RCAT) %>%
  summarise(NUMBER = sum(NUMBER))
# Re-attach each year's TOTAL from `e` to the per-category rows.
j <- merge(x = i, y = e, by = 'YEAR')
# Print the rows of `j` one year at a time.
# Iterating over the year values directly avoids two problems of an index
# loop written as `for (i in 1:length(years))`: it does not overwrite the
# data frame `i`, and it runs zero times when `years` is empty.
years <- unique(j$YEAR)
for (yr in years) {
  print(filter(j, YEAR == yr))
}
# Stacked bar chart: YEAR on the x axis, NUMBER on the y axis, bar segments
# filled by RCAT.
k <- ggplot(data = j, mapping = aes(x = YEAR, y = NUMBER, fill = RCAT)) +
  geom_bar(stat = 'identity')
print(k)
# Same chart with the ColorBrewer "Dark2" palette on the fill scale.
l <- k +
  scale_fill_brewer(palette = "Dark2")
print(l)
# Polish the chart: black-and-white theme, comma-formatted y labels, fixed
# x-axis breaks, title and axis labels, and a reversed, titled legend.
m <- l +
  theme_bw() +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(breaks = c(1880, 1910, 1930, 1960, 1990)) +
  labs(title = 'US Population by Race Categories', x = "Year", y = 'Population') +
  guides(fill = guide_legend(title = 'Race Category', reverse = TRUE))
print(m)
# Overlay a line showing each year's TOTAL on top of the bars.
# The line is drawn from the distinct YEAR/TOTAL pairs of `j`, so it does not
# depend on any single RCAT value being present in every year.
# inherit.aes = FALSE is required because this layer's data has no NUMBER or
# RCAT column for the plot-level mapping to resolve.
n <- m + geom_line(
  data = unique(j[, c("YEAR", "TOTAL")]),
  mapping = aes(x = YEAR, y = TOTAL),
  inherit.aes = FALSE
)
print(n)
# Stacked bar chart of each RCAT's share of the yearly TOTAL (NUMBER / TOTAL),
# with YEAR on the x axis and the y axis formatted as percentages.
p <- ggplot(
  data = j,
  mapping = aes(x = YEAR, y = NUMBER / TOTAL, fill = RCAT)
) +
  geom_bar(stat = 'identity') +
  theme_bw() +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_continuous(breaks = c(1880, 1910, 1930, 1960, 1990)) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = 'The Percentage of the Total US Population by Race Categories',
    x = "Year",
    y = 'Percentage of the total Population'
  ) +
  guides(fill = guide_legend(title = 'Race Category', reverse = TRUE))
print(p)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment