Skip to content

Instantly share code, notes, and snippets.

@jmcinerney14
Last active February 13, 2016 23:02
Show Gist options
  • Save jmcinerney14/7ad55f1be385d71b7300 to your computer and use it in GitHub Desktop.
Save jmcinerney14/7ad55f1be385d71b7300 to your computer and use it in GitHub Desktop.
Asian Population
#load packages
library(ggplot2)
library(RColorBrewer)
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
# Read in IPUMS data ----
# 'Asian4.csv' must provide the STATEFIP, YEAR, PERWT and RACED columns
# that the steps below refer to.
census <- read_csv("Asian4.csv")
# NOTE(review): `raced` is loaded but never referenced again in this script;
# the read is kept so the script's required inputs do not change.
raced <- read_csv("RACED.csv")

# Keep rows whose STATEFIP is below 60, and drop Alaska (2) and Hawaii (15)
# rows dated before 1960.
census_states <- filter(
  census,
  STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
)

# Keep only the columns needed for the population counts.
person_weights <- select(census_states, YEAR, PERWT, RACED)

# Population (sum of PERWT) for each unique combination of YEAR and RACED.
pop_by_year_race <- summarise(
  group_by(person_weights, YEAR, RACED),
  NUMBER = sum(PERWT)
)

# Total population (sum of PERWT) for each YEAR; `e` is reused further down
# as the yearly denominator.
e <- summarise(group_by(person_weights, YEAR), TOTAL = sum(PERWT))

# Attach each year's TOTAL to the per-RACED counts.
pop_with_total <- merge(pop_by_year_race, e, by = "YEAR")

# Keep RACED codes 400 through 699, both ends inclusive. The upper bound has
# to include 699 because the Pacific_Islanders group defined later in this
# script spans 680:699; a strict `< 699` would silently drop that code.
g <- filter(pop_with_total, RACED >= 400 & RACED <= 699)
# RACED code groups ----
# Each vector lists the RACED codes that are folded into one plot category.
# NOTE(review): A1990 and A1930 presumably group codes by the census year in
# which they first appear -- confirm against the RACED codebook.
Chinese <- 400
A1990 <- c(410, 640, 660:670)
Japanese <- 500
Hawaiian <- 630:632
Pacific_Islanders <- 680:699
A1930 <- 600:620

# Add RCAT: a single-letter category derived from RACED. The first matching
# group wins; codes that belong to none of the groups fall through to "B".
# NOTE(review): the letters look chosen to control the alphabetical ordering
# of the categories in the plots -- confirm.
h <- mutate(
  g,
  RCAT = case_when(
    RACED %in% Chinese ~ "E",
    RACED %in% A1990 ~ "F",
    RACED %in% Japanese ~ "C",
    RACED %in% Hawaiian ~ "D",
    RACED %in% Pacific_Islanders ~ "A",
    RACED %in% A1930 ~ "G",
    TRUE ~ "B"
  )
)
head(h)

# Sort the rows by YEAR and look at the first rows again.
h <- arrange(h, YEAR)
head(h)
# Total NUMBER for each unique combination of YEAR and RCAT.
i <- summarise(group_by(h, YEAR, RCAT), NUMBER = sum(NUMBER))

# Attach each year's TOTAL (from `e`) to the per-category counts.
j <- merge(i, e, by = "YEAR")

# Print the rows of `j` one year at a time.
# The loop walks the year values directly: this keeps the data frame `i`
# intact (an index named `i` would overwrite it) and does nothing when
# `years` is empty, whereas `1:length(years)` would iterate over c(1, 0).
years <- unique(j$YEAR)
for (yr in years) {
  print(filter(j, YEAR == yr))
}
# Stacked bar chart: YEAR on the x axis, NUMBER on the y axis, bars filled
# by RCAT.
k <- ggplot(j, aes(x = YEAR, y = NUMBER, fill = RCAT)) +
  geom_bar(stat = "identity")
print(k)

# Use the ColorBrewer "Dark2" palette for the fill colours.
l <- k + scale_fill_brewer(palette = "Dark2")
print(l)

# Reverse the legend order, label the title and axes, format the y axis with
# thousands separators, and switch to the black-and-white theme.
m <- l +
  guides(fill = guide_legend(reverse = TRUE, title = "Race Category")) +
  labs(
    title = "US Population by Race Categories",
    x = "Year",
    y = "Population"
  ) +
  scale_x_continuous(breaks = c(1880, 1910, 1930, 1960, 1990)) +
  scale_y_continuous(labels = comma) +
  theme_bw()
print(m)

# Draw the total population as a line above the bars.
# TOTAL repeats on every row of a given year, so reduce `j` to one row per
# YEAR first. Picking the rows of a single category instead would lose the
# point for any year in which that category has no row.
year_totals <- unique(j[, c("YEAR", "TOTAL")])
n <- m + geom_line(
  data = year_totals,
  aes(x = YEAR, y = TOTAL),
  # `year_totals` has no RCAT column, so the inherited fill mapping must
  # not be applied to this layer.
  inherit.aes = FALSE
)
print(n)
# Stacked bar chart of each RCAT's share of the yearly total:
# YEAR on the x axis, NUMBER / TOTAL (shown as a percentage) on the y axis.
p <- ggplot(
  data = j,
  mapping = aes(YEAR, NUMBER / TOTAL, fill = RCAT)
) +
  geom_bar(stat = "identity") +
  # Axes: fixed year breaks, y values formatted as percentages.
  scale_x_continuous(breaks = c(1880, 1910, 1930, 1960, 1990)) +
  scale_y_continuous(labels = scales::percent) +
  # Fill colours from the "Dark2" palette; legend titled and reversed.
  scale_fill_brewer(
    palette = "Dark2",
    guide = guide_legend(reverse = TRUE, title = "Race Category")
  ) +
  labs(
    y = "Percentage of the total Population",
    x = "Year",
    title = "The Percentage of the Total US Population by Race Categories"
  ) +
  theme_bw()
print(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment