Created
November 4, 2016 22:28
-
-
Save Jisoosong2017/e12b4571ca48ddff6aa0123264fe762d to your computer and use it in GitHub Desktop.
Ji Soo Song - Final Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Activate appropriate packages. | |
library(readr) | |
library(tidyr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(maptools) | |
library(gtools) | |
library(ggmap) | |
#Read in IPUMS data. | |
Data<-read_csv('./Methods.csv',col_types = cols(PERWT=col_double())) | |
#Read in map data to be used later. | |
MapData<-read_csv('./map.csv') | |
#Preparing the Data | |
#Isolate East and Southeast Asians 25 and older. | |
Isolated<-Data%>%filter(RACED%in%c(400,410,420,500,620,640,660,661,662,663)&AGE>=25&YEAR%in%c(1940,1960,1980,2000)) | |
#Add ethnicity/nationality labels. | |
Eth<-Isolated%>%mutate(Ethnicity_or_Nationality=ifelse(RACED==400, 'Chinese', | |
ifelse(RACED==410, 'Taiwanese', | |
ifelse(RACED==420, 'Chinese and Taiwanese', | |
ifelse(RACED==500, 'Japanese', | |
ifelse(RACED==620, 'Korean', | |
ifelse(RACED==640, 'Vietnamese', | |
ifelse(RACED==660, 'Cambodian', | |
ifelse(RACED==661, 'Hmong', | |
ifelse(RACED==662, 'Laotian', | |
ifelse(RACED==663, 'Thai','Other'))))))))))) | |
#Categorieze each nationality/ethnicity by region. | |
Reg<-Eth%>%mutate(Region=ifelse(RACED%in%c(400,410,420,500,620),'East Asian','Southeast Asian')) | |
#Categorize educational attainment. | |
Ed<-Reg%>%mutate(Educational_Attainment=ifelse(EDUCD%in%c(999,000,001),'Not Applicable', | |
ifelse(EDUCD<=61,'Some K-12', | |
ifelse(EDUCD<=64,'High School Graduate', | |
ifelse(EDUCD%in%c(65,70,71,80,90,100,110,111,112,113),'Some College', | |
ifelse(EDUCD%in%c(81,82,83),'Associates Degree', | |
ifelse(EDUCD==101,'Bachelors Degree', | |
ifelse(EDUCD<=116,'Advanced Degree','Other')))))))) | |
#Adjust for inflation in income by converting past INCWAGE to value at year 2000. | |
#Multiplicative factors given by the Bureau of Labor Statistics. | |
AdjustedInc<-Ed%>%mutate(Adjusted_Income_Level=as.double(ifelse(YEAR==1940,INCWAGE*12.3, | |
ifelse(YEAR==1960,INCWAGE*5.82, | |
ifelse(YEAR==1980,INCWAGE*2.09,INCWAGE))))) | |
#Set maximum income level (necessary due to top-coding in the INCWAGE variable). | |
AdjustedInc<-AdjustedInc%>%mutate(AdjInc=ifelse(Adjusted_Income_Level>59941.99,59941.99,Adjusted_Income_Level)) | |
#Categorize income levels. | |
Inc<-AdjustedInc%>%mutate(Income_Level=ifelse(AdjInc<10000,'$0-$9999', | |
ifelse(AdjInc<20000,'$10,000-$19,999', | |
ifelse(AdjInc<30000,'$20,000-$29,999', | |
ifelse(AdjInc<40000,'$30,000-$39,999', | |
ifelse(AdjInc<59000,'$40,000-$58,999','$59,000+')))))) | |
#Creating the Data Frame Necessary for Column Graphs | |
#Group the prepared data by year and state. Count the number of people in each category by PERWT. | |
grouped=Inc%>%group_by(YEAR,STATEFIP,Ethnicity_or_Nationality,Educational_Attainment,Income_Level,Region)%>%summarise(NUMBER=sum(PERWT)) | |
#Arrange this grouped data by region. | |
grouped<-grouped%>%arrange(Region) | |
#Creating the Data Frame Necessary for Proportion Maps | |
#Group the preapred data by year, statem, region, and educational attainment. | |
#Count the number of people in each cateogry by PERWT. | |
grouped2=Inc%>%group_by(YEAR,STATEFIP,Region,Educational_Attainment)%>%summarise(NUMBER=sum(PERWT)) | |
#Isolate East and Southeast Asians in 2000. | |
#Separately, isolate East and Southeast Asians in 2000 with bachelors and above. | |
E<-grouped2%>%filter(Region=='East Asian'&YEAR==2000) | |
EB<-grouped2%>%filter(Region=='East Asian'&YEAR==2000&Educational_Attainment%in%c('Bachelors Degree','Advanced Degree')) | |
SE<-grouped2%>%filter(Region=='Southeast Asian'&YEAR==2000) | |
SEB<-grouped2%>%filter(Region=='Southeast Asian'&YEAR==2000&Educational_Attainment%in%c('Bachelors Degree','Advanced Degree')) | |
#Calculating East Asian Proportions | |
#In the year 2000, what percentage of East Asians, out of all East Asians 25 and older, in a certain state have a bachelors or above? | |
#Calculate this information and add it to a growing vector. | |
ProportionE<-c() | |
for(i in 1:56) | |
{ | |
#These STATEFIP IDs do not correspond to states and are thus skipped. | |
if(i%in%c(3,7,14,43,52))next | |
new<-filter(E,STATEFIP==i) | |
a<-sum(new$NUMBER) | |
newB<-filter(EB,STATEFIP==i) | |
b<-sum(newB$NUMBER) | |
PropE=round(b/a,digits=2) | |
ProportionE<-c(ProportionE,PropE) | |
} | |
#Southeast Asian Proportions | |
#In the year 2000, what percentage of Southeast Asians, out of all Southeast Asians 25 and older, in a certain state have a bachelors or above? | |
#Calculate this information and add it to a growing vector. | |
ProportionSE<-c() | |
for(i in 1:56) | |
{ | |
#These STATEFIP IDs do not correspond to states and are thus skipped. | |
if(i%in%c(3,7,14,43,52))next | |
newC<-filter(SE,STATEFIP==i) | |
c<-sum(newC$NUMBER) | |
newD<-filter(SEB,STATEFIP==i) | |
d<-sum(newD$NUMBER) | |
PropSE=round(d/c,digits=2) | |
ProportionSE<-c(ProportionSE,PropSE) | |
} | |
#Read in this data frame, which includes all STATEFIP IDs. | |
States<-read_csv('./Book1.csv') | |
#Combine the two proportion vectors into a data frame. Add STATEFIP IDs to this data frame. | |
Discrepancies<-data.frame(States,ProportionE,ProportionSE) | |
#Column Graph 1, (Education aggregated for all Asians) | |
#Identify the axes and what to fill the columns with. | |
Aggregated<-ggplot(data=grouped,aes(x=Educational_Attainment,y=NUMBER)) + | |
#Identify the graph as a column graph, apply appropriate labels. | |
geom_bar(stat='identity') + labs(x='Educational Attainment',y='Number of Individuals',title='Educational Attainment of Asians in the United States 1940-2000')+ | |
#Y axis is not in scientific notation. | |
scale_y_continuous(labels=scales::comma)+ | |
#Adjust X axis with labels. | |
scale_x_discrete(limits=c('Not Applicable','Some K-12','High School Graduate','Some College','Associates Degree','Bachelors Degree','Advanced Degree'))+ | |
#Apply appropriate colors. | |
scale_fill_brewer(palette = 'Set2')+ | |
#Separate the graph by year and adjust the y-scale according to population size. | |
facet_wrap(~YEAR,ncol=2,scales='free_y')+ | |
#Apply the black and white theme. | |
theme_bw()+ | |
#Rotate the x-axis labels. | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
#Print the PNG file. | |
png('Aggregated.png',height=500,width=1000) | |
print(Aggregated) | |
dev.off() | |
#Column Graph 2, (Income aggregated for all Asians) | |
#Identify the axes and what to fill the columns with. | |
Aggregated1<-ggplot(data=grouped,aes(x=Income_Level,y=NUMBER)) + | |
#Identify the graph as a column graph, apply appropriate labels. | |
geom_bar(stat='identity') + labs(x='Income Level',y='Number of Individuals',title='Income Level of Asians in the United States 1940-2000')+ | |
#Y axis is not in scientific notation. | |
scale_y_continuous(labels=scales::comma)+ | |
#Apply appropriate colors. | |
scale_fill_brewer(palette = 'Set2')+ | |
#Separate the graph by year and adjust the y-scale according to population size. | |
facet_wrap(~YEAR,ncol=2,scales='free_y')+ | |
#Apply the black and white theme. | |
theme_bw()+ | |
#Rotate the x-axis labels. | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
#Print the PNG file. | |
png('Aggregated1.png',height=500,width=1000) | |
print(Aggregated1) | |
dev.off() | |
#Column Graph 3, (Education disaggregated) | |
#Identify the axes and what to fill the columns with. | |
Disaggregated<-ggplot(data=grouped,aes(x=Educational_Attainment,y=NUMBER,fill=Region)) + | |
#Identify the graph as a column graph, apply appropriate labels. | |
geom_bar(stat='identity') + labs(x='Educational Attainment',y='Number of Individuals',fill='Region',title='Educational Attainment of Asians in the United States 1940-2000')+ | |
#Y axis is not in scientific notation. | |
scale_y_continuous(labels=scales::comma)+ | |
#Adjust X axis. | |
scale_x_discrete(limits=c('Not Applicable','Some K-12','High School Graduate','Some College','Associates Degree','Bachelors Degree','Advanced Degree'))+ | |
#Apply appropriate colors. | |
scale_fill_brewer(palette = 'Set2')+ | |
#Separate the graph by year and adjust the y-scale according to population size. | |
facet_wrap(~YEAR,ncol=2,scales='free_y')+ | |
#Apply the black and white theme. | |
theme_bw()+ | |
#Rotate the x-axis labels. | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
#Print the PNG file. | |
png('Disaggregated.png',height=500,width=1000) | |
print(Disaggregated) | |
dev.off() | |
#Column Graph 4, (Income disaggregated) | |
#Identify the axes and what to fill the columns with. | |
Disaggregated1<-ggplot(data=grouped,aes(x=Income_Level,y=NUMBER,fill=Region)) + | |
#Identify the graph as a column graph, apply appropriate labels. | |
geom_bar(stat='identity') + labs(x='Income Level',y='Number of Individuals',fill='Region',title='Income Level of Asians in the United States 1940-2000')+ | |
#Y axis is not in scientific notation. | |
scale_y_continuous(labels=scales::comma)+ | |
#Apply appropriate colors. | |
scale_fill_brewer(palette = 'Set2')+ | |
#Separate the graph by year and adjust the y-scale according to population size. | |
facet_wrap(~YEAR,ncol=2,scales='free_y')+ | |
#Apply the black and white theme. | |
theme_bw()+ | |
#Rotate x-axis labels. | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
#Print the PNG file. | |
png('Disaggregated1.png',height=500,width=1000) | |
print(Disaggregated1) | |
dev.off() | |
#Proportion Maps | |
#Change STATEFIP in MapData into integers. | |
newmap=MapData%>%mutate(STATEI=as.integer(STATEFIP)) | |
#Leftjoin Mapdata to the discrepancies data frame by matching up states. | |
joined<-left_join(Discrepancies,newmap,by=c('STATEFIP'='STATEI')) | |
joined<-joined%>%arrange(order) | |
#Add a factor variable that will categorize the East Asian proportions. | |
joinedcats<-joined%>% mutate(cat=factor(ifelse(ProportionE<0.1,1, | |
ifelse(ProportionE<0.2,2, | |
ifelse(ProportionE<0.3,3, | |
ifelse(ProportionE<0.4,4, | |
ifelse(ProportionE<0.5,5,6))))))) | |
#Attach these character variables to the above-defined factor variables. | |
levels(joinedcats$cat)<-c('0%-9%','10%-19%','20%-29%','30%-39%','40%-49%','50%+') | |
#East Asian Bachelors or Above Proportion Map | |
#Draw the blank map. | |
map4<-ggplot()+theme_nothing(legend=TRUE)+ | |
geom_polygon(data=MapData, aes(x=long,y=lat,group=group), fill='white',color='black')+ | |
#Use the Blues palette, which has various shades of blue. | |
scale_fill_brewer(palette='Blues') + | |
#Fill the map with proportion categories. | |
geom_polygon(data=joinedcats,aes(x=long,y=lat,group=group,fill=cat),color='black')+ | |
#Give title for map and legend. | |
labs(title=paste('Proportion of East Asians 25 or Older with a Bachelors or Above in 2000'),fill='Proportion') | |
#Print the PNG file. | |
png(paste('map4.png'),width=1500,height=1000) | |
print(map4) | |
dev.off() | |
#Add a factor variable that will categorize the Southeast Asian proportions. | |
joinedcats1<-joined%>% mutate(cat1=factor(ifelse(ProportionSE<0.1,1, | |
ifelse(ProportionSE<0.2,2, | |
ifelse(ProportionSE<0.3,3, | |
ifelse(ProportionSE<0.4,4, | |
ifelse(ProportionSE<0.5,5,6))))))) | |
#Attach these character variables to the above-defined factor variables. | |
levels(joinedcats1$cat1)<-c('0%-9%','10%-19%','20%-29%','30%-39%','40%-49%','50%+') | |
#Southeast Asian Bachelors or Above Proportion Map | |
#Draw the blank map. | |
map8<-ggplot()+theme_nothing(legend=TRUE)+ | |
geom_polygon(data=MapData, aes(x=long,y=lat,group=group), fill='white',color='black')+ | |
#Use the Blues palette, which has various shades of blue. | |
scale_fill_brewer(palette='Blues') + | |
#Fill the map with proportion categories. | |
geom_polygon(data=joinedcats1,aes(x=long,y=lat,group=group,fill=cat1),color='black')+ | |
#Give title for map and legend. | |
labs(title=paste('Proportion of Southeast Asians 25 or Older with a Bachelors or Above in 2000'),fill='Proportion') | |
#Print the PNG file. | |
png(paste('map8.png'),width=1500,height=1000) | |
print(map8) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment