Skip to content

Instantly share code, notes, and snippets.

@Jisoosong2017
Created November 4, 2016 22:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jisoosong2017/e12b4571ca48ddff6aa0123264fe762d to your computer and use it in GitHub Desktop.
Save Jisoosong2017/e12b4571ca48ddff6aa0123264fe762d to your computer and use it in GitHub Desktop.
Ji Soo Song - Final Project
#Activate appropriate packages.
library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(maptools)
library(gtools)
library(ggmap)
#Read in IPUMS data.
Data<-read_csv('./Methods.csv',col_types = cols(PERWT=col_double()))
#Read in map data to be used later.
MapData<-read_csv('./map.csv')
#Preparing the Data
#Isolate East and Southeast Asians 25 and older.
Isolated<-Data%>%filter(RACED%in%c(400,410,420,500,620,640,660,661,662,663)&AGE>=25&YEAR%in%c(1940,1960,1980,2000))
#Add ethnicity/nationality labels.
Eth<-Isolated%>%mutate(Ethnicity_or_Nationality=ifelse(RACED==400, 'Chinese',
ifelse(RACED==410, 'Taiwanese',
ifelse(RACED==420, 'Chinese and Taiwanese',
ifelse(RACED==500, 'Japanese',
ifelse(RACED==620, 'Korean',
ifelse(RACED==640, 'Vietnamese',
ifelse(RACED==660, 'Cambodian',
ifelse(RACED==661, 'Hmong',
ifelse(RACED==662, 'Laotian',
ifelse(RACED==663, 'Thai','Other')))))))))))
#Categorieze each nationality/ethnicity by region.
Reg<-Eth%>%mutate(Region=ifelse(RACED%in%c(400,410,420,500,620),'East Asian','Southeast Asian'))
#Categorize educational attainment.
Ed<-Reg%>%mutate(Educational_Attainment=ifelse(EDUCD%in%c(999,000,001),'Not Applicable',
ifelse(EDUCD<=61,'Some K-12',
ifelse(EDUCD<=64,'High School Graduate',
ifelse(EDUCD%in%c(65,70,71,80,90,100,110,111,112,113),'Some College',
ifelse(EDUCD%in%c(81,82,83),'Associates Degree',
ifelse(EDUCD==101,'Bachelors Degree',
ifelse(EDUCD<=116,'Advanced Degree','Other'))))))))
#Adjust for inflation in income by converting past INCWAGE to value at year 2000.
#Multiplicative factors given by the Bureau of Labor Statistics.
AdjustedInc<-Ed%>%mutate(Adjusted_Income_Level=as.double(ifelse(YEAR==1940,INCWAGE*12.3,
ifelse(YEAR==1960,INCWAGE*5.82,
ifelse(YEAR==1980,INCWAGE*2.09,INCWAGE)))))
#Set maximum income level (necessary due to top-coding in the INCWAGE variable).
AdjustedInc<-AdjustedInc%>%mutate(AdjInc=ifelse(Adjusted_Income_Level>59941.99,59941.99,Adjusted_Income_Level))
#Categorize income levels.
Inc<-AdjustedInc%>%mutate(Income_Level=ifelse(AdjInc<10000,'$0-$9999',
ifelse(AdjInc<20000,'$10,000-$19,999',
ifelse(AdjInc<30000,'$20,000-$29,999',
ifelse(AdjInc<40000,'$30,000-$39,999',
ifelse(AdjInc<59000,'$40,000-$58,999','$59,000+'))))))
#Creating the Data Frame Necessary for Column Graphs
#Group the prepared data by year and state. Count the number of people in each category by PERWT.
grouped=Inc%>%group_by(YEAR,STATEFIP,Ethnicity_or_Nationality,Educational_Attainment,Income_Level,Region)%>%summarise(NUMBER=sum(PERWT))
#Arrange this grouped data by region.
grouped<-grouped%>%arrange(Region)
#Creating the Data Frame Necessary for Proportion Maps
#Group the preapred data by year, statem, region, and educational attainment.
#Count the number of people in each cateogry by PERWT.
grouped2=Inc%>%group_by(YEAR,STATEFIP,Region,Educational_Attainment)%>%summarise(NUMBER=sum(PERWT))
#Isolate East and Southeast Asians in 2000.
#Separately, isolate East and Southeast Asians in 2000 with bachelors and above.
E<-grouped2%>%filter(Region=='East Asian'&YEAR==2000)
EB<-grouped2%>%filter(Region=='East Asian'&YEAR==2000&Educational_Attainment%in%c('Bachelors Degree','Advanced Degree'))
SE<-grouped2%>%filter(Region=='Southeast Asian'&YEAR==2000)
SEB<-grouped2%>%filter(Region=='Southeast Asian'&YEAR==2000&Educational_Attainment%in%c('Bachelors Degree','Advanced Degree'))
#Calculating East Asian Proportions
#In the year 2000, what percentage of East Asians, out of all East Asians 25 and older, in a certain state have a bachelors or above?
#Calculate this information and add it to a growing vector.
ProportionE<-c()
for(i in 1:56)
{
#These STATEFIP IDs do not correspond to states and are thus skipped.
if(i%in%c(3,7,14,43,52))next
new<-filter(E,STATEFIP==i)
a<-sum(new$NUMBER)
newB<-filter(EB,STATEFIP==i)
b<-sum(newB$NUMBER)
PropE=round(b/a,digits=2)
ProportionE<-c(ProportionE,PropE)
}
#Southeast Asian Proportions
#In the year 2000, what percentage of Southeast Asians, out of all Southeast Asians 25 and older, in a certain state have a bachelors or above?
#Calculate this information and add it to a growing vector.
ProportionSE<-c()
for(i in 1:56)
{
#These STATEFIP IDs do not correspond to states and are thus skipped.
if(i%in%c(3,7,14,43,52))next
newC<-filter(SE,STATEFIP==i)
c<-sum(newC$NUMBER)
newD<-filter(SEB,STATEFIP==i)
d<-sum(newD$NUMBER)
PropSE=round(d/c,digits=2)
ProportionSE<-c(ProportionSE,PropSE)
}
#Read in this data frame, which includes all STATEFIP IDs.
States<-read_csv('./Book1.csv')
#Combine the two proportion vectors into a data frame. Add STATEFIP IDs to this data frame.
Discrepancies<-data.frame(States,ProportionE,ProportionSE)
#Column Graph 1, (Education aggregated for all Asians)
#Identify the axes and what to fill the columns with.
Aggregated<-ggplot(data=grouped,aes(x=Educational_Attainment,y=NUMBER)) +
#Identify the graph as a column graph, apply appropriate labels.
geom_bar(stat='identity') + labs(x='Educational Attainment',y='Number of Individuals',title='Educational Attainment of Asians in the United States 1940-2000')+
#Y axis is not in scientific notation.
scale_y_continuous(labels=scales::comma)+
#Adjust X axis with labels.
scale_x_discrete(limits=c('Not Applicable','Some K-12','High School Graduate','Some College','Associates Degree','Bachelors Degree','Advanced Degree'))+
#Apply appropriate colors.
scale_fill_brewer(palette = 'Set2')+
#Separate the graph by year and adjust the y-scale according to population size.
facet_wrap(~YEAR,ncol=2,scales='free_y')+
#Apply the black and white theme.
theme_bw()+
#Rotate the x-axis labels.
theme(axis.text.x = element_text(angle = 90, hjust = 1))
#Print the PNG file.
png('Aggregated.png',height=500,width=1000)
print(Aggregated)
dev.off()
#Column Graph 2, (Income aggregated for all Asians)
#Identify the axes and what to fill the columns with.
Aggregated1<-ggplot(data=grouped,aes(x=Income_Level,y=NUMBER)) +
#Identify the graph as a column graph, apply appropriate labels.
geom_bar(stat='identity') + labs(x='Income Level',y='Number of Individuals',title='Income Level of Asians in the United States 1940-2000')+
#Y axis is not in scientific notation.
scale_y_continuous(labels=scales::comma)+
#Apply appropriate colors.
scale_fill_brewer(palette = 'Set2')+
#Separate the graph by year and adjust the y-scale according to population size.
facet_wrap(~YEAR,ncol=2,scales='free_y')+
#Apply the black and white theme.
theme_bw()+
#Rotate the x-axis labels.
theme(axis.text.x = element_text(angle = 90, hjust = 1))
#Print the PNG file.
png('Aggregated1.png',height=500,width=1000)
print(Aggregated1)
dev.off()
#Column Graph 3, (Education disaggregated)
#Identify the axes and what to fill the columns with.
Disaggregated<-ggplot(data=grouped,aes(x=Educational_Attainment,y=NUMBER,fill=Region)) +
#Identify the graph as a column graph, apply appropriate labels.
geom_bar(stat='identity') + labs(x='Educational Attainment',y='Number of Individuals',fill='Region',title='Educational Attainment of Asians in the United States 1940-2000')+
#Y axis is not in scientific notation.
scale_y_continuous(labels=scales::comma)+
#Adjust X axis.
scale_x_discrete(limits=c('Not Applicable','Some K-12','High School Graduate','Some College','Associates Degree','Bachelors Degree','Advanced Degree'))+
#Apply appropriate colors.
scale_fill_brewer(palette = 'Set2')+
#Separate the graph by year and adjust the y-scale according to population size.
facet_wrap(~YEAR,ncol=2,scales='free_y')+
#Apply the black and white theme.
theme_bw()+
#Rotate the x-axis labels.
theme(axis.text.x = element_text(angle = 90, hjust = 1))
#Print the PNG file.
png('Disaggregated.png',height=500,width=1000)
print(Disaggregated)
dev.off()
#Column Graph 4, (Income disaggregated)
#Identify the axes and what to fill the columns with.
Disaggregated1<-ggplot(data=grouped,aes(x=Income_Level,y=NUMBER,fill=Region)) +
#Identify the graph as a column graph, apply appropriate labels.
geom_bar(stat='identity') + labs(x='Income Level',y='Number of Individuals',fill='Region',title='Income Level of Asians in the United States 1940-2000')+
#Y axis is not in scientific notation.
scale_y_continuous(labels=scales::comma)+
#Apply appropriate colors.
scale_fill_brewer(palette = 'Set2')+
#Separate the graph by year and adjust the y-scale according to population size.
facet_wrap(~YEAR,ncol=2,scales='free_y')+
#Apply the black and white theme.
theme_bw()+
#Rotate x-axis labels.
theme(axis.text.x = element_text(angle = 90, hjust = 1))
#Print the PNG file.
png('Disaggregated1.png',height=500,width=1000)
print(Disaggregated1)
dev.off()
#Proportion Maps
#Change STATEFIP in MapData into integers.
newmap=MapData%>%mutate(STATEI=as.integer(STATEFIP))
#Leftjoin Mapdata to the discrepancies data frame by matching up states.
joined<-left_join(Discrepancies,newmap,by=c('STATEFIP'='STATEI'))
joined<-joined%>%arrange(order)
#Add a factor variable that will categorize the East Asian proportions.
joinedcats<-joined%>% mutate(cat=factor(ifelse(ProportionE<0.1,1,
ifelse(ProportionE<0.2,2,
ifelse(ProportionE<0.3,3,
ifelse(ProportionE<0.4,4,
ifelse(ProportionE<0.5,5,6)))))))
#Attach these character variables to the above-defined factor variables.
levels(joinedcats$cat)<-c('0%-9%','10%-19%','20%-29%','30%-39%','40%-49%','50%+')
#East Asian Bachelors or Above Proportion Map
#Draw the blank map.
map4<-ggplot()+theme_nothing(legend=TRUE)+
geom_polygon(data=MapData, aes(x=long,y=lat,group=group), fill='white',color='black')+
#Use the Blues palette, which has various shades of blue.
scale_fill_brewer(palette='Blues') +
#Fill the map with proportion categories.
geom_polygon(data=joinedcats,aes(x=long,y=lat,group=group,fill=cat),color='black')+
#Give title for map and legend.
labs(title=paste('Proportion of East Asians 25 or Older with a Bachelors or Above in 2000'),fill='Proportion')
#Print the PNG file.
png(paste('map4.png'),width=1500,height=1000)
print(map4)
dev.off()
#Add a factor variable that will categorize the Southeast Asian proportions.
joinedcats1<-joined%>% mutate(cat1=factor(ifelse(ProportionSE<0.1,1,
ifelse(ProportionSE<0.2,2,
ifelse(ProportionSE<0.3,3,
ifelse(ProportionSE<0.4,4,
ifelse(ProportionSE<0.5,5,6)))))))
#Attach these character variables to the above-defined factor variables.
levels(joinedcats1$cat1)<-c('0%-9%','10%-19%','20%-29%','30%-39%','40%-49%','50%+')
#Southeast Asian Bachelors or Above Proportion Map
#Draw the blank map.
map8<-ggplot()+theme_nothing(legend=TRUE)+
geom_polygon(data=MapData, aes(x=long,y=lat,group=group), fill='white',color='black')+
#Use the Blues palette, which has various shades of blue.
scale_fill_brewer(palette='Blues') +
#Fill the map with proportion categories.
geom_polygon(data=joinedcats1,aes(x=long,y=lat,group=group,fill=cat1),color='black')+
#Give title for map and legend.
labs(title=paste('Proportion of Southeast Asians 25 or Older with a Bachelors or Above in 2000'),fill='Proportion')
#Print the PNG file.
png(paste('map8.png'),width=1500,height=1000)
print(map8)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment