Last active
November 8, 2016 02:14
-
-
Save TheInstantCrush/a9b1e4e77dfda97810c35cea23fdd3a3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Data visualizations | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(scales) | |
library(ggmap) | |
library(maptools) | |
library(gtools) | |
#Read in data | |
ipums <- read_csv('data/raw.csv',col_types=cols(HHWT=col_double(),PERWT=col_double())) | |
#Exclude Alaska/Hawaii before 1960 | |
ipums <- ipums %>% filter(!(YEAR < 1960 & STATEFIP %in% c(2,15))) | |
ipumsx <- ipums | |
#Studying only Asians | |
ipums <- ipums %>% filter(RACESING==4) | |
#1. Spine graphs showing historical distribution of Asian-Americans by birthplace. | |
#Select relevant variables | |
ipums1 <- ipums %>% select(YEAR,PERWT,AGE,BPL) | |
#Create birthplace categories | |
ipums1 <- ipums1 %>% mutate(Birthplace = factor(ifelse(BPL==6,1, | |
ifelse(BPL==15,2, | |
ifelse(BPL<150,3, | |
ifelse(BPL==500,4, | |
ifelse(BPL==501,5, | |
ifelse(BPL==502,6, | |
ifelse(BPL==515,7, | |
ifelse(BPL==518,8, | |
ifelse(BPL %in% c(510:519),9, | |
ifelse(BPL==521,10,11)))))))))), | |
labels=c('California','Hawaii','Other US/Territories','China','Japan','Korea','Philippines', | |
'Vietnam','Other SE Asia','India','Other'))) | |
#Group by year and birthplace, calculate totals and percentages | |
a <- ipums1 %>% group_by(YEAR) %>% summarise(Number1=sum(PERWT)) | |
b <- ipums1 %>% group_by(YEAR,Birthplace) %>% summarise(Number2=sum(PERWT)) | |
c <- left_join(b,a) | |
c <- c %>% mutate(Pct=Number2/Number1*100) | |
#Graph | |
graph1 <- ggplot(c, aes(x=YEAR,y=Pct/100,fill=Birthplace)) + | |
geom_bar(stat='identity', aes(width=rescale(Number1,c(2,10)))) + | |
labs(x='Year',y='Percent of Population',fill='Birthplace', | |
title='Asian-American Population by Birthplace, 1940-2000') + | |
theme_bw() + | |
scale_x_continuous(breaks=c(1940,1950,1960,1970,1980,1990,2000)) + | |
scale_y_continuous(labels=scales::percent) + | |
guides(fill = guide_legend(reverse=TRUE)) + | |
scale_fill_brewer(palette='Set3') | |
png('graph1.png',width=1000,height=500) | |
print(graph1) | |
dev.off() | |
#2. Series of historical maps documenting historical Asian-American populations by state. | |
#Set up US Map | |
mapdata <- read_csv('data/map.csv') | |
map1 <- ggplot() + theme_nothing(legend=TRUE) + | |
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group),fill='white',color='black') | |
png('map.png',width=1500,height=1000) | |
print(map1) | |
dev.off() | |
#Select relevant variables | |
ipums2 <- ipums %>% select(YEAR,PERWT,STATEFIP) | |
#Group by year and state | |
ipums2 <- ipums2 %>% group_by(YEAR,STATEFIP) %>% summarise(Number=sum(PERWT)) | |
newmap <- mapdata %>% mutate(STATEI=as.integer(STATEFIP)) | |
#Set up population buckets | |
cuts <- quantcut(ipums2$Number,q=seq(0,1,.2)) | |
cats <- ipums2 %>% mutate(Population=factor(ifelse(Number<2000,1, | |
ifelse(Number<4000,2, | |
ifelse(Number<15000,3, | |
ifelse(Number<60000,4,5)))))) | |
levels(cats$Population) <- c('1-1,999','2,000-3,999','4,000-14,999','15,000-59,999','60,000+') | |
ipumsmap <- left_join(cats,newmap,by=c('STATEFIP'='STATEI')) %>% arrange(order) | |
#Map for each year | |
for (year in unique(ipumsmap$YEAR)) { | |
map2 <- map1 + scale_fill_brewer(palette='Blues') + | |
theme_bw(base_size = 24) + | |
geom_polygon(data=filter(ipumsmap,YEAR==year),aes(x=long,y=lat,group=group,fill=Population),color='black') + | |
labs(title=paste('Asian American Populations By State,',year,sep=' ')) | |
png(paste('map_',year,'.png',sep=''),width=1500,height=1000) | |
print(map2) | |
dev.off() | |
} | |
#3. Column graphs showing historical occupational distribution of Asian-Americans, broken down by sex and birthplace. | |
#3a only Asian-Americans | |
#Select relevant variables, ages 15-65 only | |
ipums3 <- ipums %>% select(YEAR,PERWT,AGE,BPL,OCC1950,SEX) %>% filter(AGE>=15 & AGE <=65) | |
#Set up nationality, occupation, and sex categories | |
ipums3 <- ipums3 %>% mutate(Birthplace = factor(ifelse(BPL<150,1,2), | |
labels=c('Native-Born','Foreign-Born')), | |
Occupation = factor(ifelse(OCC1950 %in% c(980:999), 1, | |
ifelse(OCC1950 %in% c(100,123,810:840) , 2, | |
ifelse(OCC1950 %in% c(500:690,910:979), 3, | |
ifelse(OCC1950 %in% c(200:490), 4, | |
ifelse(OCC1950 %in% c(700:790), 5, | |
ifelse(OCC1950 %in% c(000:099), 6,7)))))), | |
labels=c('none','farmers/farm laborers','craftsmen/operatives/laborers', | |
'managerial/clerical/sales','service','professional')), | |
Sex = ifelse(SEX==1,'Male','Female')) | |
#Group by year, birthplace, occupation, sex | |
d <- ipums3 %>% group_by(YEAR,Birthplace,Occupation,Sex) %>% summarise(Number=sum(PERWT)) | |
#Graph | |
graph2 <- ggplot(data=d,aes(x=YEAR,y=Number,fill=Occupation)) + | |
geom_bar(stat='identity',position='fill') + | |
labs(x='Year',y='Percent of Population',fill='Occupation', | |
title='Occupation of Asian-Americans Aged 15-65 by Sex, Birthplace, and Year, 1940-2000') + | |
scale_y_continuous(labels=scales::percent) + | |
scale_x_continuous(breaks=c(1940,1950,1960,1970,1980,1990,2000)) + | |
scale_fill_brewer(palette='Set1') + | |
facet_grid(Sex~.~Birthplace) + | |
theme_bw() + theme(legend.position = 'bottom') | |
png('graph2.png',height=500,width=1000) | |
print(graph2) | |
dev.off() | |
#3b All Americans | |
#Select relevant variables, ages 15-65 only | |
ipums3x <- ipumsx %>% select(YEAR,PERWT,AGE,BPL,OCC1950,SEX) %>% filter(AGE>=15 & AGE <=65) | |
#Set up nationality, occupation, and sex categories | |
ipums3x <- ipums3x %>% mutate(Birthplace = factor(ifelse(BPL<150,1,2), | |
labels=c('Native-Born','Foreign-Born')), | |
Occupation = factor(ifelse(OCC1950 %in% c(980:999), 1, | |
ifelse(OCC1950 %in% c(100,123,810:840) , 2, | |
ifelse(OCC1950 %in% c(500:690,910:979), 3, | |
ifelse(OCC1950 %in% c(200:490), 4, | |
ifelse(OCC1950 %in% c(700:790), 5, | |
ifelse(OCC1950 %in% c(000:099), 6,7)))))), | |
labels=c('none','farmers/farm laborers','craftsmen/operatives/laborers', | |
'managerial/clerical/sales','service','professional')), | |
Sex = ifelse(SEX==1,'Male','Female')) | |
#Group by year, birthplace, occupation, sex | |
dx <- ipums3x %>% group_by(YEAR,Birthplace,Occupation,Sex) %>% summarise(Number=sum(PERWT)) | |
#Graph | |
graph2x <- ggplot(data=dx,aes(x=YEAR,y=Number,fill=Occupation)) + | |
geom_bar(stat='identity',position='fill') + | |
labs(x='Year',y='Percent of Population',fill='Occupation', | |
title='Occupation of Americans Aged 15-65 by Sex, Birthplace, and Year, 1940-2000') + | |
scale_y_continuous(labels=scales::percent) + | |
scale_x_continuous(breaks=c(1940,1950,1960,1970,1980,1990,2000)) + | |
scale_fill_brewer(palette='Set1') + | |
facet_grid(Sex~.~Birthplace) + | |
theme_bw() + theme(legend.position = 'bottom') | |
png('graph2x.png',height=500,width=1000) | |
print(graph2x) | |
dev.off() | |
#4. Population pyramids showing the historical distribution of Asian immigrants by age at arrival and sex, and by years in the US and sex. | |
#Select relevant variables, years, and birthplaces | |
ipums4 <- ipums %>% select(YEAR,PERWT,AGE,BPL,SEX,YRIMMIG) | |
ipums4 <- ipums4 %>% filter(YEAR>=1970&BPL>=150) | |
#Create vector of age category labels | |
agecats <- '0-9' | |
for(i in 1:7){ | |
agecats <- c(agecats,paste(i,'0-',i,9,sep='')) | |
} | |
agecats <- c(agecats,'80+') | |
#Label sex, change YRIMMIG to integer | |
ipums4 <- ipums4 %>% mutate(Sex=factor(SEX,labels=c('Male','Female')), | |
Yrimmig=as.numeric(YRIMMIG)) | |
#Compute age at arrival Aaa and years in United States YIUS | |
ipums4 <- ipums4 %>% mutate(Aaa=(AGE-YEAR+Yrimmig),Yius=(YEAR-Yrimmig)) | |
#Clean bad entries: year of immigration before birth year, immigration in 996 | |
ipums4 <- ipums4 %>% filter(Aaa>=0 & Yius >=0) | |
#Place into age categories | |
ipums4 <- ipums4 %>% mutate(Acat=factor(ifelse(Aaa>=80,8,floor(Aaa/10)),labels=agecats), | |
Ycat=factor(ifelse(Yius>=80,8,floor(Yius/10)),labels=agecats)) | |
#Group age at arrival by category, sex, and year; prep for population pyramid | |
e1 <- ipums4 %>% group_by(Acat,Sex,YEAR) %>% summarise(Number=sum(PERWT)) | |
e1 <- e1 %>% mutate(Number=ifelse(Sex=='Male',-1*Number,Number)) | |
#Group years in the US by category, sex, and year; prep for population pyramid | |
e2 <- ipums4 %>% group_by(Ycat,Sex,YEAR) %>% summarise(Number=sum(PERWT)) | |
e2 <- e2 %>% mutate(Number=ifelse(Sex=='Male',-1*Number,Number)) | |
#Graph age at arrival | |
graph3 <- ggplot(data=e1,aes(x=Acat,y=Number,fill=Sex)) + | |
geom_bar(data=e1[e1$Sex=='Male',],stat='identity') + | |
geom_bar(data=e1[e1$Sex=='Female',],stat='identity') + | |
coord_flip() + | |
facet_grid(~YEAR) + | |
scale_y_continuous(breaks=c(-1500000,-1000000,-500000,0,500000,1000000,1500000), | |
labels=c('1.5','1','0.5','0','0.5','1','1.5')) + | |
labs(y='Population in millions',title='Population Pyramids for Age at Arrival of Asian immigrants') + | |
scale_fill_brewer(palette='Set1',guide=guide_legend(reverse=TRUE)) + | |
theme_bw() + theme(legend.position='bottom') | |
png('graph3.png',height=500,width=1000) | |
print(graph3) | |
dev.off() | |
#Graph years in US | |
graph4 <- ggplot(data=e2,aes(x=Ycat,y=Number,fill=Sex)) + | |
geom_bar(data=e2[e2$Sex=='Male',],stat='identity') + | |
geom_bar(data=e2[e2$Sex=='Female',],stat='identity') + | |
coord_flip() + | |
facet_grid(~YEAR) + | |
scale_y_continuous(breaks=c(-1500000,-1000000,-500000,0,500000,1000000,1500000), | |
labels=c('1.5','1','0.5','0','0.5','1','1.5')) + | |
labs(y='Population in millions',title='Population Pyramids for Years in US of Asian immigrants') + | |
scale_fill_brewer(palette='Set1',guide=guide_legend(reverse=TRUE)) + | |
theme_bw() + theme(legend.position='bottom') | |
png('graph4.png',height=500,width=1000) | |
print(graph4) | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment