Created
March 5, 2016 03:51
-
-
Save jakeesse/89e3fd432de7cef70868 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the census extract, keep the Texas-born records, and recode AGE, SEX
# and RACE into labelled factors. The final recoded table is `i`, which the
# regional subsets further down the script are built from.
#
# NOTE(review): setwd() with an absolute, user-specific path makes the script
# non-portable. It is kept because the relative CSV path below depends on it;
# consider an RStudio project or a path argument instead.
setwd("/users/JakeEsse/Desktop/HIST 90")
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
immigration_master_data <- read_csv("usa_00007.csv")

# Select the data that we want
immigration_data <- select(
  immigration_master_data,
  YEAR, REGION, PERWT, SEX, RACE, BPL, AGE
)
# BPL == 48 is treated as "born in Texas" (per the variable name);
# presumably the IPUMS birthplace code -- confirm against the codebook.
bpl_texas_data <- filter(immigration_data, BPL == 48)

# Add age categories: decade of age, with everything from 90 up in bucket 9.
a <- mutate(bpl_texas_data, AGECAT = pmin(AGE %/% 10, 9))
agec <- c(
  "0-9", "10-19", "20-29", "30-39", "40-49",
  "50-59", "60-69", "70-79", "80-89", "90+"
)
# Explicit levels: without them factor() derives the levels from the values
# that happen to occur, so a missing decade would make the ten labels fail
# to line up (length-mismatch error).
b <- mutate(a, AGECAT = factor(AGECAT, levels = 0:9, labels = agec))
head(b)

# Factor and label the SEX variable (code 1 -> male, code 2 -> female).
# Levels are pinned for the same reason as AGECAT.
c <- mutate(b, SEX = factor(SEX, levels = c(1, 2), labels = c("male", "female")))
head(c)

# Factor and label RACE as White (code 1) versus Non-White (any other code).
h <- mutate(c, RACE = ifelse(RACE == 1, 0, 1))
i <- mutate(
  h,
  RACE = factor(RACE, levels = c(0, 1), labels = c("White", "Non-White"))
)
head(i)
# Texas-born people living in the "Northeast": REGION codes strictly between
# 10 and 14 (presumably the IPUMS Northeast divisions -- confirm in codebook).
d <- i %>% filter(REGION > 10)
e <- d %>% filter(REGION < 14)
head(e)

# Weighted head count (sum of PERWT) per year, sex, age category and race.
bpl_texas_northeast <- e %>%
  group_by(YEAR, SEX, AGECAT, RACE) %>%
  summarise(NUMBER = sum(PERWT))
head(bpl_texas_northeast)
# Population pyramids for White, Texas-born people in the Northeast subset
# `e`, one per census year (1940, 1970, 2000).
#
# For each year: sum PERWT by sex and age category, negate the male counts so
# their bars extend to the left of zero, and draw female/male bars as two
# layers. The count axis uses labels = abs so both halves read as positive
# head counts. This replaces hard-coded label vectors supplied without
# matching breaks, which only work if ggplot2 happens to pick the same number
# of breaks.

# 1940
ee1 <- filter(e, RACE == "White", YEAR == 1940)
head(ee1)
whites_northeast_1940 <- ee1 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  whites_northeast_1940,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
whites_northeast_1940_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "White, Texas-Born in Northeast - 1940",
    x = "Age Category",
    y = "Number"
  )
print(whites_northeast_1940_ppyramid)

# 1970
ee2 <- filter(e, RACE == "White", YEAR == 1970)
head(ee2)
whites_northeast_1970 <- ee2 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  whites_northeast_1970,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
whites_northeast_1970_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "White, Texas-Born in Northeast - 1970",
    x = "Age Category",
    y = "Number"
  )
print(whites_northeast_1970_ppyramid)

# 2000
ee3 <- filter(e, RACE == "White", YEAR == 2000)
head(ee3)
whites_northeast_2000 <- ee3 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  whites_northeast_2000,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
whites_northeast_2000_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "White, Texas-Born in Northeast - 2000",
    x = "Age Category",
    y = "Number"
  )
print(whites_northeast_2000_ppyramid)
# Population pyramids for Non-White, Texas-born people in the Northeast
# subset `e`, one per census year (1940, 1970, 2000).
#
# Male counts are negated so their bars extend left of zero; the count axis
# is labelled with abs() so the male side no longer shows negative numbers.

# 1940 - nonwhite
ee4 <- filter(e, RACE == "Non-White", YEAR == 1940)
head(ee4)
nonwhites_northeast_1940 <- ee4 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  nonwhites_northeast_1940,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
nonwhites_northeast_1940_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "Non-White, Texas-Born in Northeast - 1940",
    x = "Age Category",
    y = "Number"
  )
print(nonwhites_northeast_1940_ppyramid)

# 1970 - nonwhite
ee5 <- filter(e, RACE == "Non-White", YEAR == 1970)
head(ee5)
nonwhites_northeast_1970 <- ee5 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  nonwhites_northeast_1970,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
nonwhites_northeast_1970_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "Non-White, Texas-Born in Northeast - 1970",
    x = "Age Category",
    y = "Number"
  )
print(nonwhites_northeast_1970_ppyramid)

# 2000 - nonwhite
ee6 <- filter(e, RACE == "Non-White", YEAR == 2000)
head(ee6)
nonwhites_northeast_2000 <- ee6 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  nonwhites_northeast_2000,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
nonwhites_northeast_2000_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "Non-White, Texas-Born in Northeast - 2000",
    x = "Age Category",
    y = "Number"
  )
print(nonwhites_northeast_2000_ppyramid)
# Stacked columns of the Northeast subset by year and race, with each
# segment labelled by its share of that year's total.

# Select out YEAR, RACE, and PERWT for northeast_plot_1
n <- select(e, YEAR, RACE, PERWT)
head(n)
# Weighted count per year and race ...
o <- summarise(group_by(n, YEAR, RACE), NUMBER = sum(PERWT))
head(o)
# ... and per year overall, used as the denominator for the percentages.
p <- summarise(group_by(n, YEAR), TOTAL = sum(PERWT))
head(p)
q <- merge(o, p, by = "YEAR")

# Plot the annual population by race with stacked columns.
# Label positions come from position_stack(vjust = 0.5), which centres each
# label inside its own segment using the same stacking the bars use. The
# previous hand-computed y values assumed White was drawn at the bottom of
# the stack, which does not hold for every ggplot2 version.
northeast_plot_1 <- ggplot(q, aes(x = YEAR, y = NUMBER, fill = RACE)) +
  geom_bar(stat = "identity") +
  labs(title = "Texas Migrants to the Northeast by Race") +
  geom_text(
    aes(
      label = paste0(round(NUMBER / TOTAL * 100, digits = 0), "%"),
      group = RACE
    ),
    position = position_stack(vjust = 0.5)
  )
print(northeast_plot_1)
# Texas-born people living in the "West": REGION codes strictly between
# 40 and 44 (presumably the IPUMS West divisions -- confirm in codebook).
f <- i %>% filter(REGION > 40)
g <- f %>% filter(REGION < 44)

# Weighted head count (sum of PERWT) per year, sex, age category and race.
bpl_texas_west <- g %>%
  group_by(YEAR, SEX, AGECAT, RACE) %>%
  summarise(NUMBER = sum(PERWT))
head(bpl_texas_west)
# Population pyramids for White, Texas-born people in the West subset `g`,
# one per census year (1940, 1970, 2000).
#
# Male counts are negated so their bars extend left of zero; the count axis
# is labelled with abs() so the male side no longer shows negative numbers.
#
# NOTE(review): variable names and plot titles say "Northwest", but `g` is
# the same subset that the stacked-column plot titles "West". Titles are
# left unchanged here; confirm which label is intended.

# 1940
gg1 <- filter(g, RACE == "White", YEAR == 1940)
head(gg1)
whites_northwest_1940 <- gg1 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  whites_northwest_1940,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
whites_northwest_1940_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "White, Texas-Born in Northwest - 1940",
    x = "Age Category",
    y = "Number"
  )
print(whites_northwest_1940_ppyramid)

# 1970
gg2 <- filter(g, RACE == "White", YEAR == 1970)
head(gg2)
whites_northwest_1970 <- gg2 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  whites_northwest_1970,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
whites_northwest_1970_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "White, Texas-Born in Northwest - 1970",
    x = "Age Category",
    y = "Number"
  )
print(whites_northwest_1970_ppyramid)

# 2000
gg3 <- filter(g, RACE == "White", YEAR == 2000)
head(gg3)
whites_northwest_2000 <- gg3 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  whites_northwest_2000,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
whites_northwest_2000_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "White, Texas-Born in Northwest - 2000",
    x = "Age Category",
    y = "Number"
  )
print(whites_northwest_2000_ppyramid)
# Population pyramids for Non-White, Texas-born people in the West subset
# `g`, one per census year (1940, 1970, 2000).
#
# Male counts are negated so their bars extend left of zero; the count axis
# is labelled with abs() so the male side no longer shows negative numbers.
#
# NOTE(review): variable names and plot titles say "Northwest", but `g` is
# the same subset that the stacked-column plot titles "West". Titles are
# left unchanged here; confirm which label is intended.

# 1940 - nonwhite
gg4 <- filter(g, RACE == "Non-White", YEAR == 1940)
head(gg4)
nonwhites_northwest_1940 <- gg4 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  nonwhites_northwest_1940,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
nonwhites_northwest_1940_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "Non-White, Texas-Born in Northwest - 1940",
    x = "Age Category",
    y = "Number"
  )
print(nonwhites_northwest_1940_ppyramid)

# 1970 - nonwhite
gg5 <- filter(g, RACE == "Non-White", YEAR == 1970)
head(gg5)
nonwhites_northwest_1970 <- gg5 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  nonwhites_northwest_1970,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
nonwhites_northwest_1970_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "Non-White, Texas-Born in Northwest - 1970",
    x = "Age Category",
    y = "Number"
  )
print(nonwhites_northwest_1970_ppyramid)

# 2000 - nonwhite
gg6 <- filter(g, RACE == "Non-White", YEAR == 2000)
head(gg6)
nonwhites_northwest_2000 <- gg6 %>%
  group_by(YEAR, SEX, AGECAT) %>%
  summarise(NUMBER = sum(PERWT))
pdata <- mutate(
  nonwhites_northwest_2000,
  NUMBER = ifelse(SEX == "male", -NUMBER, NUMBER)
)
nonwhites_northwest_2000_ppyramid <-
  ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
  geom_bar(data = pdata[pdata$SEX == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$SEX == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = abs) +
  labs(
    title = "Non-White, Texas-Born in Northwest - 2000",
    x = "Age Category",
    y = "Number"
  )
print(nonwhites_northwest_2000_ppyramid)
# Stacked columns of the West subset by year and race, with each segment
# labelled by its share of that year's total.

# Select out YEAR, RACE, and PERWT for west_plot_1
j <- select(g, YEAR, RACE, PERWT)
head(j)
# Weighted count per year and race ...
k <- summarise(group_by(j, YEAR, RACE), NUMBER = sum(PERWT))
head(k)
# ... and per year overall, used as the denominator for the percentages.
l <- summarise(group_by(j, YEAR), TOTAL = sum(PERWT))
head(l)
m <- merge(k, l, by = "YEAR")
head(m)

# Plot the annual population by race with stacked columns.
# Label positions come from position_stack(vjust = 0.5), which centres each
# label inside its own segment using the same stacking the bars use. The
# previous hand-computed y values assumed White was drawn at the bottom of
# the stack, which does not hold for every ggplot2 version.
west_plot_1 <- ggplot(m, aes(x = YEAR, y = NUMBER, fill = RACE)) +
  geom_bar(stat = "identity") +
  labs(title = "Texas Migrants to the West by Race") +
  geom_text(
    aes(
      label = paste0(round(NUMBER / TOTAL * 100, digits = 0), "%"),
      group = RACE
    ),
    position = position_stack(vjust = 0.5)
  )
print(west_plot_1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment