Skip to content

Instantly share code, notes, and snippets.

@jakeesse
Created March 5, 2016 03:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jakeesse/89e3fd432de7cef70868 to your computer and use it in GitHub Desktop.
Save jakeesse/89e3fd432de7cef70868 to your computer and use it in GitHub Desktop.
# Folder that holds the census extract. Set the HIST90_DATA_DIR environment
# variable to point somewhere else; the fallback is the author's desktop
# folder. Building the path explicitly avoids changing the session's working
# directory as a side effect.
data_dir <- Sys.getenv(
  'HIST90_DATA_DIR',
  unset = '/users/JakeEsse/Desktop/HIST 90'
)
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
immigration_master_data <- read_csv(file.path(data_dir, 'usa_00007.csv'))

# Keep only the columns the rest of the script uses.
immigration_data <- select(
  immigration_master_data,
  YEAR, REGION, PERWT, SEX, RACE, BPL, AGE
)

# BPL == 48 is the birthplace code this script treats as Texas.
bpl_texas_data <- filter(immigration_data, BPL == 48)

# Ten-year age bands; everyone aged 90 or more shares the last band.
agec <- c(
  '0-9', '10-19', '20-29', '30-39', '40-49',
  '50-59', '60-69', '70-79', '80-89', '90+'
)

# Recode AGE, SEX and RACE into labelled factors in a single pass.
# Levels are spelled out so each label is tied to its code even when a code
# happens to be absent from the data (without explicit levels, factor() fails
# as soon as the number of distinct values differs from the number of labels).
# NOTE(review): assumes SEX only uses codes 1 and 2; any other code becomes NA.
i <- bpl_texas_data %>%
  mutate(
    AGECAT = factor(pmin(floor(AGE / 10), 9), levels = 0:9, labels = agec),
    SEX = factor(SEX, levels = c(1, 2), labels = c('male', 'female')),
    # RACE code 1 is labelled White; every other code is Non-White.
    RACE = factor(
      ifelse(RACE == 1, 0, 1),
      levels = c(0, 1),
      labels = c('White', 'Non-White')
    )
  )
head(i)

# Texas-born people living in the "Northeast": REGION codes strictly between
# 10 and 14.
e <- filter(i, REGION > 10, REGION < 14)
head(e)

# Weighted head-count (sum of PERWT) per year, sex, age band and race.
bpl_texas_northeast <- e %>%
  group_by(YEAR, SEX, AGECAT, RACE) %>%
  summarise(NUMBER = sum(PERWT))
head(bpl_texas_northeast)
# Population pyramids for Texas-born people in the Northeast, by race and year.

# Weighted head-count (sum of PERWT) per YEAR, SEX and AGECAT for a subset
# that has already been filtered to one race and one year.
count_by_sex_age <- function(people) {
  people %>%
    group_by(YEAR, SEX, AGECAT) %>%
    summarise(NUMBER = sum(PERWT))
}

# Build a population pyramid from the counts returned by count_by_sex_age().
#   counts:     table with SEX, AGECAT and NUMBER columns
#   plot_title: title printed above the plot
# Returns the ggplot object (not printed).
draw_pyramid <- function(counts, plot_title) {
  # Male counts are negated so their bars extend to the opposite side.
  mirrored <- mutate(counts, NUMBER = ifelse(SEX == 'male', -NUMBER, NUMBER))
  ggplot(mirrored, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
    # Each sex gets its own layer so the positive and negative bars are
    # never stacked onto each other.
    geom_bar(data = filter(mirrored, SEX == 'female'), stat = 'identity') +
    geom_bar(data = filter(mirrored, SEX == 'male'), stat = 'identity') +
    coord_flip() +
    # Show magnitudes on the axis. Passing a function keeps the labels in
    # step with whatever breaks ggplot2 picks; a fixed label vector only
    # works when it happens to have the same length as the breaks.
    scale_y_continuous(labels = abs) +
    labs(title = plot_title, x = 'Age Category', y = 'Number')
}

# White, 1940
ee1 <- filter(e, RACE == 'White', YEAR == 1940)
head(ee1)
whites_northeast_1940 <- count_by_sex_age(ee1)
whites_northeast_1940_ppyramid <- draw_pyramid(
  whites_northeast_1940,
  'White, Texas-Born in Northeast - 1940'
)
print(whites_northeast_1940_ppyramid)

# White, 1970
ee2 <- filter(e, RACE == 'White', YEAR == 1970)
head(ee2)
whites_northeast_1970 <- count_by_sex_age(ee2)
whites_northeast_1970_ppyramid <- draw_pyramid(
  whites_northeast_1970,
  'White, Texas-Born in Northeast - 1970'
)
print(whites_northeast_1970_ppyramid)

# White, 2000
ee3 <- filter(e, RACE == 'White', YEAR == 2000)
head(ee3)
whites_northeast_2000 <- count_by_sex_age(ee3)
whites_northeast_2000_ppyramid <- draw_pyramid(
  whites_northeast_2000,
  'White, Texas-Born in Northeast - 2000'
)
print(whites_northeast_2000_ppyramid)

# Non-White, 1940
ee4 <- filter(e, RACE == 'Non-White', YEAR == 1940)
head(ee4)
nonwhites_northeast_1940 <- count_by_sex_age(ee4)
nonwhites_northeast_1940_ppyramid <- draw_pyramid(
  nonwhites_northeast_1940,
  'Non-White, Texas-Born in Northeast - 1940'
)
print(nonwhites_northeast_1940_ppyramid)

# Non-White, 1970
ee5 <- filter(e, RACE == 'Non-White', YEAR == 1970)
head(ee5)
nonwhites_northeast_1970 <- count_by_sex_age(ee5)
nonwhites_northeast_1970_ppyramid <- draw_pyramid(
  nonwhites_northeast_1970,
  'Non-White, Texas-Born in Northeast - 1970'
)
print(nonwhites_northeast_1970_ppyramid)

# Non-White, 2000
ee6 <- filter(e, RACE == 'Non-White', YEAR == 2000)
head(ee6)
nonwhites_northeast_2000 <- count_by_sex_age(ee6)
nonwhites_northeast_2000_ppyramid <- draw_pyramid(
  nonwhites_northeast_2000,
  'Non-White, Texas-Born in Northeast - 2000'
)
print(nonwhites_northeast_2000_ppyramid)
# Yearly weighted population of Texas-born people in the Northeast, by race.
n <- select(e, YEAR, RACE, PERWT)
head(n)

# NUMBER: weighted count per year and race.
o <- n %>%
  group_by(YEAR, RACE) %>%
  summarise(NUMBER = sum(PERWT))
head(o)

# TOTAL: weighted count per year across both races.
p <- n %>%
  group_by(YEAR) %>%
  summarise(TOTAL = sum(PERWT))
head(p)

# Attach each year's total to its per-race rows so shares can be computed.
q <- left_join(o, p, by = 'YEAR')

# Stacked columns per year, each segment labelled with its share of the year.
# position_stack(vjust = 0.5) centres every label inside its own segment, so
# the labels stay correct whichever race ggplot2 stacks at the bottom.
northeast_plot_1 <- ggplot(q, aes(x = YEAR, y = NUMBER, fill = RACE)) +
  geom_bar(stat = 'identity') +
  geom_text(
    aes(label = paste0(round(NUMBER / TOTAL * 100, digits = 0), '%')),
    position = position_stack(vjust = 0.5)
  ) +
  labs(title = 'Texas Migrants to the Northeast by Race')
print(northeast_plot_1)
# Texas-born people living in the "West": keep rows whose REGION code lies
# strictly between 40 and 44, applied as two successive filters.
f <- i %>% filter(REGION > 40)
g <- f %>% filter(REGION < 44)

# Weighted head-count (sum of PERWT) per year, sex, age band and race.
bpl_texas_west <- g %>%
  group_by(YEAR, SEX, AGECAT, RACE) %>%
  summarise(NUMBER = sum(PERWT))
head(bpl_texas_west)
# Population pyramids for Texas-born people in the West, by race and year.
# The object names below still say "northwest" so existing references keep
# working, but the subset `g` they are built from is the "West" selection
# (REGION codes between 40 and 44), so the plot titles say West.

# Weighted head-count (sum of PERWT) per YEAR, SEX and AGECAT for a subset
# that has already been filtered to one race and one year.
count_by_sex_age <- function(people) {
  people %>%
    group_by(YEAR, SEX, AGECAT) %>%
    summarise(NUMBER = sum(PERWT))
}

# Build a population pyramid from the counts returned by count_by_sex_age().
#   counts:     table with SEX, AGECAT and NUMBER columns
#   plot_title: title printed above the plot
# Returns the ggplot object (not printed).
draw_pyramid <- function(counts, plot_title) {
  # Male counts are negated so their bars extend to the opposite side.
  mirrored <- mutate(counts, NUMBER = ifelse(SEX == 'male', -NUMBER, NUMBER))
  ggplot(mirrored, aes(x = AGECAT, y = NUMBER, fill = SEX)) +
    # Each sex gets its own layer so the positive and negative bars are
    # never stacked onto each other.
    geom_bar(data = filter(mirrored, SEX == 'female'), stat = 'identity') +
    geom_bar(data = filter(mirrored, SEX == 'male'), stat = 'identity') +
    coord_flip() +
    # Show magnitudes on the axis instead of negative male counts.
    scale_y_continuous(labels = abs) +
    labs(title = plot_title, x = 'Age Category', y = 'Number')
}

# White, 1940
gg1 <- filter(g, RACE == 'White', YEAR == 1940)
head(gg1)
whites_northwest_1940 <- count_by_sex_age(gg1)
whites_northwest_1940_ppyramid <- draw_pyramid(
  whites_northwest_1940,
  'White, Texas-Born in West - 1940'
)
print(whites_northwest_1940_ppyramid)

# White, 1970
gg2 <- filter(g, RACE == 'White', YEAR == 1970)
head(gg2)
whites_northwest_1970 <- count_by_sex_age(gg2)
whites_northwest_1970_ppyramid <- draw_pyramid(
  whites_northwest_1970,
  'White, Texas-Born in West - 1970'
)
print(whites_northwest_1970_ppyramid)

# White, 2000
gg3 <- filter(g, RACE == 'White', YEAR == 2000)
head(gg3)
whites_northwest_2000 <- count_by_sex_age(gg3)
whites_northwest_2000_ppyramid <- draw_pyramid(
  whites_northwest_2000,
  'White, Texas-Born in West - 2000'
)
print(whites_northwest_2000_ppyramid)

# Non-White, 1940
gg4 <- filter(g, RACE == 'Non-White', YEAR == 1940)
head(gg4)
nonwhites_northwest_1940 <- count_by_sex_age(gg4)
nonwhites_northwest_1940_ppyramid <- draw_pyramid(
  nonwhites_northwest_1940,
  'Non-White, Texas-Born in West - 1940'
)
print(nonwhites_northwest_1940_ppyramid)

# Non-White, 1970
gg5 <- filter(g, RACE == 'Non-White', YEAR == 1970)
head(gg5)
nonwhites_northwest_1970 <- count_by_sex_age(gg5)
nonwhites_northwest_1970_ppyramid <- draw_pyramid(
  nonwhites_northwest_1970,
  'Non-White, Texas-Born in West - 1970'
)
print(nonwhites_northwest_1970_ppyramid)

# Non-White, 2000
gg6 <- filter(g, RACE == 'Non-White', YEAR == 2000)
head(gg6)
nonwhites_northwest_2000 <- count_by_sex_age(gg6)
nonwhites_northwest_2000_ppyramid <- draw_pyramid(
  nonwhites_northwest_2000,
  'Non-White, Texas-Born in West - 2000'
)
print(nonwhites_northwest_2000_ppyramid)
# Yearly weighted population of Texas-born people in the West, by race.
j <- select(g, YEAR, RACE, PERWT)
head(j)

# NUMBER: weighted count per year and race.
k <- j %>%
  group_by(YEAR, RACE) %>%
  summarise(NUMBER = sum(PERWT))
head(k)

# TOTAL: weighted count per year across both races.
l <- j %>%
  group_by(YEAR) %>%
  summarise(TOTAL = sum(PERWT))
head(l)

# Attach each year's total to its per-race rows so shares can be computed.
m <- left_join(k, l, by = 'YEAR')
head(m)

# Stacked columns per year, each segment labelled with its share of the year.
# position_stack(vjust = 0.5) centres every label inside its own segment, so
# the labels stay correct whichever race ggplot2 stacks at the bottom.
west_plot_1 <- ggplot(m, aes(x = YEAR, y = NUMBER, fill = RACE)) +
  geom_bar(stat = 'identity') +
  geom_text(
    aes(label = paste0(round(NUMBER / TOTAL * 100, digits = 0), '%')),
    position = position_stack(vjust = 0.5)
  ) +
  labs(title = 'Texas Migrants to the West by Race')
print(west_plot_1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment