Created
November 15, 2016 01:55
-
-
Save lfortin-117/f90b1f46d55a31d9e67ca6263847dc4d to your computer and use it in GitHub Desktop.
Final Project Code - LF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Construction and General Labor
#Animated state map of working-age first generation immigrants in construction or
#general labor, one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#gg_animate() used at the end of this section comes from gganimate; attach it here so the
#section does not depend on a later part of the file having loaded it
library(gganimate)

#Loading in the blank map of the United States, filtering out Alaska (2) and Hawaii (15),
#and saving it as map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
#NOTE(review): some sections of this file read 'FINALDATAF.csv' instead - confirm which
#extract is intended here
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#IND1950 codes 246 and 976 are the ones this file labels 'construction or general labor'
immig4 <- immig2 %>% filter(IND1950 == 246 | IND1950 == 976)

#Aggregating: weighted head count per year and state. summarise() leaves the result
#grouped by YEAR, so ungroup() before any further column-wise work.
ds <- immig4 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#cut() with fixed breaks always produces the same five ordered levels, so a label can
#never slide onto the wrong bin when some bin happens to be empty.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 100, 500, 1000, 5000, Inf),
                          labels = c('1-99', '100-499', '500-999', '1000-4999', '5000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Construction or General Labor Industry in the United States 1850-1920,')
gg_animate(map2, 'animateconstlabor(new).gif', ani.width = 800)
#Agriculture and Extractive
#Animated state map of working-age first generation immigrants in the agriculture and
#extractive industries, one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#gg_animate() used at the end of this section comes from gganimate
library(gganimate)

#Loading in the blank map of the United States (without Alaska = 2 and Hawaii = 15)
#and plotting it to map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age in
#agriculture and extractive industry
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
ipums <- read_csv('FINALDATAF.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#IND1950 strictly between 100 and 246 is this section's agriculture/extractive range
immig3 <- immig2 %>% filter(IND1950 > 100 & IND1950 < 246)

#Aggregating: weighted head count per year and state; ungroup() because summarise()
#leaves the result grouped by YEAR
ds <- immig3 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#Fixed breaks and labels keep all five levels in order even if a bin is empty.
#The fourth label now starts at 25000 to match the 25000 break (it read '24999-49999').
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 1000, 5000, 25000, 50000, Inf),
                          labels = c('1-999', '1000-4999', '5000-24999',
                                     '25000-49999', '50000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Agriculture/Extractive Industry in the United States 1850-1920,')
gg_animate(map2, 'animatedagriculture(new).gif', ani.width = 800)
#Manufacturing
#Animated state map of working-age first generation immigrants in manufacturing,
#one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#gg_animate() used at the end of this section comes from gganimate
library(gganimate)

#Loading in the blank map of the United States (without Alaska = 2 and Hawaii = 15)
#and plotting it to map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age in
#manufacturing
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#Only the manufacturing subset is needed here; the other industry subsets the section
#used to build were never read
immig9 <- immig2 %>% filter(IND1950 > 300 & IND1950 < 500)

#Aggregating: weighted head count per year and state; ungroup() because summarise()
#leaves the result grouped by YEAR
ds <- immig9 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#Fixed breaks and labels keep all five levels in order even if a bin is empty.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 1000, 5000, 10000, 50000, Inf),
                          labels = c('1-999', '1000-4999', '5000-9999',
                                     '10000-49999', '50000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Manufacturing Industry in the United States 1850-1920,')
gg_animate(map2, 'animatemanufac(new).gif', ani.width = 800)
#Without an Industry (None)
#Animated state map of working-age first generation immigrants with no industry
#recorded, one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#gg_animate() used at the end of this section comes from gganimate
library(gganimate)

#Loading in the blank map of the United States (without Alaska = 2 and Hawaii = 15)
#and plotting it to map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age
#without an industry
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
ipums <- read_csv('FINALDATAF.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#IND1950 below 100 or above 976 is what this file labels 'none'
immig5 <- immig2 %>% filter(IND1950 < 100 | IND1950 > 976)

#Aggregating: weighted head count per year and state; ungroup() because summarise()
#leaves the result grouped by YEAR
ds <- immig5 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#Fixed breaks and labels keep all five levels in order even if a bin is empty.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 5000, 15000, 30000, 60000, Inf),
                          labels = c('1-4999', '5000-14999', '15000-29999',
                                     '30000-59999', '60000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants Without an Industry in the United States 1850-1920,')
gg_animate(map2, 'animatenone(new).gif', ani.width = 800)
#Service
#Animated state map of working-age first generation immigrants in the service
#industry, one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#gg_animate() used at the end of this section comes from gganimate
library(gganimate)

#Loading in the blank map of the United States (without Alaska = 2 and Hawaii = 15)
#and plotting it to map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age in
#service
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#Only the service subset is needed here; the other industry subsets the section used
#to build were never read
immig8 <- immig2 %>% filter(IND1950 > 700 & IND1950 < 950)

#Aggregating: weighted head count per year and state; ungroup() because summarise()
#leaves the result grouped by YEAR
ds <- immig8 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#The bin edges (1000 / 2000 / 10000 / 20000) are unchanged, so every state keeps its
#colour; the legend labels are rewritten to state those edges, because the previous
#labels ('1-499' ... '10000+') described different, smaller bins.
#NOTE(review): if the old labels were the intended bins, change the breaks instead.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 1000, 2000, 10000, 20000, Inf),
                          labels = c('1-999', '1000-1999', '2000-9999',
                                     '10000-19999', '20000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Service Industry in the United States 1850-1920,')
gg_animate(map2, 'animateservice(new).gif', ani.width = 800)
#Trade
#Animated state map of working-age first generation immigrants in the trade industry,
#one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
library(gganimate)

#Loading in the blank map of the United States (without Alaska = 2 and Hawaii = 15)
#and plotting it to map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age in
#trade
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
ipums <- read_csv('FINALDATAF.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#Only the trade subset is needed here; the other industry subsets the section used to
#build were never read
immig7 <- immig2 %>% filter(IND1950 > 600 & IND1950 < 700)

#Aggregating: weighted head count per year and state; ungroup() because summarise()
#leaves the result grouped by YEAR
ds <- immig7 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#Fixed breaks and labels keep all five levels in order even if a bin is empty.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 500, 1000, 5000, 10000, Inf),
                          labels = c('1-499', '500-999', '1000-4999',
                                     '5000-9999', '10000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Trade Industry in the United States 1850-1920,')
gg_animate(map2, 'animatetrade(new3).gif', ani.width = 800)
#Transportation/Communication/Utilities
#Animated state map of working-age first generation immigrants in transportation,
#communication and utilities, one frame per census YEAR.
library(readr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#gg_animate() used at the end of this section comes from gganimate
library(gganimate)

#Loading in the blank map of the United States (without Alaska = 2 and Hawaii = 15)
#and plotting it to map.png
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() + theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

#Reading the data and separating out the first generation immigrants of working age in
#transportation/communications/utilities
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
#Only the transportation/communication/utilities subset is needed here; the other
#industry subsets the section used to build were never read
immig6 <- immig2 %>% filter(IND1950 > 500 & IND1950 < 600)

#Aggregating: weighted head count per year and state; ungroup() because summarise()
#leaves the result grouped by YEAR
ds <- immig6 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Sorting states by number of first generation immigrants for the map scale.
#Fixed breaks and labels keep all five levels in order even if a bin is empty.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 500, 1000, 5000, 10000, Inf),
                          labels = c('1-499', '500-999', '1000-4999',
                                     '5000-9999', '10000+'),
                          right = FALSE))

#Join to the map polygons and restore the polygon drawing order
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>% arrange(order)

#Plotting
#The title keeps its trailing comma: presumably gg_animate() appends the frame value
#(YEAR) after the title - confirm against the installed gganimate version
map2 <- map1 + scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population, frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Transportation/Utilities/Communications Industry in the United States 1850-1920,')
gg_animate(map2, 'animatetransport(new).gif', ani.width = 800)
#Column Graph
#Stacked 100% column chart of industry shares by year, faceted by sex (rows) and
#region (columns), for working-age first generation immigrants.
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)

#Working-age (15-65) records outside Alaska (2) and Hawaii (15).
#BPL > 150 is the same foreign-born filter every map section of this file applies to the
#same extract; without it the chart would cover everyone in the file even though its
#title says "First Generation Immigrants".
#NOTE(review): BPL > 150 drops code 150 itself - confirm against the IPUMS BPL codebook
x <- read_csv('FINALDATAF.csv') %>%
  filter(BPL > 150 & AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))

#Recode to get Region variable; any state not listed falls through to 'South'
a <- x %>%
  mutate(Region = ifelse(STATEFIP %in% c(4, 6, 8, 16, 30, 32, 35, 41, 49, 53, 56), 'West',
                  ifelse(STATEFIP %in% c(17, 18, 19, 20, 26, 27, 29, 31, 38, 46, 55), 'Midwest',
                  ifelse(STATEFIP %in% c(9, 23, 25, 33, 34, 36, 42, 44, 50), 'Northeast',
                         'South'))))

#Recode industry variable. The branches are tested in order, so each later test only
#sees codes the earlier ones did not claim. levels = 1:7 pins each numeric code to its
#label, so the recode neither errors nor mislabels when an industry is absent.
y <- a %>%
  mutate(Industry = factor(ifelse(IND1950 < 100 | IND1950 > 976, 1,
                           ifelse(IND1950 < 246, 2,
                           ifelse(IND1950 == 246 | IND1950 == 976, 4,
                           ifelse(IND1950 > 700, 7,
                           ifelse(IND1950 > 600, 5,
                           ifelse(IND1950 > 500, 6, 3)))))),
                           levels = 1:7,
                           labels = c('none', 'agricultural/extractive', 'manufacturing',
                                      'construction or general labor', 'trade',
                                      'transportation/communication/utilities', 'service')))

#Recode sex variable: SEX == 1 is 'male', anything else 'female'
d <- y %>% mutate(Sex = ifelse(SEX == 1, 'male', 'female'))

#Aggregating the data for the plot: weighted head count per year, sex, region, industry
f1 <- d %>%
  group_by(YEAR, Sex, Region, Industry) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

#Graphing the aggregated data; position = 'fill' rescales every column to 100%
G2 <- ggplot(data = f1, aes(x = YEAR, y = NUMBER, fill = Industry)) +
  geom_bar(stat = 'identity', position = 'fill') +
  labs(x = 'Year', y = 'Percent of Population', fill = 'Industry',
       title = 'Industry for First Generation Immigrants Aged 15-65 by Year and Sex, 1850-1920') +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = c(1870, 1900, 1920)) +
  scale_fill_brewer(palette = 'Set1') +
  #Sex in rows, Region in columns (was written as the malformed formula Sex~.~Region)
  facet_grid(Sex ~ Region) +
  theme_bw() + theme(legend.position = 'bottom')
#Draw the chart; a bare assignment displays nothing when the script is source()'d
print(G2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment