Skip to content

Instantly share code, notes, and snippets.

@lfortin-117
Created November 15, 2016 01:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lfortin-117/f90b1f46d55a31d9e67ca6263847dc4d to your computer and use it in GitHub Desktop.
Save lfortin-117/f90b1f46d55a31d9e67ca6263847dc4d to your computer and use it in GitHub Desktop.
Final Project Code - LF
#Construction and General Labor
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATAF.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 codes 246 and 976 are the ones this section maps as
# construction / general labor.
immig4 <- immig2 %>% filter(IND1950 == 246 | IND1950 == 976)

# Weighted head count per year and state.
ds <- immig4 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations
# (relabeling the levels of factor(ifelse(...)) shifts labels in that case).
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 100, < 500, ...
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 100, 500, 1000, 5000, Inf),
                          labels = c('1-99', '100-499', '500-999',
                                     '1000-4999', '5000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Construction or General Labor Industry in the United States 1850-1920,')
# Namespace-qualified: gganimate is not attached by this section's library()
# calls, so an unqualified gg_animate() would not be found here.
gganimate::gg_animate(map2, 'animateconstlabor(new).gif', ani.width = 800)
#Agriculture and Extractive
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATA.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATAF.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 strictly between 100 and 246 is the range this section maps as
# agriculture / extractive.
immig3 <- immig2 %>% filter(IND1950 > 100 & IND1950 < 246)

# Weighted head count per year and state.
ds <- immig3 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations.
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 1000, < 5000, ...
# The fourth label is '25000-49999' because that bin starts at 25000.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 1000, 5000, 25000, 50000, Inf),
                          labels = c('1-999', '1000-4999', '5000-24999',
                                     '25000-49999', '50000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Agriculture/Extractive Industry in the United States 1850-1920,')
# Namespace-qualified: gganimate is not attached by this section's library()
# calls, so an unqualified gg_animate() would not be found here.
gganimate::gg_animate(map2, 'animatedagriculture(new).gif', ani.width = 800)
#Manufacturing
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATAF.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 strictly between 300 and 500 is the range this section maps as
# manufacturing. The other industry subsets were never used here and are
# no longer built.
immig9 <- immig2 %>% filter(IND1950 > 300 & IND1950 < 500)

# Weighted head count per year and state.
ds <- immig9 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations.
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 1000, < 5000, ...
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 1000, 5000, 10000, 50000, Inf),
                          labels = c('1-999', '1000-4999', '5000-9999',
                                     '10000-49999', '50000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Manufacturing Industry in the United States 1850-1920,')
# Namespace-qualified: gganimate is not attached by this section's library()
# calls, so an unqualified gg_animate() would not be found here.
gganimate::gg_animate(map2, 'animatemanufac(new).gif', ani.width = 800)
#Without an Industry (None)
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATA.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATAF.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 below 100 or above 976 is what this section treats as "no industry".
# The other industry subsets were never used here and are no longer built.
immig5 <- immig2 %>% filter(IND1950 < 100 | IND1950 > 976)

# Weighted head count per year and state.
ds <- immig5 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations.
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 5000, < 15000, ...
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 5000, 15000, 30000, 60000, Inf),
                          labels = c('1-4999', '5000-14999', '15000-29999',
                                     '30000-59999', '60000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants Without an Industry in the United States 1850-1920,')
# Namespace-qualified: gganimate is not attached by this section's library()
# calls, so an unqualified gg_animate() would not be found here.
gganimate::gg_animate(map2, 'animatenone(new).gif', ani.width = 800)
#Service
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATAF.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 strictly between 700 and 950 is the range this section maps as
# service. The other industry subsets were never used here and are no longer
# built.
immig8 <- immig2 %>% filter(IND1950 > 700 & IND1950 < 950)

# Weighted head count per year and state.
ds <- immig8 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations.
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 1000, < 2000, ...
# The legend text now describes the cut points actually applied
# (1000/2000/10000/20000); it previously advertised 500/1000/5000/10000.
# NOTE(review): if the previously advertised cut points were the intended ones,
# change `breaks` instead of the labels.
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 1000, 2000, 10000, 20000, Inf),
                          labels = c('1-999', '1000-1999', '2000-9999',
                                     '10000-19999', '20000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Service Industry in the United States 1850-1920,')
# Namespace-qualified: gganimate is not attached by this section's library()
# calls, so an unqualified gg_animate() would not be found here.
gganimate::gg_animate(map2, 'animateservice(new).gif', ani.width = 800)
#Trade
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
library(gganimate)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATA.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATAF.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 strictly between 600 and 700 is the range this section maps as trade.
# The other industry subsets were never used here and are no longer built.
immig7 <- immig2 %>% filter(IND1950 > 600 & IND1950 < 700)

# Weighted head count per year and state.
ds <- immig7 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations.
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 500, < 1000, ...
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 500, 1000, 5000, 10000, Inf),
                          labels = c('1-499', '500-999', '1000-4999',
                                     '5000-9999', '10000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Trade Industry in the United States 1850-1920,')
# Namespace-qualified so the call does not depend on gganimate being attached.
gganimate::gg_animate(map2, 'animatetrade(new3).gif', ani.width = 800)
#Transportation/Communication/Utilities
library(readr)
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
# Load the state outline polygons, drop Alaska (FIPS 2) and Hawaii (FIPS 15),
# and write the blank base map to map.png.
mapdata <- read_csv('map.csv', col_types = cols(STATEFIP = col_integer())) %>%
  filter(!(STATEFIP %in% c(2, 15)))
map1 <- ggplot() +
  theme_nothing(legend = TRUE) +
  geom_polygon(data = mapdata, aes(x = long, y = lat, group = group),
               fill = 'white', color = 'black')
png('map.png', width = 1500, height = 1000)
print(map1)
dev.off()

# Read the extract and keep first generation immigrants (BPL > 150) of working
# age (15-65) living outside Alaska/Hawaii.
# NOTE(review): other sections of this script read 'FINALDATAF.csv'; confirm
# which extract is intended here.
ipums <- read_csv('FINALDATA.csv', col_types = cols(PERWT = col_double()))
immig <- ipums %>% filter(BPL > 150)
immig2 <- immig %>% filter(AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))
# IND1950 strictly between 500 and 600 is the range this section maps as
# transportation / communications / utilities. The other industry subsets were
# never used here and are no longer built.
immig6 <- immig2 %>% filter(IND1950 > 500 & IND1950 < 600)

# Weighted head count per year and state.
ds <- immig6 %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Bin the counts for the map legend. cut() with explicit breaks and labels keeps
# every label tied to its own interval even when a bin has no observations.
# right = FALSE gives [lower, upper) intervals, i.e. NUMBER < 500, < 1000, ...
dscats <- ds %>%
  mutate(Population = cut(NUMBER,
                          breaks = c(-Inf, 500, 1000, 5000, 10000, Inf),
                          labels = c('1-499', '500-999', '1000-4999',
                                     '5000-9999', '10000+'),
                          right = FALSE))

# Attach the polygons to the binned counts and restore polygon drawing order.
newmap <- mapdata %>% mutate(STATEI = as.integer(STATEFIP))
dsmap <- left_join(dscats, newmap, by = c('STATEFIP' = 'STATEI')) %>%
  arrange(order)

# One animation frame per YEAR.
# NOTE(review): the title's trailing comma is presumably there because
# gg_animate appends the frame value to the title - confirm before changing it.
map2 <- map1 +
  scale_fill_brewer(palette = 'Blues') +
  geom_polygon(data = dsmap,
               aes(x = long, y = lat, group = group, fill = Population,
                   frame = YEAR),
               color = 'black') +
  labs(title = 'First Generation Immigrants in Transportation/Utilities/Communications Industry in the United States 1850-1920,')
# Namespace-qualified so the call does not depend on gganimate being attached.
gganimate::gg_animate(map2, 'animatetransport(new).gif', ani.width = 800)
#Column Graph
library(dplyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
# Working-age (15-65) first generation immigrants outside Alaska (FIPS 2) and
# Hawaii (FIPS 15). BPL > 150 is the same immigrant filter the map sections of
# this script apply; without it the chart would cover every record in the
# extract while its title says "First Generation Immigrants".
x <- read_csv('FINALDATAF.csv') %>%
  filter(BPL > 150 & AGE >= 15 & AGE <= 65 & !(STATEFIP %in% c(2, 15)))

# Recode state FIPS into a Region; anything not listed falls through to 'South'.
a <- x %>%
  mutate(Region = ifelse(STATEFIP %in% c(4, 6, 8, 16, 30, 32, 35, 41, 49, 53, 56), 'West',
                  ifelse(STATEFIP %in% c(17, 18, 19, 20, 26, 27, 29, 31, 38, 46, 55), 'Midwest',
                  ifelse(STATEFIP %in% c(9, 23, 25, 33, 34, 36, 42, 44, 50), 'Northeast',
                         'South'))))

# Recode IND1950 into seven industry groups. The tests are order-dependent:
# each ifelse() only sees values the earlier ones did not claim.
# levels = 1:7 pins each code to its label, so the labels stay correct (and
# factor() does not error) when some industry group is absent from the data.
y <- a %>%
  mutate(Industry = factor(
    ifelse(IND1950 < 100 | IND1950 > 976, 1,
    ifelse(IND1950 < 246, 2,
    ifelse(IND1950 == 246 | IND1950 == 976, 4,
    ifelse(IND1950 > 700, 7,
    ifelse(IND1950 > 600, 5,
    ifelse(IND1950 > 500, 6, 3)))))),
    levels = 1:7,
    labels = c('none', 'agricultural/extractive', 'manufacturing',
               'construction or general labor', 'trade',
               'transportation/communication/utilities', 'service')))

# Recode sex: SEX == 1 is labelled 'male', every other value 'female'.
d <- y %>% mutate(Sex = ifelse(SEX == 1, 'male', 'female'))

# Weighted head count per year, sex, region and industry.
f1 <- d %>%
  group_by(YEAR, Sex, Region, Industry) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Stacked columns scaled to 100% (position = 'fill'), one panel per
# Sex (rows) x Region (columns).
G2 <- ggplot(data = f1, aes(x = YEAR, y = NUMBER, fill = Industry)) +
  geom_bar(stat = 'identity', position = 'fill') +
  labs(x = 'Year', y = 'Percent of Population', fill = 'Industry',
       title = 'Industry for First Generation Immigrants Aged 15-65 by Year and Sex, 1850-1920') +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = c(1870, 1900, 1920)) +
  scale_fill_brewer(palette = 'Set1') +
  facet_grid(Sex ~ Region) +
  theme_bw() +
  theme(legend.position = 'bottom')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment