Last active
February 17, 2016 02:29
-
-
Save carolineallan/ad441b02cc79428d331c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Caroline Allan
## U.S. History Through Census Data
## Project 2
## Mexican Race in the U.S. 1950-2000
## January 30, 2016
## NOTE: this script used to start with `rm(list = ls())`. That call was
## removed: it deletes every object in the caller's workspace when the script
## is sourced, yet it does not produce a clean session (loaded packages and
## options survive). Run the script in a fresh R session instead.

## Load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(ggthemes)
library(ggmap)

## Set working directory
## Every file path used below is relative, so inputs are read from and PNG
## outputs are written to this directory.
## NOTE(review): hard-coded, machine-specific path; setwd() stops with an
## error if the directory does not exist.
setwd("~/Desktop/HistThroughCensusData/Project2")
## Load the census extract
data_raw <- read_csv("usa_00004.csv")

## Keep rows with STATEFIP below 60 (per the original author this removes
## overseas military); for census years before 1960 also drop STATEFIP 2 and
## 15 (Alaska and Hawaii)
a <- data_raw %>%
  filter(STATEFIP < 60, YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))

## Keep only the columns the rest of the analysis reads
b <- a %>%
  select(YEAR, PERWT, BPL, BPLD, STATEFIP)
## Person records treated as foreign-born: birthplace code (BPL) above 99
## NOTE(review): confirm against the extract's codebook that every BPL code
## above 99 should count as foreign-born (e.g. codes for U.S. territories).
foreign_born <- filter(b, BPL > 99)

## Weighted foreign-born population per census year
## (the variable name `c` masks nothing at call sites: c(...) still resolves
## to the base function because R skips non-function objects in a call)
c <- summarise(group_by(foreign_born, YEAR), TOTALPOP = sum(PERWT))

## Weighted foreign-born population per state per census year
c2 <- summarise(group_by(foreign_born, YEAR, STATEFIP), STATEPOP = sum(PERWT))
## Keep only people born in Mexico (general birthplace code 200 or detailed
## code 20000).
## The yearly totals in `c` are deliberately NOT merged onto the person-level
## data first: that join touched every person record and the resulting
## TOTALPOP column was never read, because the totals are merged onto the
## small per-year summary below.
e <- filter(b, BPL == 200 | BPLD == 20000)

## Weighted number of Mexican-born residents per census year (all states)
f <- e %>%
  group_by(YEAR) %>%
  summarise(NUMMEX = sum(PERWT))

## Attach the foreign-born total for each year
g <- merge(f, c, by = "YEAR", all.x = TRUE)

## Mexican-born share of the foreign-born population (a proportion, 0-1)
h <- mutate(g, PERCMEX = NUMMEX / TOTALPOP)
## Bar chart of the estimated number of Mexican-born residents per census year
plot1 <- ggplot(h, aes(x = YEAR, y = NUMMEX)) +
  geom_bar(stat = "identity") +
  scale_x_continuous(breaks = c(1950, 1960, 1970, 1980, 1990, 2000)) +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Year",
    y = "Estimated Mexican Immigrants",
    title = "Estimated Number of Mexican Immigrants in U.S.\n1950-2000"
  ) +
  theme_few()
print(plot1)

## Name the plot and spell out `filename` so the saved image does not depend
## on last_plot() or on partial argument matching
ggsave(filename = "nummex.png", plot = plot1, dpi = 300)
## Bar chart of the Mexican-born share of the foreign-born population per
## census year
plot2 <- ggplot(h, aes(x = YEAR, y = PERCMEX)) +
  geom_bar(stat = "identity") +
  scale_x_continuous(breaks = c(1950, 1960, 1970, 1980, 1990, 2000)) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "Year",
    y = "Percent",
    title = "Percent of Foreign-Born Population Born in Mexico\n1950-2000"
  ) +
  theme_few()
print(plot2)

## Name the plot and spell out `filename` so the saved image does not depend
## on last_plot() or on partial argument matching
ggsave(filename = "mexshare.png", plot = plot2, dpi = 300)
## MAPS

## Lookup tables: state names (states.csv) and map polygon data (map.csv)
states <- read.csv("states.csv", stringsAsFactors = FALSE)
map <- read.csv("map.csv", stringsAsFactors = FALSE)

## Weighted number of Mexican-born residents for every year/state pair
NumMex_Data <- e %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMMEX = sum(PERWT))

## Attach each state's foreign-born total; merge() defaults to an inner join,
## so only year/state pairs present in both tables remain
i <- merge(NumMex_Data, c2, by = c("YEAR", "STATEFIP"))

## Mexican-born share of each state's foreign-born population
map_data_final <- i %>%
  mutate(SHAREMEX = NUMMEX / STATEPOP)
## MAP 1: Mexican-born share of each state's foreign-born population, 2000
share2000 <- map_data_final %>%
  filter(YEAR == 2000) %>%
  select(YEAR, SHAREMEX, STATEFIP)

## Attach state names, then the polygon rows for each state
gg <- merge(share2000, states, by = "STATEFIP")
hh <- merge(gg, map, by.x = "NAME", by.y = "id")

## Bin the shares into left-closed classes [0, .1), [.1, .2), ... and label
## them inside cut(): the labels stay attached to the right class even when a
## class has no states (relabelling afterwards with factor() drops empty
## classes and then fails on the label count). The top class is open-ended so
## a share of 90% or more is shown as '>=50%' rather than becoming NA.
lab <- c('<10%', '10%-19.99%', '20%-39.99%', '40%-49.99%', '>=50%')
ii <- mutate(
  hh,
  section = cut(SHAREMEX,
                breaks = c(0, .1, .2, .4, .5, Inf),
                labels = lab,
                right = FALSE)
)

## merge() does not preserve the polygon row sequence; restore it with the
## map data's `order` column (same step Map 2 performs) so outlines are drawn
## vertex by vertex in the intended order
jj <- ii[order(ii$order), ]

mapShare2000 <- ggplot() +
  geom_polygon(data = jj,
               aes(x = long, y = lat, group = group, fill = section),
               color = "black", size = 0.25) +
  labs(title = 'Percent of Foreign-Born State Population Born in Mexico, 2000') +
  scale_fill_brewer(name = 'Share Mexican\nImmigrants', palette = 'Greens') +
  theme_nothing(legend = TRUE) +
  guides(fill = guide_legend(override.aes = list(colour = NULL)))

## Save the named plot explicitly instead of relying on last_plot()
ggsave(filename = "share2000.png", plot = mapShare2000, dpi = 300)
print(mapShare2000)
## MAP 2: Mexican-born share of each state's foreign-born population, 1950

## Year, state FIPS code and Mexican-born share for 1950, excluding Alaska (2)
## and Hawaii (15). The previous condition `STATEFIP != 1 | STATEFIP != 15`
## was true for every row and named FIPS 1 instead of 2.
share1950 <- map_data_final %>%
  filter(YEAR == 1950, !(STATEFIP %in% c(2, 15))) %>%
  select(YEAR, SHAREMEX, STATEFIP)

## Add in state names
kk <- merge(share1950, states, by = "STATEFIP")

## Remove Alaska and Hawaii from map data
map <- map %>%
  filter(id != 'Alaska' & id != 'Hawaii')

## all.y = TRUE keeps the outline of every remaining state; states with no
## 1950 share get NA and are drawn in the scale's missing-value colour
ll <- merge(kk, map, by.x = "NAME", by.y = "id", all.y = TRUE)

## Bin the shares into left-closed classes [0, .1), [.1, .2), ... so the
## classes match their labels and Map 1. Labels are set inside cut() so they
## cannot shift onto the wrong class when a class has no states, and the top
## class is open-ended so shares of 90% or more are not turned into NA.
lab1950 <- c('<10%', '10%-19.99%', '20%-39.99%', '>=40%')
mm <- mutate(
  ll,
  section = cut(SHAREMEX,
                breaks = c(0, .1, .2, .4, Inf),
                labels = lab1950,
                right = FALSE)
)

## Reorder data: merge() does not preserve the polygon row sequence
oo <- mm[order(mm$order), ]

mapShare1950 <- ggplot() +
  geom_polygon(data = oo,
               aes(x = long, y = lat, group = group, fill = section),
               color = "black", size = 0.25) +
  labs(title = 'Percent of Foreign-Born State Population Born in Mexico, 1950') +
  scale_fill_brewer(name = 'Share Mexican\nImmigrants', palette = 'Greens') +
  theme_nothing(legend = TRUE) +
  guides(fill = guide_legend(override.aes = list(colour = NULL)))

## Save the named plot explicitly instead of relying on last_plot()
ggsave(filename = "share1950.png", plot = mapShare1950, dpi = 300)
print(mapShare1950)

## Close the graphics device opened by print()
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment