Skip to content

Instantly share code, notes, and snippets.

@carolineallan
Last active February 17, 2016 02:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carolineallan/ad441b02cc79428d331c to your computer and use it in GitHub Desktop.
Save carolineallan/ad441b02cc79428d331c to your computer and use it in GitHub Desktop.
## Caroline Allan
## U.S. History Through Census Data
## Project 2
## Mexican Race in the U.S. 1950-2000
## January 30, 2016
## NOTE: the script deliberately does not call rm(list = ls()). Wiping the
## caller's global environment is a destructive side effect, it does not give
## a clean session (attached packages and options survive), and nothing below
## relies on it: every object is assigned before it is used.

## Load packages
library(readr)     # read_csv()
library(dplyr)     # filter(), select(), group_by(), summarise(), mutate(), %>%
library(ggplot2)   # plotting and ggsave()
library(scales)    # comma, percent_format()
library(ggthemes)  # theme_few()
library(ggmap)     # theme_nothing()

## Set Working Directory
## NOTE(review): hard-coded, machine-specific path. It is kept because every
## file read and every ggsave() call in this script uses a path relative to
## it; launching the script from the project directory would make it
## unnecessary.
setwd("~/Desktop/HistThroughCensusData/Project2")

## Read In Data
## The rest of the script uses the columns YEAR, PERWT, BPL, BPLD and STATEFIP.
data_raw <- read_csv('usa_00004.csv')
## Remove Alaska, Hawaii, and overseas military:
##   - rows with STATEFIP of 60 or above are always dropped;
##   - STATEFIP 2 and 15 are dropped only for census years before 1960.
a <- data_raw %>%
  filter(STATEFIP < 60,
         YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))
## Select the variables needed by the rest of the script
b <- a %>%
  select(YEAR, PERWT, BPL, BPLD, STATEFIP)
## Foreign-born records (BPL above 99); both totals below start from these rows
foreign_born <- filter(b, BPL > 99)
## Total foreign-born population in each year (sum of PERWT)
c <- summarise(group_by(foreign_born, YEAR), TOTALPOP = sum(PERWT))
## Total foreign-born population by state by year (sum of PERWT)
c2 <- summarise(group_by(foreign_born, YEAR, STATEFIP), STATEPOP = sum(PERWT))
## Keep only people born in Mexico (BPL 200 or BPLD 20000).
## The earlier version first merged the yearly totals onto every person record
## (d <- merge(b, c, ...)). That TOTALPOP column was thrown away by the
## summarise() below and re-attached right after it, so the costly row-level
## merge is omitted and the filter runs on `b` directly.
e <- filter(b, BPL == 200 | BPLD == 20000)
## Number of Mexican immigrants by year (not by state)
f <- e %>%
  group_by(YEAR) %>%
  summarise(NUMMEX = sum(PERWT))
## Attach the total foreign-born population of each year
g <- merge(f, c, by = 'YEAR', all.x = TRUE)
## Share of the foreign-born population that was born in Mexico
h <- mutate(g, PERCMEX = NUMMEX / TOTALPOP)
## Bar chart: number of Mexican immigrants by year
plot1 <- ggplot(h, aes(x = YEAR, y = NUMMEX)) +
  geom_bar(stat = 'identity') +
  theme_few() +
  labs(x = "Year",
       y = "Estimated Mexican Immigrants",
       title = "Estimated Number of Mexican Immigrants in U.S.\n1950-2000") +
  scale_x_continuous(breaks = c(1950, 1960, 1970, 1980, 1990, 2000)) +
  scale_y_continuous(labels = comma)
print(plot1)
## Name the plot and the `filename` argument explicitly: the previous call
## relied on partial argument matching (`file`) and on whichever plot happened
## to be the most recent one.
ggsave(filename = "nummex.png", plot = plot1, dpi = 300)
## Bar chart: Mexican-born share of the foreign-born population by year
plot2 <- ggplot(h, aes(x = YEAR, y = PERCMEX)) +
  geom_bar(stat = 'identity') +
  theme_few() +
  labs(x = "Year",
       y = "Percent",
       title = "Percent of Foreign-Born Population Born in Mexico\n1950-2000") +
  scale_y_continuous(labels = percent_format()) +
  scale_x_continuous(breaks = c(1950, 1960, 1970, 1980, 1990, 2000))
print(plot2)
## Name the plot and the `filename` argument explicitly: the previous call
## relied on partial argument matching (`file`) and on whichever plot happened
## to be the most recent one.
ggsave(filename = "mexshare.png", plot = plot2, dpi = 300)
## MAPS
## Lookup table of state names (merged on STATEFIP further down)
states <- read.csv(file = 'states.csv', stringsAsFactors = FALSE)
## Polygon outlines used to draw the maps
map <- read.csv(file = 'map.csv', stringsAsFactors = FALSE)
## Number of Mexican immigrants per year in each state
NumMex_Data <- e %>%
  group_by(YEAR, STATEFIP) %>%
  summarise(NUMMEX = sum(PERWT))
## Attach the total foreign-born population of each state and year.
## Inner join: only year/state pairs present in both tables are kept.
i <- merge(x = NumMex_Data, y = c2, by = c('YEAR', 'STATEFIP'))
## Mexican share of each state's foreign-born population
map_data_final <- i %>%
  mutate(SHAREMEX = NUMMEX / STATEPOP)
## MAP 1: Mexican-born share of each state's foreign-born population, 2000
share2000 <- map_data_final %>%
  select(YEAR, SHAREMEX, STATEFIP) %>%
  filter(YEAR == 2000)
## Add state names, then attach the polygon outline of each state.
## Both merges are inner joins, so a region without a matching row is not drawn.
gg <- merge(share2000, states, by = 'STATEFIP')
hh <- merge(gg, map, by.x = 'NAME', by.y = 'id')
## Bin the share into left-closed intervals [0, .1), [.1, .2), ... and label
## the bins inside cut() itself. Relabelling afterwards with
## factor(section, labels = lab) drops unused levels first and therefore fails
## as soon as one bin is empty. The last break is Inf rather than .9 so that a
## share of 90% or more lands in '>=50%' instead of becoming NA.
lab <- c('<10%','10%-19.99%','20%-39.99%','40%-49.99%', '>=50%')
ii <- mutate(hh,
             section = cut(SHAREMEX,
                           breaks = c(0, .1, .2, .4, .5, Inf),
                           labels = lab,
                           right = FALSE))
## geom_polygon() connects vertices in row order and merge() does not promise
## to keep that order, so restore it from the `order` column (as MAP 2 does).
jj <- ii[order(ii$order), ]
mapShare2000 <- ggplot() +
  geom_polygon(data = jj,
               aes(x = long, y = lat, group = group, fill = section),
               color = "black", size = 0.25) +
  labs(title='Percent of Foreign-Born State Population Born in Mexico, 2000') +
  scale_fill_brewer(name = 'Share Mexican\nImmigrants', palette = 'Greens') +
  theme_nothing(legend = TRUE) +
  guides(fill = guide_legend(override.aes = list(colour = NULL)))
## Pass the plot and the `filename` argument explicitly instead of relying on
## partial argument matching (`file`) and on the most recently built plot.
ggsave(filename = "share2000.png", plot = mapShare2000, dpi = 300)
print(mapShare2000)
## MAP 2: Mexican-born share of each state's foreign-born population, 1950
## Dataset with Year, Statefip, and share Mexicans.
## The exclusion of Alaska and Hawaii used to read
## `STATEFIP != 1 | STATEFIP != 15`, which is TRUE for every row (no value
## equals both 1 and 15) and used code 1 where the filter on the raw data uses
## 2. It now matches that earlier filter: drop STATEFIP 2 and 15.
share1950 <- map_data_final %>%
  select(YEAR, SHAREMEX, STATEFIP) %>%
  filter(YEAR == 1950) %>%
  filter(!(STATEFIP %in% c(2, 15)))
## Add in state names
kk <- merge(share1950, states, by = 'STATEFIP')
## Remove Alaska and Hawaii from map data
map <- map %>%
  filter(id != 'Alaska' & id != 'Hawaii')
## all.y = TRUE keeps every remaining map region, so regions without a 1950
## share are still drawn (their SHAREMEX, and hence their fill, is NA).
ll <- merge(kk, map, by.x = 'NAME', by.y = 'id', all.y = TRUE)
## Bin the share into left-closed intervals so the bins agree with their
## labels ('<10%' excludes exactly 10%) and with the 2000 map. Labels are
## attached inside cut() rather than positionally in the fill scale, which
## breaks when a bin is empty. The last break is Inf rather than .9 so that
## very high shares are not turned into NA.
mm <- mutate(ll,
             section = cut(SHAREMEX,
                           breaks = c(0, .1, .2, .4, Inf),
                           labels = c('<10%','10%-19.99%','20%-39.99%', '>=40%'),
                           right = FALSE))
## Reorder data: geom_polygon() connects vertices in row order
oo <- mm[order(mm$order), ]
mapShare1950 <- ggplot() +
  geom_polygon(data = oo,
               aes(x = long, y = lat, group = group, fill = section),
               color = "black", size = 0.25) +
  labs(title='Percent of Foreign-Born State Population Born in Mexico, 1950') +
  scale_fill_brewer(name = 'Share Mexican\nImmigrants', palette = 'Greens') +
  theme_nothing(legend = TRUE) +
  guides(fill = guide_legend(override.aes = list(colour = NULL)))
## Pass the plot and the `filename` argument explicitly instead of relying on
## partial argument matching (`file`) and on the most recently built plot.
ggsave(filename = "share1950.png", plot = mapShare1950, dpi = 300)
print(mapShare1950)
## Close the current graphics device (presumably the one opened by print())
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment