Skip to content

Instantly share code, notes, and snippets.

@jmcinerney14
Last active February 26, 2016 03:05
Show Gist options
  • Save jmcinerney14/ea5e0156f95515692de6 to your computer and use it in GitHub Desktop.
Save jmcinerney14/ea5e0156f95515692de6 to your computer and use it in GitHub Desktop.
#load packages
library(ggplot2)
library(RColorBrewer)
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
# Load the census extract and the lookup table of birthplace names
a <- read.csv('Mexican2.csv')
BPL <- read_csv("BPL.csv")
# Keep only records whose STATEFIP code is below 60, then drop Alaska (2) and
# Hawaii (15) records from census years earlier than 1960
b <- a %>%
  filter(STATEFIP < 60) %>%
  filter(!(YEAR < 1960 & STATEFIP %in% c(2, 15)))
# Attach the birthplace names on the columns the two tables share;
# all.x = TRUE keeps census rows that have no match in the lookup table.
# The result stays in `c` because the later sections of this script read it.
c <- merge(b, BPL, all.x = TRUE)
# Immigrant population as a share of the total population, by birthplace region
# Keep only the columns needed for the regional breakdown
region_input <- select(c, BPL, YEAR, PERWT, Name)
# Birthplace (BPL) codes that make up each region.
# `Mexico` is also read by the population pyramid section further down.
Mexico <- c(200)
Latin_America <- c(210:300)
US <- c(1:99)
# Label every record with its region; codes outside the three sets are "Other"
region_labelled <- mutate(
  region_input,
  Region = case_when(
    BPL %in% US ~ "US",
    BPL %in% Mexico ~ "Mexico",
    BPL %in% Latin_America ~ "Latin America",
    TRUE ~ "Other"
  )
)
# Sum PERWT for each YEAR/Region pair, then add the yearly total across all
# regions (US-born included) so shares are relative to the whole population
region_totals <- region_labelled %>%
  group_by(YEAR, Region) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  group_by(YEAR) %>%
  mutate(TOTAL = sum(NUMBER)) %>%
  ungroup()
# Drop the US-born rows and compute each remaining region's share of the total
immigrant_share <- region_totals %>%
  filter(Region != "US") %>%
  mutate(percent = NUMBER / TOTAL)
# Stacked bars: one bar per census year, one segment per birthplace region.
# The plot reads the precomputed `percent` column instead of re-deriving it,
# and is not called `plot` so it does not share a name with base::plot().
share_plot <- ggplot(
  immigrant_share,
  aes(x = YEAR, y = percent, fill = Region)
) +
  geom_bar(stat = 'identity') +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = c(1910, 1930, 1950, 1970)) +
  labs(
    title = 'Percentage of Immigrant Population Categorized by Place of Birth',
    x = 'Census Year',
    y = 'Percentage of Total American Population'
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_bw()
print(share_plot)
# Population Pyramid
# Age-by-sex counts of the Mexican-born population, one panel per census year.
# Reads `c` (merged census data) and `Mexico` (birthplace codes) defined above.

# Keep the Mexican-born records and only the columns the pyramid needs
mexican_born <- c %>%
  filter(BPL %in% Mexico) %>%
  select(YEAR, AGE, SEX, PERWT)
# Ten-year age bands; everyone aged 90 or over shares the last band
age_labels <- c('0-9', paste0(1:8, '0-', 1:8, '9'), '90+')
pyramid_counts <- mexican_born %>%
  mutate(
    # Labels are applied to the sorted SEX codes, so the lower code becomes
    # 'male' -- NOTE(review): presumably 1 = male, 2 = female; confirm coding
    Sex = factor(SEX, labels = c('male', 'female')),
    # Band index 0..9. Explicit levels keep each label attached to the right
    # band even when some band has no records in the data.
    AGECAT = factor(
      ifelse(AGE >= 90, 9, floor(AGE / 10)),
      levels = 0:9,
      labels = age_labels
    )
  ) %>%
  # Sum PERWT for each YEAR/Sex/AGECAT combination
  group_by(YEAR, Sex, AGECAT) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup() %>%
  filter(YEAR > 1920) %>%
  # Negate the male counts so their bars extend to the opposite side of zero
  mutate(NUMBER = ifelse(Sex == 'male', -NUMBER, NUMBER))
# Horizontal bars mirrored around zero, faceted by census year.
# position = 'identity' draws each sex from zero in its own direction.
# A single y scale formats the absolute value, so the male side of the axis
# shows positive population counts rather than negative numbers.
pyramid_plot <- ggplot(
  pyramid_counts,
  aes(x = AGECAT, y = NUMBER, fill = Sex)
) +
  geom_bar(stat = 'identity', position = 'identity') +
  coord_flip() +
  scale_y_continuous(labels = function(x) comma(abs(x))) +
  facet_grid(YEAR ~ .) +
  labs(
    title = 'US Population Born in Mexico by Sex and Age',
    x = 'Reported Age',
    y = 'Population'
  ) +
  theme_bw()
print(pyramid_plot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment