Last active
February 26, 2016 03:05
-
-
Save jmcinerney14/ea5e0156f95515692de6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load packages | |
library(ggplot2) | |
library(RColorBrewer) | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(scales) | |
# Read data ----
# Census extract plus the lookup table that maps birthplace codes to names.
a <- read.csv("Mexican2.csv")
BPL <- read_csv("BPL.csv")

# Keep rows with STATEFIP below 60, and drop STATEFIP 2 and 15 (Alaska and
# Hawaii) for census years before 1960.
b <- filter(a, STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15))))

# Attach birthplace names. merge() joins on whatever columns the two tables
# share; all.x = TRUE keeps census rows that have no match in the lookup.
# NOTE: `c` is read again by the population-pyramid section further down.
c <- merge(b, BPL, all.x = TRUE)

# Birthplace code groups used to assign each record a region of birth.
Mexico <- c(200)
Latin_America <- c(210:300)
US <- c(1:99)

# Weighted population (sum of PERWT) per census year and region of birth.
region_counts <- c %>%
  select(BPL, YEAR, PERWT, Name) %>%
  mutate(
    Region = ifelse(
      BPL %in% US, "US",
      ifelse(
        BPL %in% Mexico, "Mexico",
        ifelse(BPL %in% Latin_America, "Latin America", "Other")
      )
    )
  ) %>%
  group_by(YEAR, Region) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Share of each year's total population held by every non-US-born region.
# TOTAL is computed before the US rows are dropped so that the denominator is
# the whole population, not just the foreign-born.
immigrant_share <- region_counts %>%
  group_by(YEAR) %>%
  mutate(TOTAL = sum(NUMBER)) %>%
  ungroup() %>%
  filter(Region != "US") %>%
  mutate(percent = NUMBER / TOTAL)

# Graph immigrant population as a percentage of the total population.
plot <- ggplot(immigrant_share, aes(x = YEAR, y = percent, fill = Region)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = c(1910, 1930, 1950, 1970)) +
  labs(
    title = "Percentage of Immigrant Population Categorized by Place of Birth",
    x = "Census Year",
    y = "Percentage of Total American Population"
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_bw()
print(plot)
# Population pyramid ----
# Age/sex structure of the Mexican-born population, one panel per census year.
# Reads `c` (merged census data) and `Mexico` (birthplace code) defined
# earlier in this script.

# Decade labels "0-9", "10-19", ..., "80-89", "90+", for bucket codes 0 to 9.
agec <- c("0-9", paste0(1:8, "0-", 1:8, "9"), "90+")

# Weighted population per census year, sex and age bucket for records whose
# birthplace code is in `Mexico`.
m <- c %>%
  select(BPL, YEAR, PERWT, SEX, AGE) %>%
  # Labels are applied to the sorted distinct SEX values of the full data set.
  # NOTE(review): assumes exactly two codes, lower = male — confirm.
  mutate(Sex = factor(SEX, labels = c("male", "female"))) %>%
  filter(BPL %in% Mexico) %>%
  select(YEAR, AGE, Sex, PERWT) %>%
  mutate(
    # Ages 90 and above share bucket 9. Explicit levels keep every label tied
    # to its bucket even when some decade is missing from the data; without
    # them factor() fails on a label/level count mismatch.
    AGECAT = factor(pmin(floor(AGE / 10), 9), levels = 0:9, labels = agec)
  ) %>%
  group_by(YEAR, Sex, AGECAT) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()

# Only census years after 1920 are plotted.
mm <- filter(m, YEAR > 1920)

# Male counts are negated so their bars extend to the opposite side of zero.
pdata <- mutate(mm, NUMBER = ifelse(Sex == "male", -NUMBER, NUMBER))

# Graph data as a population pyramid. Each sex is drawn as its own layer, the
# coordinates are flipped so age runs vertically, and the value axis shows
# absolute counts so the male side does not read as a negative population.
s <- ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = Sex)) +
  geom_bar(data = pdata[pdata$Sex == "female", ], stat = "identity") +
  geom_bar(data = pdata[pdata$Sex == "male", ], stat = "identity") +
  coord_flip() +
  scale_y_continuous(labels = function(x) scales::comma(abs(x))) +
  facet_grid(YEAR ~ .) +
  labs(
    title = "US Population Born in Mexico by Sex and Age",
    x = "Reported Age",
    y = "Population"
  ) +
  theme_bw()
print(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment