Created
March 12, 2016 00:37
-
-
Save anonymous/84d9f59a84df2c2e149f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Code for Project 2
#install packages - only the ones that are missing, so repeated runs of the
#script do not reinstall anything
#(grid ships with base R and cannot be installed from CRAN, so it is only loaded)
required_packages <- c('readr', 'dplyr', 'ggplot2', 'scales')
is_installed <- vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
#load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
#set working directory
#only switch when the folder exists on this machine; otherwise the script keeps
#the current working directory instead of stopping with an error
data_dir <- '/Users/robertmoffitt/Desktop/R Code/'
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
#read in data (10-2 has age, 00006 does not have age)
a <- read.csv('usa_00010-2.csv')
head(a)
#select year, perwt, sex, and birthplace
b <- select(a, YEAR, PERWT, SEX, BPL)
#preview the selection just made (previously head(c), which ran before c was
#assigned and so did not show this data)
head(b)
#factor sex variable; the two labels are applied to the SEX codes in sorted order
c <- mutate(b, SEXF = factor(SEX, labels = c('male', 'female')))
head(c)
#factor BPL variable to separate categories
#the thresholds are checked in increasing order, so each row gets the label of
#the first bound its BPL code does not exceed; codes above 599 become 'Other'
d <- mutate(
  c,
  BIRTHPLACE = ifelse(BPL <= 99, 'U.S. Born',
    ifelse(BPL <= 199, 'Other NA',
      ifelse(BPL <= 300, 'Central and South America',
        ifelse(BPL <= 405, 'Northern EU',
          ifelse(BPL <= 419, 'UK & Ireland',
            ifelse(BPL <= 439, 'Western EU',
              ifelse(BPL <= 499, 'Central/Eastern EU',
                ifelse(BPL <= 509, 'East Asia',
                  ifelse(BPL <= 599, 'Other Asia', 'Other')
                )
              )
            )
          )
        )
      )
    )
  )
)
#select year, perwt, sex and birthplace variables
e <- select(d, YEAR, PERWT, SEXF, BIRTHPLACE)
head(e)
#sum across unique combinations of year, sex and birthplace
f <- summarise(group_by(e, YEAR, SEXF, BIRTHPLACE), NUMBER = sum(PERWT))
#preview the summary just built (previously head(g), which failed because g is
#not created until the plotting step below)
head(f)
#Stacked bars of weighted counts per year, filled by birthplace, one panel per gender
g <- ggplot(f, aes(x = YEAR, y = NUMBER, fill = BIRTHPLACE)) +
  geom_bar(stat = 'identity') +
  facet_grid(~SEXF)
print(g)
#Same count plot with a title and axis labels
x <- g +
  ggtitle('Population by Birthplace and Gender') +
  xlab('Year') +
  ylab('Population')
print(x)
#Same data with each bar rescaled to its total, y axis formatted as percent
h <- ggplot(f, aes(x = YEAR, y = NUMBER, fill = BIRTHPLACE)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(~SEXF)
print(h)
#Percent plot (U.S. Born included) with a title and axis labels
i <- h +
  ggtitle('Population by Birthplace and Gender') +
  xlab('Year') +
  ylab('Percent of Population')
print(i)
#Drop the U.S. Born rows so only the remaining birthplaces are plotted
j <- filter(f, BIRTHPLACE != 'U.S. Born')
#Percent plot for the rows that are left after removing U.S. Born
k <- ggplot(j, aes(x = YEAR, y = NUMBER, fill = BIRTHPLACE)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(~SEXF)
print(k)
#Percent plot without U.S. Born, with a title and axis labels
l <- k +
  ggtitle('Immigrant Population by Birthplace and Gender') +
  xlab('Year') +
  ylab('Percent of Population')
print(l)
#Count plot (not percent) for the rows without U.S. Born
m <- ggplot(j, aes(x = YEAR, y = NUMBER, fill = BIRTHPLACE)) +
  geom_bar(stat = 'identity') +
  facet_grid(~SEXF)
print(m)
#Count plot without U.S. Born, with a title and axis labels
n <- m +
  ggtitle('Immigrant Population by Birthplace and Gender') +
  xlab('Year') +
  ylab('Number')
print(n)
#select year, perwt, sex, birthplace, and age
b <- select(a, YEAR, PERWT, SEX, BPL, AGE)
#preview the new selection (previously head(c), which showed an older data frame)
head(b)
#factor sex variable; the two labels are applied to the SEX codes in sorted order
c <- mutate(b, SEXF = factor(SEX, labels = c('male', 'female')))
head(c)
#factor BPL variable to separate categories (coarser grouping than before:
#all codes from 301 to 499 are 'Europe', everything above 499 is 'Other')
d <- mutate(
  c,
  BIRTHPLACE = ifelse(BPL <= 99, 'U.S. Born',
    ifelse(BPL <= 199, 'Other NA',
      ifelse(BPL <= 300, 'Central and South America',
        ifelse(BPL <= 499, 'Europe', 'Other')
      )
    )
  )
)
#filter out both the U.S. Born and the 'Other NA' populations
e <- filter(d, BIRTHPLACE != 'U.S. Born' & BIRTHPLACE != 'Other NA')
#Age categories using floor function: decade index 0-8, with 90 and over as 9
f <- mutate(e, AGECAT = ifelse(AGE >= 90, 9, floor(AGE / 10)))
#Create age categories for population pyramid
agec <- c('0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-89', '90+')
#Factor agecat
#levels are given explicitly so every decade index maps to its own label even
#when some decades are absent from the data (without levels, factor() stops
#with an invalid-labels error if fewer than ten distinct values occur)
g <- mutate(f, AGECAT = factor(AGECAT, levels = 0:9, labels = agec))
#preview the factored result (previously head(e), an earlier step)
head(g)
#select year, perwt, sex and birthplace variables
h <- select(g, YEAR, PERWT, SEXF, BIRTHPLACE, AGECAT)
#summarise by year, sex, age and birthplace, sum over perwt
i <- summarise(group_by(h, YEAR, SEXF, AGECAT, BIRTHPLACE), NUMBER = sum(PERWT))
#filter to 1920 and 1940
j <- filter(i, YEAR == 1920 | YEAR == 1940)
#Graph age categories
k <- ggplot(j, aes(x = AGECAT, y = NUMBER)) + geom_bar(stat = 'identity')
print(k)
#Population pyramid
#male counts are negated so that, after coord_flip(), male and female bars
#extend in opposite directions from zero
pdata <- mutate(j, NUMBER = ifelse(SEXF == 'male', 0 - NUMBER, NUMBER))
k <- ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEXF)) +
  geom_bar(data = pdata[pdata$SEXF == 'female', ], stat = 'identity') +
  geom_bar(data = pdata[pdata$SEXF == 'male', ], stat = 'identity') +
  coord_flip() +
  #one row of panels per year, one column per birthplace
  facet_grid(YEAR ~ BIRTHPLACE) +
  #breaks are stated explicitly so the three labels always line up with three
  #tick positions; the negative break is labelled with its absolute value
  scale_y_continuous(
    breaks = c(-1000000, 0, 1000000),
    labels = c(1000000, 0, 1000000)
  )
print(k)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment