Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

Created March 12, 2016 00:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/84d9f59a84df2c2e149f to your computer and use it in GitHub Desktop.
Save anonymous/84d9f59a84df2c2e149f to your computer and use it in GitHub Desktop.
#Code for Project 2
#Install only the CRAN packages that are missing, so re-running the script
#does not reinstall everything. 'grid' ships with base R, so it is loaded
#below but never passed to install.packages().
cran_packages <- c('readr', 'dplyr', 'ggplot2', 'scales')
is_installed <- vapply(cran_packages, requireNamespace, logical(1), quietly = TRUE)
if (!all(is_installed)) {
  install.packages(cran_packages[!is_installed])
}
#load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
#set working directory
#The data file is read with a relative path further down, so the working
#directory has to be the folder that holds the CSV. The path is specific to
#the original author's machine; when it does not exist, stay in the current
#directory (with a warning) instead of aborting the whole script.
data_dir <- '/Users/robertmoffitt/Desktop/R Code/'
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  warning('Data directory not found: ', data_dir,
          ' - reading data from ', getwd(), ' instead.', call. = FALSE)
}
#read in data (10-2 has age, 00006 does not have age)
#The path is relative, so the working directory must already be the folder
#that contains the CSV.
a <- read.csv('usa_00010-2.csv')
head(a)
#select year, perwt, sex, and birthplace
b <- select(a, YEAR, PERWT, SEX, BPL)
head(b)
#factor sex variable
#NOTE(review): factor() attaches the labels to the sorted unique SEX codes,
#so this assumes exactly two codes with the male code sorting first -
#confirm against the extract's codebook.
c <- mutate(b, SEXF = factor(SEX, labels = c('male', 'female')))
head(c)
#factor BPL variable to separate categories
#Each label covers the BPL codes above the previous upper bound, up to and
#including its own upper bound; anything above the last bound is 'Other'.
#NOTE(review): the region names are the author's reading of the BPL code
#ranges - verify the cut points against the codebook.
bpl_upper <- c(99, 199, 300, 405, 419, 439, 499, 509, 599)
bpl_labels <- c('U.S. Born', 'Other NA', 'Central and South America',
                'Northern EU', 'UK & Ireland', 'Western EU',
                'Central/Eastern EU', 'East Asia', 'Other Asia', 'Other')
bpl_breaks <- c(-Inf, bpl_upper, Inf)
#as.character() keeps BIRTHPLACE a plain character column, as before, so
#the legend order in the charts does not change.
d <- mutate(c, BIRTHPLACE = as.character(cut(BPL, breaks = bpl_breaks,
                                             labels = bpl_labels,
                                             include.lowest = TRUE)))
#select year, perwt, sex and birthplace variables
e <- select(d, YEAR, PERWT, SEXF, BIRTHPLACE)
head(e)
#sum across unique combinations of year, sex and birthplace
#NUMBER is the sum of PERWT within each group; ungroup() so later steps
#receive a plain (ungrouped) table.
f <- e %>%
  group_by(YEAR, SEXF, BIRTHPLACE) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()
head(f)
#Shared builder for the birthplace bar charts: bars of NUMBER by YEAR,
#filled by BIRTHPLACE, with one panel per SEXF value.
#  counts   - table with YEAR, NUMBER, BIRTHPLACE and SEXF columns
#  as_share - FALSE stacks the raw totals; TRUE uses position "fill" so each
#             bar spans the full height, and formats the y axis as percent
birthplace_bars <- function(counts, as_share = FALSE) {
  base_plot <- ggplot(counts, aes(x = YEAR, y = NUMBER, fill = BIRTHPLACE))
  if (as_share) {
    base_plot +
      geom_bar(stat = 'identity', position = "fill") +
      facet_grid(~SEXF) +
      scale_y_continuous(labels = scales::percent)
  } else {
    base_plot +
      geom_bar(stat = 'identity') +
      facet_grid(~SEXF)
  }
}

#Whole population, raw totals: bare chart first, then titled and labelled
g <- birthplace_bars(f)
print(g)
x <- g + labs(title = 'Population by Birthplace and Gender',
              x = 'Year',
              y = 'Population')
print(x)

#Whole population (U.S. Born included) as a share of each year's total
h <- birthplace_bars(f, as_share = TRUE)
print(h)
i <- h + labs(title = 'Population by Birthplace and Gender',
              x = 'Year',
              y = 'Percent of Population')
print(i)

#Drop the U.S. Born rows so only the other birthplace groups remain
j <- filter(f, BIRTHPLACE != 'U.S. Born')

#Non-U.S.-born population as a share of each year's total
k <- birthplace_bars(j, as_share = TRUE)
print(k)
l <- k + labs(title = 'Immigrant Population by Birthplace and Gender',
              x = 'Year',
              y = 'Percent of Population')
print(l)

#Non-U.S.-born population, raw totals
m <- birthplace_bars(j)
print(m)
n <- m + labs(title = 'Immigrant Population by Birthplace and Gender',
              x = 'Year',
              y = 'Number')
print(n)
#Second analysis: age/sex population pyramids for 1920 and 1940.
#select year, perwt, sex, birthplace and age
b <- select(a, YEAR, PERWT, SEX, BPL, AGE)
head(b)
#factor sex variable
#NOTE(review): factor() attaches the labels to the sorted unique SEX codes,
#so this assumes exactly two codes with the male code sorting first -
#confirm against the extract's codebook.
c <- mutate(b, SEXF = factor(SEX, labels = c('male', 'female')))
head(c)
#factor BPL variable to separate categories
#Coarser grouping than the first analysis: each label covers the BPL codes
#above the previous upper bound, up to and including its own upper bound;
#anything above 499 is 'Other'.
#NOTE(review): region names are the author's reading of the BPL code
#ranges - verify the cut points against the codebook.
region_breaks <- c(-Inf, 99, 199, 300, 499, Inf)
region_labels <- c('U.S. Born', 'Other NA', 'Central and South America',
                   'Europe', 'Other')
d <- mutate(c, BIRTHPLACE = as.character(cut(BPL, breaks = region_breaks,
                                             labels = region_labels,
                                             include.lowest = TRUE)))
#filter out the U.S. Born and 'Other NA' groups
e <- filter(d, BIRTHPLACE != 'U.S. Born' & BIRTHPLACE != 'Other NA')
#Age categories using floor function
#Decade index 0-8 for ages under 90; every age of 90 or more goes in bin 9.
f <- mutate(e, AGECAT = ifelse(AGE >= 90, 9, floor(AGE / 10)))
#Create age categories for population pyramid
agec <- c('0-9','10-19','20-29','30-39','40-49','50-59','60-69','70-79','80-89','90+')
#Factor agecat
#Explicit levels tie each label to its decade index even when a decade is
#absent from the data (without them factor() labels whatever values happen
#to be present, and fails if there are fewer than ten).
g <- mutate(f, AGECAT = factor(AGECAT, levels = 0:9, labels = agec))
head(g)
#select year, perwt, sex, birthplace and age category variables
h <- select(g, YEAR, PERWT, SEXF, BIRTHPLACE, AGECAT)
#summarise by year, sex, age and birthplace, sum over perwt
i <- h %>%
  group_by(YEAR, SEXF, AGECAT, BIRTHPLACE) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()
#filter to 1920 and 1940
j <- filter(i, YEAR %in% c(1920, 1940))
#Graph age categories
k <- ggplot(j, aes(x = AGECAT, y = NUMBER)) + geom_bar(stat = 'identity')
print(k)
#Population pyramid
#Male totals are negated so their bars extend in the opposite direction
#from the female bars.
pdata <- mutate(j, NUMBER = ifelse(SEXF == 'male', 0 - NUMBER, NUMBER))
#Rows of panels by YEAR, columns by BIRTHPLACE. The axis labels are computed
#from whatever breaks ggplot chooses and show absolute values, so the
#negated male side reads as a positive population count.
k <- ggplot(pdata, aes(x = AGECAT, y = NUMBER, fill = SEXF)) +
  geom_bar(data = filter(pdata, SEXF == 'female'), stat = 'identity') +
  geom_bar(data = filter(pdata, SEXF == 'male'), stat = 'identity') +
  coord_flip() +
  facet_grid(YEAR ~ BIRTHPLACE) +
  scale_y_continuous(labels = function(v) scales::comma(abs(v)))
print(k)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment