Created
March 9, 2016 16:12
-
-
Save anonymous/f4848b41de0e6746edc9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code for project 3

# Install any required package that is not yet available on this machine.
# Checking first means the script can be re-run without re-downloading
# everything, and 'grid' (which ships with R itself) is never sent to
# install.packages().
required_packages <- c('readr', 'dplyr', 'ggplot2', 'scales', 'grid')
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_packages[!is_installed])
}

# Load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
# Set the working directory to the folder that holds the data extract.
# The path is absolute (leading '/'): without it R resolves 'Users/...'
# relative to whatever the current directory happens to be.
# NOTE(review): machine-specific path - adjust when running elsewhere.
setwd('/Users/robertmoffitt/Desktop/R Code')

# Read in the raw extract and preview the first rows
a <- read.csv('usa_00009.csv')
head(a)
# Keep only the columns used below: year, person weight, sex, birthplace,
# age and occupation code
b <- select(a, YEAR, PERWT, SEX, BPL, AGE, OCC1950)
head(b)

# Recode SEX as a labelled factor. Levels are given explicitly so the
# recode does not fail (or silently mislabel) when the data contain fewer
# or more than two distinct SEX values; unexpected codes become NA.
# NOTE(review): assumes SEX is coded 1 = male, 2 = female - confirm
# against the extract's codebook.
# The data frame keeps its original name `c`; calls such as c(1, 2) still
# resolve to the base function because R skips non-functions in call
# position.
c <- mutate(b, SEXF = factor(SEX, levels = c(1, 2),
                             labels = c('male', 'female')))
head(c)

# Drop the male population (rows with NA in SEXF are dropped as well)
d <- filter(c, SEXF != 'male')
# Bucket BPL codes into birthplace categories.
# Each label applies to codes up to and including the matching upper bound
# (and above the previous bound); codes above the last bound get the final
# label. cut(..., labels = FALSE) returns the bucket index, which is then
# used to look up the label. NA codes stay NA.
bpl_upper <- c(99, 199, 300, 405, 419, 439, 499, 509, 599)
bpl_labels <- c(
  'U.S. Born',
  'Other NA',
  'Central and South America',
  'Northern EU',
  'UK & Ireland',
  'Western EU',
  'Central/Eastern EU',
  'East Asia',
  'Other Asia',
  'Other'
)
e <- mutate(
  d,
  BIRTHPLACE = bpl_labels[cut(BPL, breaks = c(-Inf, bpl_upper, Inf),
                              labels = FALSE, include.lowest = TRUE)]
)

# Filter out U.S. Born persons
f <- filter(e, BIRTHPLACE != 'U.S. Born')

# Bucket OCC1950 codes into occupation categories using the same
# upper-bound lookup. 'Operatives/Laborers' intentionally appears twice
# (codes 596-690 and 791-970), exactly as in the original recode.
occ_upper <- c(99, 123, 290, 390, 490, 595, 690, 720, 790, 970)
occ_labels <- c(
  'Professional',
  'Farmers/Farm Labor',
  'Managers/Proprietors',
  'Clerical',
  'Sales',
  'Crafts',
  'Operatives/Laborers',
  'Private Service',
  'Public Service',
  'Operatives/Laborers',
  'non-occupational'
)
g <- mutate(
  f,
  OCCUPATION = occ_labels[cut(OCC1950, breaks = c(-Inf, occ_upper, Inf),
                              labels = FALSE, include.lowest = TRUE)]
)

# Filter out rows without an occupation
h <- filter(g, OCCUPATION != 'non-occupational')
# Age decade index: 0 for ages 0-9, 1 for 10-19, ..., capped at 9 so that
# everyone aged 90 or older shares one bucket
i <- mutate(h, AGECAT = pmin(floor(AGE / 10), 9))

# Labels for the ten age buckets, in index order 0..9
AGEC <- c('0-9', '10-19', '20-29', '30-39', '40-49',
          '50-59', '60-69', '70-79', '80-89', '90+')

# Attach labels to AGECAT. The levels are spelled out so each index always
# maps to its own label; relying on the values present in the data makes
# factor() fail whenever a decade is missing (likely after the filters
# applied earlier).
j <- mutate(i, AGECAT = factor(AGECAT, levels = 0:9, labels = AGEC))

# Keep only the working-age buckets (20-69); all other buckets and NA
# ages are dropped
working_ages <- c('20-29', '30-39', '40-49', '50-59', '60-69')
k <- filter(j, AGECAT %in% working_ages)
# Select year, perwt, sex, birthplace, age and occupation variables
l <- select(k, YEAR, PERWT, SEXF, BIRTHPLACE, AGECAT, OCCUPATION)
head(l)

# Sum person weights across unique combinations of year, sex, birthplace,
# age category and occupation. The result is ungrouped so that later
# verbs operate on the whole table rather than on leftover groups.
m <- l %>%
  group_by(YEAR, SEXF, BIRTHPLACE, AGECAT, OCCUPATION) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()
head(m)
# Create one data set per census year.
# The object names are kept as they were (note they are not in
# alphabetical order) because the plotting code refers to them.
z <- filter(m, YEAR == 1900)
x <- filter(m, YEAR == 1910)
y <- filter(m, YEAR == 1920)
q <- filter(m, YEAR == 1930)
o <- filter(m, YEAR == 1940)
n <- filter(m, YEAR == 1950)
p <- filter(m, YEAR == 1960)
# Stacked bar chart of counts (not percentages) by occupation, filled by
# birthplace, with one facet row per year and one facet column per age
# category.
#
# year_data: a data frame with columns OCCUPATION, NUMBER, BIRTHPLACE,
#            YEAR and AGECAT (one of the per-year subsets created above).
# Returns the ggplot object without printing it.
plot_labor_force <- function(year_data) {
  ggplot(year_data, aes(x = OCCUPATION, y = NUMBER, fill = BIRTHPLACE)) +
    geom_bar(stat = 'identity') +
    # Documented `rows ~ cols` form; replaces the nested formula
    # YEAR~.~AGECAT, which is not a supported facet specification.
    facet_grid(YEAR ~ AGECAT) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Female Immigrant Population Labor Force',
         x = 'Occupation', y = 'Number')
}

# Draw the chart for each year in chronological order
# (1900, 1910, 1920, 1930, 1940, 1950, 1960). `s` holds the most recently
# drawn plot, so after the loop it is the 1960 chart.
for (year_data in list(z, x, y, q, o, n, p)) {
  s <- plot_labor_force(year_data)
  print(s)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment