Skip to content

Instantly share code, notes, and snippets.

Created March 9, 2016 16:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/f4848b41de0e6746edc9 to your computer and use it in GitHub Desktop.
Save anonymous/f4848b41de0e6746edc9 to your computer and use it in GitHub Desktop.
#Code for project 3
#install packages - only the packages that are not yet available are installed,
#so this really does run just once per machine instead of on every execution.
#'grid' is not listed here: it ships with R itself and cannot be installed from
#a repository; it is still loaded with library(grid) below.
needed_packages <- c('readr', 'dplyr', 'ggplot2', 'scales')
is_installed <- vapply(needed_packages, requireNamespace, logical(1), quietly = TRUE)
if (!all(is_installed)) {
  install.packages(needed_packages[!is_installed])
}
#load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
#set working directory
#The path has to be absolute (leading '/'). Without the slash R resolves it
#relative to the current working directory, so setwd() fails unless R happens
#to have been started in '/'.
setwd('/Users/robertmoffitt/Desktop/R Code')
#read in data (the file is looked up in the working directory set above)
a <- read.csv('usa_00009.csv')
#preview the first rows of the raw data
head(a)
#select year, perwt, sex, birthplace, age and occupation columns
b <- select(a, YEAR, PERWT, SEX, BPL, AGE, OCC1950)
#preview the selected columns
head(b)
#factor sex variable
#NOTE(review): the labels are attached to the sorted unique values of SEX, so
#this assumes SEX holds exactly two codes with the lower one meaning male -
#confirm against the codebook.
#NOTE: the data frame name `c` shadows base::c as a variable; calls such as
#c('male','female') still work because R skips non-function bindings when it
#looks up a function call.
c <- mutate(b, SEXF = factor(SEX, labels = c('male', 'female')))
head(c)
#filter out the male population
d <- filter(c, SEXF != 'male')
#recode BPL into birthplace categories; each cut-off is an inclusive upper
#bound and the first matching bound wins
#('Other NA' presumably stands for other North America - confirm)
e <- mutate(
  d,
  BIRTHPLACE = ifelse(BPL <= 99, 'U.S. Born',
               ifelse(BPL <= 199, 'Other NA',
               ifelse(BPL <= 300, 'Central and South America',
               ifelse(BPL <= 405, 'Northern EU',
               ifelse(BPL <= 419, 'UK & Ireland',
               ifelse(BPL <= 439, 'Western EU',
               ifelse(BPL <= 499, 'Central/Eastern EU',
               ifelse(BPL <= 509, 'East Asia',
               ifelse(BPL <= 599, 'Other Asia',
                      'Other')))))))))
)
#Filter out U.S. Born persons
f <- filter(e, BIRTHPLACE != 'U.S. Born')
#recode the OCC1950 variable into occupation categories; same inclusive
#upper-bound scheme as above. 'Operatives/Laborers' is used for two separate
#code ranges (596-690 and 791-970).
g <- mutate(
  f,
  OCCUPATION = ifelse(OCC1950 <= 99, 'Professional',
               ifelse(OCC1950 <= 123, 'Farmers/Farm Labor',
               ifelse(OCC1950 <= 290, 'Managers/Proprietors',
               ifelse(OCC1950 <= 390, 'Clerical',
               ifelse(OCC1950 <= 490, 'Sales',
               ifelse(OCC1950 <= 595, 'Crafts',
               ifelse(OCC1950 <= 690, 'Operatives/Laborers',
               ifelse(OCC1950 <= 720, 'Private Service',
               ifelse(OCC1950 <= 790, 'Public Service',
               ifelse(OCC1950 <= 970, 'Operatives/Laborers',
                      'non-occupational'))))))))))
)
#Filter non-occupational
h <- filter(g, OCCUPATION != 'non-occupational')
#Age categories using floor function: the decade index floor(AGE/10), with
#every age of 90 and above collapsed into bucket 9
i <- mutate(h, AGECAT = ifelse(AGE >= 90, 9, floor(AGE / 10)))
#Create age category labels, one per bucket 0..9
AGEC <- c('0-9','10-19','20-29','30-39','40-49','50-59','60-69','70-79','80-89','90+')
#Attach labels to AGECAT
#The levels are given explicitly so that bucket k always receives label
#AGEC[k + 1]. Without them factor() uses only the buckets present in the data
#and stops with an "invalid 'labels'" error as soon as one bucket is missing
#(for example when no child has an occupation).
j <- mutate(i, AGECAT = factor(AGECAT, levels = 0:9, labels = AGEC))
#Filter out non working age categories (keep ages 20 through 69)
k <- filter(j, AGECAT %in% c('20-29', '30-39', '40-49', '50-59', '60-69'))
#select year, perwt, sex, birthplace, age and occupation variables
l <- select(k, YEAR, PERWT, SEXF, BIRTHPLACE, AGECAT, OCCUPATION)
#preview the final person-level table
head(l)
#sum person weights across unique combinations of year, sex, birthplace, age
#and occupation
m <- summarise(group_by(l, YEAR, SEXF, BIRTHPLACE, AGECAT, OCCUPATION), NUMBER = sum(PERWT))
head(m)
#Create a data set for each year
z <- filter(m, YEAR == 1900)
x <- filter(m, YEAR == 1910)
y <- filter(m, YEAR == 1920)
q <- filter(m, YEAR == 1930)
o <- filter(m, YEAR == 1940)
n <- filter(m, YEAR == 1950)
p <- filter(m, YEAR == 1960)

#Build the stacked bar chart for one year of data.
#  year_data: rows of `m` for a single year; needs the columns OCCUPATION,
#             NUMBER, BIRTHPLACE, YEAR and AGECAT.
#  returns:   a ggplot object with occupation on the x axis, NUMBER as bar
#             height, bars filled by birthplace, YEAR as the facet row and one
#             facet column per age category.
plot_occupation_by_birthplace <- function(year_data) {
  ggplot(year_data, aes(x = OCCUPATION, y = NUMBER, fill = BIRTHPLACE)) +
    geom_bar(stat = 'identity') +
    #YEAR in rows, AGECAT in columns (the original spelling YEAR~.~AGECAT gives
    #the same layout because '.' terms are ignored)
    facet_grid(YEAR ~ AGECAT) +
    #rotate the occupation labels so they do not overlap
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Female Immigrant Population Labor Force', x = 'Occupation', y = 'Number')
}

#Graph with birthplace as the filled variable by age (non-percent) for each year
#The plots are printed in chronological order; `s` always holds the most
#recently printed plot.
yearly_data <- list('1900' = z, '1910' = x, '1920' = y, '1930' = q,
                    '1940' = o, '1950' = n, '1960' = p)
for (year_label in names(yearly_data)) {
  year_data <- yearly_data[[year_label]]
  #a year without any rows cannot be faceted; skip it instead of aborting the
  #remaining plots
  if (nrow(year_data) == 0) {
    message('No observations for ', year_label, '; skipping plot')
    next
  }
  s <- plot_occupation_by_birthplace(year_data)
  print(s)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment