Last active
February 29, 2016 17:11
-
-
Save jmcinerney14/2303ee01facd07aea2ab to your computer and use it in GitHub Desktop.
Women_Emp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load packages | |
library(ggplot2) | |
library(RColorBrewer) | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(scales) | |
# Load the census extract from the working directory.
raw_data <- read.csv("usa_00016.csv")

# Build the analysis sample `a`: women aged 18 or older who are in the
# workforce (EMPSTAT 1 or 2). Alaska (2) and Hawaii (15) are dropped for
# years before 1960.
# NOTE(review): STATEFIP < 60 presumably removes non-state codes - confirm
# against the extract's codebook.
a <- raw_data %>%
  filter(
    STATEFIP < 60,
    !(YEAR < 1960 & STATEFIP %in% c(2, 15)),
    AGE >= 18,
    SEX == 2,
    EMPSTAT %in% c(1, 2)
  )
# National employment rate by race and nativity ----
#
# Input: `a`, the filtered sample built earlier in this script (only rows
# with EMPSTAT 1 or 2, so EMP + UNEMP accounts for every person).
#
# Each person is placed in one of four Identity groups from RACE and BPL
# (RACE == 1 is labelled white, BPL <= 99 is labelled native born). The
# person's PERWT weight is then counted as employed (EMPSTAT == 1) or
# unemployed (EMPSTAT == 2), and the weights are summed per YEAR and Identity.
#
# Output: `g`, one row per YEAR x Identity with columns EMP, UNEMP,
# TOTAL (EMP + UNEMP) and EMPR (EMP / TOTAL, the employment rate).
g <- a %>%
  select(YEAR, PERWT, RACE, BPL, EMPSTAT) %>%
  mutate(
    Identity = ifelse(
      RACE == 1 & BPL <= 99, "White Native Born",
      ifelse(
        RACE == 1 & BPL > 99, "White Foreign Born",
        ifelse(
          RACE != 1 & BPL <= 99,
          "Nonwhite Native Born",
          "Nonwhite Foreign Born"
        )
      )
    ),
    EMP = ifelse(EMPSTAT == 1, PERWT, 0),
    UNEMP = ifelse(EMPSTAT == 2, PERWT, 0)
  ) %>%
  group_by(YEAR, Identity) %>%
  summarise(EMP = sum(EMP), UNEMP = sum(UNEMP)) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # table does not stay silently grouped by YEAR.
  ungroup() %>%
  mutate(TOTAL = EMP + UNEMP, EMPR = EMP / TOTAL)

# Shared layers for both national charts: one line (with points) per
# Identity group over time.
identity_plot <- ggplot(g, aes(x = YEAR, y = EMPR, color = Identity)) +
  geom_line() +
  geom_point() +
  labs(
    title = "US Employment Rate by Race and Nativity",
    x = "Year",
    y = "Employment Rate"
  )

# Chart 1: employment rate on the full 0%-100% axis.
identity_plot +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1))

# Chart 2: focused view, letting the y axis fit the observed range.
identity_plot +
  scale_y_continuous(labels = scales::percent)
# Regional employment rate by race and nativity ----
#
# REGION codes that make up each of the four named regions.
Northeast <- 11:13
Midwest <- 21:23
South <- 31:34
West <- 41:43

# Input: `a`, the filtered sample built earlier in this script (only rows
# with EMPSTAT 1 or 2, so EMP + UNEMP accounts for every person).
#
# Each person gets an Identity group (from RACE and BPL) and a Region name
# (from REGION; any code outside the four vectors above becomes "Other").
# The person's PERWT weight is counted as employed (EMPSTAT == 1) or
# unemployed (EMPSTAT == 2) and summed per YEAR, Identity and Region.
#
# Output: `n`, one row per YEAR x Identity x Region with columns EMP, UNEMP,
# TOTAL (EMP + UNEMP) and EMPR (EMP / TOTAL, the employment rate).
n <- a %>%
  select(REGION, YEAR, PERWT, RACE, BPL, EMPSTAT) %>%
  mutate(
    Identity = ifelse(
      RACE == 1 & BPL <= 99, "White Native Born",
      ifelse(
        RACE == 1 & BPL > 99, "White Foreign Born",
        ifelse(
          RACE != 1 & BPL <= 99,
          "Nonwhite Native Born",
          "Nonwhite Foreign Born"
        )
      )
    ),
    EMP = ifelse(EMPSTAT == 1, PERWT, 0),
    UNEMP = ifelse(EMPSTAT == 2, PERWT, 0),
    Region = ifelse(
      REGION %in% Northeast, "Northeast",
      ifelse(
        REGION %in% Midwest, "Midwest",
        ifelse(
          REGION %in% South, "South",
          ifelse(REGION %in% West, "West", "Other")
        )
      )
    )
  ) %>%
  group_by(YEAR, Identity, Region) %>%
  summarise(EMP = sum(EMP), UNEMP = sum(UNEMP)) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # table does not stay silently grouped by YEAR and Identity.
  ungroup() %>%
  mutate(TOTAL = EMP + UNEMP, EMPR = EMP / TOTAL)

# One panel per Region, each showing the employment rate over time for the
# four Identity groups on a common 0%-100% axis.
ggplot(n, aes(x = YEAR, y = EMPR, color = Identity)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  labs(
    title = "US Employment Rate by Race and Nativity",
    x = "Year",
    y = "Employment Rate"
  ) +
  facet_grid(. ~ Region)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment