Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

Created March 12, 2016 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/091ab0861c6f5a439499 to your computer and use it in GitHub Desktop.
Save anonymous/091ab0861c6f5a439499 to your computer and use it in GitHub Desktop.
#Code for Project 1
# Install required CRAN packages - only those not already present.
# Re-running the script therefore does not reinstall everything each time.
# 'grid' is not listed because it ships with base R and cannot be installed
# from CRAN.
required_pkgs <- c("readr", "dplyr", "ggplot2", "scales")
# requireNamespace() returns FALSE (rather than erroring) for a missing package
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
#load packages
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
# Locate and read the input CSV into `a`.
# The folder is held in a variable instead of being switched to with setwd(),
# so the script does not change the session's working directory; edit
# `data_dir` to point at wherever the CSV lives on this machine.
data_dir <- "/Users/robertmoffitt/Desktop/R Code"
data_path <- file.path(data_dir, "usa_00002.csv")
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}
# read in data and preview the first rows
a <- read.csv(data_path)
head(a)
# Turn the raw data frame `a` into weighted counts `h`:
# keep the needed columns, recode sex / race / region into readable labels,
# drop rows outside the four named regions, then sum PERWT for every
# year / sex / race / region combination. Each head() call previews the
# frame that was just created.

# select year, perwt, sex, race and region
selected <- select(a, YEAR, PERWT, SEX, RACE, REGION)
head(selected)

recoded <- mutate(
  selected,
  # labels are applied to the sorted distinct SEX codes, so this assumes the
  # data holds exactly two codes with the lower one meaning male - TODO confirm
  SEXF = factor(SEX, labels = c("male", "female")),
  # collapse RACE to white (code 1), black (code 2) and everything else
  Race = ifelse(RACE == 1, "white", ifelse(RACE == 2, "black", "other")),
  # bucket REGION codes into named regions; codes above 43 become "other"
  REGIONA = ifelse(
    REGION <= 13, "Northeast",
    ifelse(
      REGION <= 23, "Midwest",
      ifelse(REGION <= 34, "South", ifelse(REGION <= 43, "West", "other"))
    )
  )
)
head(recoded)

# take out regions not being looked at and keep only the recoded variables
g <- recoded %>%
  filter(REGIONA != "other") %>%
  select(YEAR, PERWT, SEXF, Race, REGIONA)
head(g)

# sum across unique combinations of year, sex, race and region;
# ungroup() so no leftover grouping is carried into later steps
h <- g %>%
  group_by(YEAR, SEXF, Race, REGIONA) %>%
  summarise(NUMBER = sum(PERWT)) %>%
  ungroup()
head(h)
# Stacked bar charts of `h` with race as the fill, faceted by sex (rows) and
# region (columns). `l` / `m` show the summed counts; `n` / `o` show each
# race's share of the bar. The ggplot base is shared so both charts are
# guaranteed to use the same data and aesthetics.
base_plot <- ggplot(h, aes(x = YEAR, y = NUMBER, fill = Race))

# Graph with race as the filled variable, and by gender and region
l <- base_plot +
  geom_bar(stat = "identity") +
  facet_grid(SEXF ~ REGIONA)
print(l)

# Same chart with every bar scaled to 100% and the y axis shown as percent
n <- base_plot +
  geom_bar(stat = "identity", position = "fill") +
  facet_grid(SEXF ~ REGIONA) +
  scale_y_continuous(labels = scales::percent)
print(n)

# Add labels to the non-percent graph
m <- l + labs(
  title = "Population by Race, Gender and Region",
  x = "Year",
  y = "Number"
)
print(m)

# Add labels to the percent graph (y axis is a share, not a count)
o <- n + labs(
  title = "Population by Race, Gender and Region",
  x = "Year",
  y = "Percent"
)
print(o)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment