Skip to content

Instantly share code, notes, and snippets.

@jpearl1395
Last active March 5, 2016 21:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpearl1395/c72cea4498a0345c79f0 to your computer and use it in GitHub Desktop.
Save jpearl1395/c72cea4498a0345c79f0 to your computer and use it in GitHub Desktop.
R Code for HIST 90.01 Race Assignment
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
library(RColorBrewer)
# Load the data. The working directory is switched to the assignment folder
# so the CSV can be referenced by its bare file name.
setwd("/Users/joshpearl/Documents/Dartmouth/Junior Year 16W-16X/16W/HIST 90.01- US Hist Through Census/Race Assignment")
a <- read_csv("usa_00004.csv")

# Remove Alaska, Hawaii and overseas military: keep only rows with a
# STATEFIP code below 60 and, for years before 1960, additionally drop
# STATEFIP 2 and 15 (Alaska and Hawaii).
b <- a %>%
  filter(STATEFIP < 60, YEAR >= 1960 | !(STATEFIP %in% c(2, 15)))

# Only the year, the weight and the race code are needed from here on.
c <- b %>%
  select(YEAR, PERWT, RACED)

# Show the distinct RACED codes present in the filtered data.
unique(c[["RACED"]])
# Turn the RACED codes into a labelled factor. factor() is called without
# explicit levels, so its levels are the distinct codes in sorted (numeric)
# order and the labels below are paired with them by position -- not in the
# order the codes happen to appear in the data. The vector therefore has to
# supply one label per distinct code.
# Proper labels are what allow Native Americans to be counted from 1850
# onwards in the steps that follow.
race_labels <- c(
  "White", "White (Blank)", "Portuguese", "Mexican", "Puerto Rican",
  "Black", "Mulatto", "Native American", "Chinese", "Japanese",
  "Filipino", "Asian Indian", "Korean", "Hawaiian", "Other"
)
d <- c %>%
  mutate(RACED = factor(RACED, labels = race_labels))
# Compare Native Americans with everyone else: total the weights for each
# year and race, collapse the races into two categories, then total again.
e <- d %>%
  group_by(YEAR, RACED) %>%
  summarise(NUMBER = sum(PERWT))

# Tag every year/race row as Native American or as the rest of the
# population.
f <- e %>%
  mutate(
    NATIVE = ifelse(
      RACED == "Native American",
      "Native American",
      "Rest of Population (Not Native)"
    )
  )

# One row per year and category, holding the summed population.
g <- f %>%
  group_by(YEAR, NATIVE) %>%
  summarise(POPULATION = sum(NUMBER))

# Bar chart of the two categories by year, filled by category, with the
# legend order reversed.
h <- ggplot(g, aes(x = YEAR, y = POPULATION, fill = NATIVE)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Native American Population Compared to U.S. Population",
    x = "Year",
    y = "Population"
  ) +
  guides(fill = guide_legend(reverse = TRUE))
print(h)
# Graph of just the Native American population over time.
# Keep only the "Native American" rows of g and the two columns the chart
# uses. Filtering on NATIVE directly avoids depending on an auto-generated
# column name or on column positions.
k <- g %>%
  ungroup() %>%
  filter(NATIVE == "Native American") %>%
  select(YEAR, POPULATION)

# Bar chart of Native American population by year with comma-formatted
# y-axis labels. No fill aesthetic is mapped, so no fill scale is attached.
# NOTE(review): the title hard-codes "1850 - 1950"; confirm that this matches
# the years actually present in k.
l <- ggplot(k, aes(x = YEAR, y = POPULATION)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Native American Population 1850 - 1950",
    x = "Year",
    y = "Population"
  ) +
  scale_y_continuous(labels = scales::comma)
print(l)
# Graph the population of every race by year, using RACED, and label the
# Native American segment with its percentage of that year's total.
m <- b %>%
  select(YEAR, RACED, PERWT)

# Show the distinct RACED codes that are about to be labelled.
unique(m$RACED)

# factor() is called without explicit levels, so the labels are paired by
# position with the sorted distinct codes; one label per distinct code is
# required.
n <- m %>%
  mutate(
    RACED = factor(
      RACED,
      labels = c(
        "White", "White (Blank)", "Portuguese", "Mexican", "Puerto Rican",
        "Black", "Mulatto", "Native American", "Chinese", "Japanese",
        "Filipino", "Asian Indian", "Korean", "Hawaiian", "Other"
      )
    )
  )

# Weighted count per year and race, and the weighted total per year.
o <- n %>%
  group_by(YEAR, RACED) %>%
  summarise(NUMBER = sum(PERWT))
p <- n %>%
  group_by(YEAR) %>%
  summarise(TOTALPOP = sum(PERWT))

# Attach each year's total to every race row of that year.
q <- merge(o, p, by = "YEAR")

# Stacked bars filled by race. Only the Native American rows get a
# non-empty label. position_stack(vjust = 0.5) stacks the labels the same
# way as the bars and centres each one inside its own segment, so the
# percentage sits on the Native American segment rather than at an
# unstacked height of NUMBER / 2.
# NOTE(review): the percentage is rounded to a whole number, so any share
# below 0.5% is displayed as "0%"; confirm that is the intended precision.
r <- ggplot(q, aes(x = YEAR, y = NUMBER, fill = RACED)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(
      label = ifelse(
        RACED == "Native American",
        paste0(round(NUMBER / TOTALPOP * 100, digits = 0), "%"),
        ""
      )
    ),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  ggtitle("Percentage of Native Americans and Population Compared to all Races")
print(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment