-
-
Save jpearl1395/c72cea4498a0345c79f0 to your computer and use it in GitHub Desktop.
R Code for HIST 90.01 Race Assignment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used by this script: readr (read_csv), dplyr (data manipulation),
# ggplot2 (plots), scales (axis label formatting), plus grid and RColorBrewer.
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
library(grid)
library(RColorBrewer)

# NOTE(review): this working directory is specific to the original author's
# machine; the relative CSV path below is resolved against it. Adjust the path
# (or run the script from the data directory) when running elsewhere.
setwd('/Users/joshpearl/Documents/Dartmouth/Junior Year 16W-16X/16W/HIST 90.01- US Hist Through Census/Race Assignment')

# Read the data extract. Later steps use the columns YEAR, STATEFIP, PERWT
# and RACED.
a <- read_csv('usa_00004.csv')
# Keep only records with STATEFIP below 60 and, for years before 1960, also
# drop STATEFIP 2 and 15 (per the original note: removes Alaska, Hawaii and
# overseas military). From 1960 onwards codes 2 and 15 are retained.
b <- filter(a, STATEFIP < 60 & (YEAR >= 1960 | !(STATEFIP %in% c(2, 15))))

# Only the year, the weight and the detailed race code are needed downstream.
c <- select(b, YEAR, PERWT, RACED)

# Inspect which RACED codes are present; the labelling step that follows needs
# one label per distinct code.
unique(c$RACED)
# Replace the numeric RACED codes with readable labels.
# factor() assigns `labels` to the *sorted* distinct values of RACED, so the
# labels must be listed in numeric code order, not in the order the codes
# appear in the data. There must be exactly as many labels (15) as distinct
# codes, otherwise factor() stops with an "invalid 'labels'" error.
# The "Native American" label is what the later steps use to count Native
# Americans from 1850 onwards.
race_labels <- c(
  "White", "White (Blank)", "Portuguese", "Mexican", "Puerto Rican",
  "Black", "Mulatto", "Native American", "Chinese", "Japanese",
  "Filipino", "Asian Indian", "Korean", "Hawaiian", "Other"
)
d <- mutate(c, RACED = factor(RACED, labels = race_labels))
# Compare Native Americans ("NA" in the original notes means Native American,
# not a missing value) with everyone else: total the weights per year and
# race, tag each race as Native / not Native, then total again per tag.

# Sum of PERWT for every YEAR x RACED combination.
e <- d %>%
  group_by(YEAR, RACED) %>%
  summarise(NUMBER = sum(PERWT))

# Separate Native Americans from all other races.
f <- mutate(
  e,
  NATIVE = ifelse(
    RACED == "Native American",
    "Native American",
    "Rest of Population (Not Native)"
  )
)

# One row per year and Native / non-Native group.
g <- f %>%
  group_by(YEAR, NATIVE) %>%
  summarise(POPULATION = sum(NUMBER))

# Stacked bar chart of the two groups by year.
h <- ggplot(g, aes(x = YEAR, y = POPULATION, fill = NATIVE)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Native American Population Compared to U.S. Population",
    x = "Year",
    y = "Population"
  ) +
  guides(fill = guide_legend(reverse = TRUE))
print(h)
# Graph of just the Native American population over time.
# Keep only the "Native American" rows of `g` and the two columns the plot
# needs (replaces the earlier group_by-on-an-expression / column-drop detour,
# which produced the same YEAR + POPULATION table).
k <- g %>%
  ungroup() %>%
  filter(NATIVE == "Native American") %>%
  select(YEAR, POPULATION)

# Bar chart with comma-formatted population axis. No fill aesthetic is mapped,
# so the former scale_fill_brewer() call had no effect and has been removed.
l <- ggplot(k, aes(x = YEAR, y = POPULATION)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Native American Population 1850 - 1950",
    x = "Year",
    y = "Population"
  ) +
  scale_y_continuous(labels = scales::comma)
print(l)
# Graph population by race per year, labelling the Native American segment
# with its percentage share of that year's total.
m <- select(b, YEAR, RACED, PERWT)
unique(m$RACED)

# Label the RACED codes; labels follow the sorted numeric codes and there must
# be exactly one label per distinct code present in the data.
n <- mutate(
  m,
  RACED = factor(
    RACED,
    labels = c(
      "White", "White (Blank)", "Portuguese", "Mexican", "Puerto Rican",
      "Black", "Mulatto", "Native American", "Chinese", "Japanese",
      "Filipino", "Asian Indian", "Korean", "Hawaiian", "Other"
    )
  )
)

# Sum of PERWT per year and race, and per year overall.
o <- n %>%
  group_by(YEAR, RACED) %>%
  summarise(NUMBER = sum(PERWT))
p <- n %>%
  group_by(YEAR) %>%
  summarise(TOTALPOP = sum(PERWT))

# Attach each year's total to every race row so the share can be computed.
q <- left_join(o, p, by = "YEAR")

# Stacked bars by race. The text layer is stacked the same way as the bars
# (position_stack with vjust = 0.5) so the percentage sits in the middle of
# the Native American segment; with the default identity position the label
# was drawn at NUMBER / 2 above the axis, i.e. not on that segment.
r <- ggplot(q, aes(x = YEAR, y = NUMBER, fill = RACED)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(
      label = ifelse(
        RACED == "Native American",
        paste0(round(NUMBER / TOTALPOP * 100, digits = 0), "%"),
        ""
      )
    ),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  ggtitle("Percentage of Native Americans and Population Compared to all Races")
print(r)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment