Last active
November 7, 2016 15:45
-
-
Save scfurey/c4a3ef296577f1f423a4ae8e2123bff3 to your computer and use it in GitHub Desktop.
Final Project Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#USA Visualization 1: Animated Map of Those Living in Metro Areas | |
#load packages | |
library (dplyr) | |
library (readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(ggmap) | |
library(maptools) | |
library(gtools) | |
#read in blank map data of the USA | |
mapdata <- read_csv('map.csv') | |
#create and print blank map of USA | |
map1 <- ggplot() + theme_nothing(legend=TRUE) + | |
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group), fill='white',color='black') | |
png('map.png',width=1500,height=1000) | |
print(map1) | |
dev.off () | |
#read in IPUMS data | |
ipums <- read_csv('usa_00015.csv',col_types = cols(PERWT=col_double())) | |
#create Metrostatus variable | |
a <- ipums %>% mutate(Metrostatus=ifelse(METRO %in% c(2,3), 'Metro', 'Non-Metro')) | |
#group by YEAR, STATEFIP, and Metrostatus | |
b <- a %>% group_by(YEAR,STATEFIP,Metrostatus) %>% summarise(Number=sum(PERWT)) | |
#group by YEAR and STATEFIP | |
c <- b %>% group_by(YEAR,STATEFIP) %>% summarise(Total=sum(Number)) | |
#left join these previous two variables | |
d <- left_join(b,c) | |
#find percentages | |
pct <- d %>% mutate(pctmetro=Number/Total*100) %>%filter(Metrostatus=='Metro') | |
#cut into equal categories with appropriate ranges | |
cuts <- quantcut(pct$pctmetro,q=seq(0,1,.2)) | |
#assign values and create ranges for the legend | |
metrocats <- pct %>% mutate(Population=factor(ifelse(pctmetro<20,1, | |
ifelse(pctmetro<35,2, | |
ifelse(pctmetro<50,3, | |
ifelse(pctmetro<65,4,5)))))) | |
levels(metrocats$Population) <- c('0-19%','20-34%','35-48%', | |
'49-65%','65%+') | |
#create newmap variable from original map data | |
newmap <- mapdata %>% mutate(STATEI=as.integer(STATEFIP)) | |
#left-join metrocats by newmap | |
metromap <- left_join(metrocats,newmap,by=c('STATEFIP'='STATEI')) %>% arrange(order) | |
#print separate maps for years 1930-2010 | |
for (year in unique(metromap$YEAR)) { | |
map2 <- map1 + scale_fill_brewer(palette='Blues') + theme_bw(base_size = 24) + | |
geom_polygon(data=filter(metromap,YEAR==year),aes(x=long,y=lat,group=group,fill=Population),color='black') | |
labs(title=paste('Percentage of People Residing in Metro Areas in the United States,1930-2010,',year,sep=' ')) | |
png(paste('map_',year,'.png',sep=''),width=1500,height=1000) | |
print(map2) | |
dev.off () | |
} | |
#run this code once | |
install.packages('devtools') | |
library(devtools) | |
devtools::install_github('dgrtwo/gganimate') | |
#load gganimate | |
library(gganimate) | |
#animate the separate maps into a gif | |
anmap <- map1 + scale_fill_brewer(palette='Blues') + | |
geom_polygon(data=metromap,aes(x=long,y=lat,group=group,fill=Population,frame=YEAR),color='black') + | |
labs(title='Percentage of People Residing in Metro Areas in the United States,1930-2010, ') | |
gg_animate(anmap,ani.width=1500,ani.height=1000,'anmap.gif') | |
#USA Visualization 2: Column Graph of those Living in Cities versus Suburbs | |
#load packages | |
library (dplyr) | |
library (readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
#read in IPUMS data | |
ipums <- read_csv('usa_00015.csv',col_types = cols(PERWT=col_double())) | |
#filter those living in metro areas | |
a <- ipums %>% filter(METRO %in% c(2,3)) %>% filter(!STATEFIP %in% c(2,15)) | |
#distinguish between those living in cities versus suburbs | |
b <- a %>% mutate(Metro=ifelse(METRO==2, 'City', 'Suburb')) | |
#select variables I need | |
c <- b %>% select(YEAR, PERWT, Metro) | |
#Group twice, creating Number and NumTotal so you can find percentages | |
f1 <- b %>% group_by(YEAR,Metro) %>% summarise(Number=sum(PERWT)) | |
f2 <- b %>% group_by(YEAR) %>% summarise(NumTotal=sum(PERWT)) | |
#create pctmetro by left joining f1 and f2 and dividing Number/NumTotal | |
pctmetro <- left_join(f1,f2) %>% mutate(pct=Number/NumTotal*100) | |
#printplot function to print | |
printplot <- function(plot) { | |
png('USA_Fig2.png',height=500,width=1000) | |
print(fig1) | |
dev.off() | |
} | |
#graph | |
fig1 <- ggplot(data=pctmetro, aes(x=YEAR, y=pct/100, fill=Metro)) + | |
geom_bar(stat='identity') + | |
labs(x='Year',y='Percent of Population',fill='Metro',title='American Metro Population Living in Cities versus Suburbs, 1930-2010') + | |
scale_y_continuous(labels=scales::percent) + | |
scale_x_continuous(breaks=c(1930,1940,1950,1960,1970,1980,1990,2000,2010)) + | |
scale_fill_brewer(palette='Set2',guide=guide_legend(reverse=TRUE)) + | |
theme_bw() | |
printplot(fig1) | |
#USA Visualization 3: Column Graph of Americans Living in Cities versus Suburbs by Race. | |
#load packages | |
library (dplyr) | |
library (readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(scales) | |
#read in IPUMS data | |
ipums <- read_csv('usa_00015.csv',col_types = cols(PERWT=col_double())) | |
#exclude Alaska and Hawaii | |
filtered_ipums <- ipums %>% filter(!STATEFIP %in% c(2,15)) | |
#filter by those residing in metro areas | |
a <- filtered_ipums %>% filter(METRO %in% c(2,3)) | |
#create metro variable | |
b <- a %>% mutate(Metro=ifelse(METRO==2, 'City', 'Suburb')) | |
#create race variable by recoding HISPAN and RACESING | |
c <- b %>% mutate(Raceth=factor(ifelse(HISPAN>0,'Hispanic', | |
ifelse(RACESING==1, 'White', | |
ifelse(RACESING==2, 'Black', 'Other'))))) | |
#group by Year, Metro, Raceth | |
f1 <- c %>% group_by(YEAR, Metro, Raceth) %>% summarise(Number=sum(PERWT)) | |
#group by Year, Metro | |
f2 <- f1 %>% group_by(YEAR,Metro) %>% mutate(Total=sum(Number)) | |
#column graph | |
fig2 <- ggplot(data=f2,aes(x=Metro,y=Number, fill=Raceth)) + | |
geom_bar(stat='identity') + | |
labs(x='City/Suburb',y='Population',fill='Metro',title='Number of Americans Living in Cities or Suburbs by Race, 1930-2010') + | |
scale_y_continuous(labels=scales::comma) + | |
scale_fill_brewer(palette='Set2',guide=guide_legend(reverse=TRUE)) + | |
facet_wrap(~YEAR,ncol=2) + | |
theme_bw() | |
png('USA_Fig3.png', height=500, width=1000) | |
print(fig2) | |
dev.off() | |
#Newark Visualization 1: Population Pyramid | |
#load packages | |
library (dplyr) | |
library (readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
#read in data | |
a <- read_csv('usa_00015.csv') | |
b <- a %>% filter(CITY==4630) | |
#Create vector of age category labels | |
agecats <- '0-9' | |
for (i in 1:7) { | |
agecats <- c(agecats,paste(i, '0-',i,9,sep='')) | |
} | |
agecats <- c(agecats, '80+') | |
#create sex and age variables | |
c <- b %>% mutate(Sex=factor(SEX,labels=c('Male','Female'))) | |
d <- c %>% mutate (Age=ifelse(AGE>=80,8,floor(AGE/10))) | |
e <- d %>% mutate(Age=factor(Age,labels=agecats)) | |
#create Raceth variable using HISPAN and RACESING variables from IPUMS | |
f <- e %>% mutate(Raceth=factor(ifelse(HISPAN>0,'Hispanic', | |
ifelse(RACESING==1, 'White', | |
ifelse(RACESING==2, 'Black', 'Other'))))) | |
#group by city, Age, Sex, Raceth, and Year | |
g <- f %>% group_by(CITY, Age, Sex, Raceth, YEAR) %>% summarise(Number=sum(PERWT)) | |
#Multiply 'Male' by -1 | |
h <- g %>% mutate(Number=ifelse(Sex=='Male', -1 *Number,Number)) | |
#Graphing (make sure to use proper scale, appropriate title,etc.) | |
fig1 <- ggplot(data=h,aes(x=Age, y=Number,fill=Sex)) + | |
geom_bar(data=h[h$Sex=='Male',], stat='identity') + | |
geom_bar(data=h[h$Sex=='Female',], stat='identity') + | |
coord_flip() + | |
facet_grid(Raceth~.~YEAR) + | |
scale_y_continuous(breaks=c(-40000,-20000,0,20000,40000), | |
labels=c('40','20','0','20','40')) + | |
labs(y='Population in Thousands',title='Population Pyramid for Newark Residents,1930-2010')+ | |
scale_fill_brewer(palette='Set1',guide=guide_legend(reverse=TRUE))+ | |
theme_bw() + theme(legend.position='bottom') | |
png('Newark_Fig1.png', height=500, width=1000) | |
print(fig1) | |
dev.off() | |
#save as PDF to export more easily to Word | |
ggsave('Newark_Population_Pyramid.pdf',width=10,height=7.5) | |
#Newark Visualization 2: Industry Column Graph | |
#load packages | |
library (dplyr) | |
library (readr) | |
library(ggplot2) | |
library(RColorBrewer) | |
#read in data; filter working ages and specify Newark | |
a <- read_csv('usa_00015.csv') %>% | |
filter(AGE>=15 & AGE<=65 & (CITY==4630)) | |
#create industry variable by recoding IND1950 | |
b <- a %>% mutate(Industry=factor(ifelse(IND1950<100 | IND1950>976,1, | |
ifelse(IND1950<246, 2, | |
ifelse(IND1950==246 | IND1950==976,4, | |
ifelse(IND1950>700,7, | |
ifelse(IND1950>600,5, | |
ifelse(IND1950>500,6,3)))))))) | |
levels(b$Industry) <- c('none','agricultural/extractive','manufacturing', | |
'construction or general labor', 'trade', | |
'transporation/communication/utilities', | |
'service') | |
#Check to see what the head (top) looks like | |
c <- b %>% select(YEAR, PERWT, Industry) | |
head (c) | |
#Group by YEAR, City, and Industry | |
f1 <- c %>% group_by(YEAR, Industry) %>% summarise(Number=sum (PERWT)) | |
head (f1) | |
#Graph industry | |
fig1 <- ggplot(data=f1,aes(x=YEAR,y=Number,fill=Industry)) + | |
geom_bar(stat='identity',position='fill') + | |
labs(x='Year',y='Percent of Population',fill='Industry',title='Newark Industry, 1930-2010') + | |
scale_y_continuous(labels=scales::percent) + | |
scale_x_continuous(breaks=c(1930,1940,1950,1960,1970,1980,1990,2000,2010)) + | |
scale_fill_brewer(palette='Set1') + | |
theme_bw() + theme(legend.position='bottom') | |
#print it | |
png('Newark_Industry.png',height=500,width=1000) | |
print(fig1) | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment