Skip to content

Instantly share code, notes, and snippets.

@scfurey
Last active November 7, 2016 15:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scfurey/c4a3ef296577f1f423a4ae8e2123bff3 to your computer and use it in GitHub Desktop.
Save scfurey/c4a3ef296577f1f423a4ae8e2123bff3 to your computer and use it in GitHub Desktop.
Final Project Code
#USA Visualization 1: Animated Map of Those Living in Metro Areas
#load packages
library (dplyr)
library (readr)
library(ggplot2)
library(RColorBrewer)
library(ggmap)
library(maptools)
library(gtools)
#read in blank map data of the USA
mapdata <- read_csv('map.csv')
#create and print blank map of USA
map1 <- ggplot() + theme_nothing(legend=TRUE) +
geom_polygon(data=mapdata, aes(x=long,y=lat,group=group), fill='white',color='black')
png('map.png',width=1500,height=1000)
print(map1)
dev.off ()
#read in IPUMS data
ipums <- read_csv('usa_00015.csv',col_types = cols(PERWT=col_double()))
#create Metrostatus variable
a <- ipums %>% mutate(Metrostatus=ifelse(METRO %in% c(2,3), 'Metro', 'Non-Metro'))
#group by YEAR, STATEFIP, and Metrostatus
b <- a %>% group_by(YEAR,STATEFIP,Metrostatus) %>% summarise(Number=sum(PERWT))
#group by YEAR and STATEFIP
c <- b %>% group_by(YEAR,STATEFIP) %>% summarise(Total=sum(Number))
#left join these previous two variables
d <- left_join(b,c)
#find percentages
pct <- d %>% mutate(pctmetro=Number/Total*100) %>%filter(Metrostatus=='Metro')
#cut into equal categories with appropriate ranges
cuts <- quantcut(pct$pctmetro,q=seq(0,1,.2))
#assign values and create ranges for the legend
metrocats <- pct %>% mutate(Population=factor(ifelse(pctmetro<20,1,
ifelse(pctmetro<35,2,
ifelse(pctmetro<50,3,
ifelse(pctmetro<65,4,5))))))
levels(metrocats$Population) <- c('0-19%','20-34%','35-48%',
'49-65%','65%+')
#create newmap variable from original map data
newmap <- mapdata %>% mutate(STATEI=as.integer(STATEFIP))
#left-join metrocats by newmap
metromap <- left_join(metrocats,newmap,by=c('STATEFIP'='STATEI')) %>% arrange(order)
#print separate maps for years 1930-2010
for (year in unique(metromap$YEAR)) {
map2 <- map1 + scale_fill_brewer(palette='Blues') + theme_bw(base_size = 24) +
geom_polygon(data=filter(metromap,YEAR==year),aes(x=long,y=lat,group=group,fill=Population),color='black')
labs(title=paste('Percentage of People Residing in Metro Areas in the United States,1930-2010,',year,sep=' '))
png(paste('map_',year,'.png',sep=''),width=1500,height=1000)
print(map2)
dev.off ()
}
#run this code once
install.packages('devtools')
library(devtools)
devtools::install_github('dgrtwo/gganimate')
#load gganimate
library(gganimate)
#animate the separate maps into a gif
anmap <- map1 + scale_fill_brewer(palette='Blues') +
geom_polygon(data=metromap,aes(x=long,y=lat,group=group,fill=Population,frame=YEAR),color='black') +
labs(title='Percentage of People Residing in Metro Areas in the United States,1930-2010, ')
gg_animate(anmap,ani.width=1500,ani.height=1000,'anmap.gif')
#USA Visualization 2: Column Graph of those Living in Cities versus Suburbs
#load packages
library (dplyr)
library (readr)
library(ggplot2)
library(RColorBrewer)
#read in IPUMS data
ipums <- read_csv('usa_00015.csv',col_types = cols(PERWT=col_double()))
#filter those living in metro areas
a <- ipums %>% filter(METRO %in% c(2,3)) %>% filter(!STATEFIP %in% c(2,15))
#distinguish between those living in cities versus suburbs
b <- a %>% mutate(Metro=ifelse(METRO==2, 'City', 'Suburb'))
#select variables I need
c <- b %>% select(YEAR, PERWT, Metro)
#Group twice, creating Number and NumTotal so you can find percentages
f1 <- b %>% group_by(YEAR,Metro) %>% summarise(Number=sum(PERWT))
f2 <- b %>% group_by(YEAR) %>% summarise(NumTotal=sum(PERWT))
#create pctmetro by left joining f1 and f2 and dividing Number/NumTotal
pctmetro <- left_join(f1,f2) %>% mutate(pct=Number/NumTotal*100)
#printplot function to print
printplot <- function(plot) {
png('USA_Fig2.png',height=500,width=1000)
print(fig1)
dev.off()
}
#graph
fig1 <- ggplot(data=pctmetro, aes(x=YEAR, y=pct/100, fill=Metro)) +
geom_bar(stat='identity') +
labs(x='Year',y='Percent of Population',fill='Metro',title='American Metro Population Living in Cities versus Suburbs, 1930-2010') +
scale_y_continuous(labels=scales::percent) +
scale_x_continuous(breaks=c(1930,1940,1950,1960,1970,1980,1990,2000,2010)) +
scale_fill_brewer(palette='Set2',guide=guide_legend(reverse=TRUE)) +
theme_bw()
printplot(fig1)
#USA Visualization 3: Column Graph of Americans Living in Cities versus Suburbs by Race.
#load packages
library (dplyr)
library (readr)
library(ggplot2)
library(RColorBrewer)
library(scales)
#read in IPUMS data
ipums <- read_csv('usa_00015.csv',col_types = cols(PERWT=col_double()))
#exclude Alaska and Hawaii
filtered_ipums <- ipums %>% filter(!STATEFIP %in% c(2,15))
#filter by those residing in metro areas
a <- filtered_ipums %>% filter(METRO %in% c(2,3))
#create metro variable
b <- a %>% mutate(Metro=ifelse(METRO==2, 'City', 'Suburb'))
#create race variable by recoding HISPAN and RACESING
c <- b %>% mutate(Raceth=factor(ifelse(HISPAN>0,'Hispanic',
ifelse(RACESING==1, 'White',
ifelse(RACESING==2, 'Black', 'Other')))))
#group by Year, Metro, Raceth
f1 <- c %>% group_by(YEAR, Metro, Raceth) %>% summarise(Number=sum(PERWT))
#group by Year, Metro
f2 <- f1 %>% group_by(YEAR,Metro) %>% mutate(Total=sum(Number))
#column graph
fig2 <- ggplot(data=f2,aes(x=Metro,y=Number, fill=Raceth)) +
geom_bar(stat='identity') +
labs(x='City/Suburb',y='Population',fill='Metro',title='Number of Americans Living in Cities or Suburbs by Race, 1930-2010') +
scale_y_continuous(labels=scales::comma) +
scale_fill_brewer(palette='Set2',guide=guide_legend(reverse=TRUE)) +
facet_wrap(~YEAR,ncol=2) +
theme_bw()
#print
png('USA_Fig3.png', height=500, width=1000)
print(fig2)
dev.off()
#Newark Visualization 1: Population Pyramid
#load packages
library (dplyr)
library (readr)
library(ggplot2)
library(RColorBrewer)
#read in data
a <- read_csv('usa_00015.csv')
b <- a %>% filter(CITY==4630)
#Create vector of age category labels
agecats <- '0-9'
for (i in 1:7) {
agecats <- c(agecats,paste(i, '0-',i,9,sep=''))
}
agecats <- c(agecats, '80+')
#create sex and age variables
c <- b %>% mutate(Sex=factor(SEX,labels=c('Male','Female')))
d <- c %>% mutate (Age=ifelse(AGE>=80,8,floor(AGE/10)))
e <- d %>% mutate(Age=factor(Age,labels=agecats))
#create Raceth variable using HISPAN and RACESING variables from IPUMS
f <- e %>% mutate(Raceth=factor(ifelse(HISPAN>0,'Hispanic',
ifelse(RACESING==1, 'White',
ifelse(RACESING==2, 'Black', 'Other')))))
#group by city, Age, Sex, Raceth, and Year
g <- f %>% group_by(CITY, Age, Sex, Raceth, YEAR) %>% summarise(Number=sum(PERWT))
#Multiply 'Male' by -1
h <- g %>% mutate(Number=ifelse(Sex=='Male', -1 *Number,Number))
#Graphing (make sure to use proper scale, appropriate title,etc.)
fig1 <- ggplot(data=h,aes(x=Age, y=Number,fill=Sex)) +
geom_bar(data=h[h$Sex=='Male',], stat='identity') +
geom_bar(data=h[h$Sex=='Female',], stat='identity') +
coord_flip() +
facet_grid(Raceth~.~YEAR) +
scale_y_continuous(breaks=c(-40000,-20000,0,20000,40000),
labels=c('40','20','0','20','40')) +
labs(y='Population in Thousands',title='Population Pyramid for Newark Residents,1930-2010')+
scale_fill_brewer(palette='Set1',guide=guide_legend(reverse=TRUE))+
theme_bw() + theme(legend.position='bottom')
#print
png('Newark_Fig1.png', height=500, width=1000)
print(fig1)
dev.off()
#save as PDF to export more easily to Word
ggsave('Newark_Population_Pyramid.pdf',width=10,height=7.5)
#Newark Visualization 2: Industry Column Graph
#load packages
library (dplyr)
library (readr)
library(ggplot2)
library(RColorBrewer)
#read in data; filter working ages and specify Newark
a <- read_csv('usa_00015.csv') %>%
filter(AGE>=15 & AGE<=65 & (CITY==4630))
#create industry variable by recoding IND1950
b <- a %>% mutate(Industry=factor(ifelse(IND1950<100 | IND1950>976,1,
ifelse(IND1950<246, 2,
ifelse(IND1950==246 | IND1950==976,4,
ifelse(IND1950>700,7,
ifelse(IND1950>600,5,
ifelse(IND1950>500,6,3))))))))
levels(b$Industry) <- c('none','agricultural/extractive','manufacturing',
'construction or general labor', 'trade',
'transporation/communication/utilities',
'service')
#Check to see what the head (top) looks like
c <- b %>% select(YEAR, PERWT, Industry)
head (c)
#Group by YEAR, City, and Industry
f1 <- c %>% group_by(YEAR, Industry) %>% summarise(Number=sum (PERWT))
head (f1)
#Graph industry
fig1 <- ggplot(data=f1,aes(x=YEAR,y=Number,fill=Industry)) +
geom_bar(stat='identity',position='fill') +
labs(x='Year',y='Percent of Population',fill='Industry',title='Newark Industry, 1930-2010') +
scale_y_continuous(labels=scales::percent) +
scale_x_continuous(breaks=c(1930,1940,1950,1960,1970,1980,1990,2000,2010)) +
scale_fill_brewer(palette='Set1') +
theme_bw() + theme(legend.position='bottom')
#print it
png('Newark_Industry.png',height=500,width=1000)
print(fig1)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment