Last active
April 10, 2016 22:27
-
-
Save jonocarroll/6e158719d6d672027b267002a07735dd to your computer and use it in GitHub Desktop.
52vis Challenge Week 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## 52vis challenges, week 2 | |
## originally seen at | |
## https://rud.is/b/2016/04/06/52vis-week-2-2016-week-14-honing-in-on-the-homeless/ | |
## this version blogged @ http://jcarroll.com.au/2016/04/10/52vis-week-2-challenge/ | |
## github: github.com/jonocarroll/2016-14 | |
## This script produces a choropleth for the USA homeless population | |
## as a per mille proportion of each state's population, with the | |
## colorscale set to white at the national median, blue at the | |
## lowest value, and capped at 3x the national median in red. | |
## The yearly data is looped over in a .gif | |
## load relevant packages | |
pacman::p_load(magrittr, dplyr, tidyr, ggplot2, httr, readxl, purrr) | |
pacman::p_load(data.table, maptools, broom, ggthemes, ggalt, viridis) | |
pacman::p_load_gh("hrbrmstr/albersusa") | |
pacman::p_load_gh("dgrtwo/gganimate") | |
## git repository 2016-14 forked from https://github.com/52vis/2016-14.git | |
## and new folder created | |
setwd("jonocarroll") | |
## load the data from hudexchange.info (download once) | |
# URL <- "https://www.hudexchange.info/resources/documents/2007-2015-PIT-Counts-by-CoC.xlsx" | |
# GET(URL, write_disk("homeless.xlsx", overwrite=TRUE)) | |
## load the sheets into a list (by sheet name, a.k.a. year) of data.frames via map | |
HUDdata <- map(as.character(2007:2015), ~read_excel("homeless.xlsx", sheet=.x)) %>% | |
map(function(x) mutate(x, Year=as.integer(sub(".*, ","",names(x)[3])))) %>% ## add a year column to each list element | |
map(function(x) setNames(x, sub("(.*),.*","\\1",names(x)))) ## remove the year from the column names | |
## bind the list back to a data.frame, leaving entries blank where not supplied (new categories added in 2011) | |
HUDdataDF <- rbindlist(HUDdata, fill=TRUE) %>% filter(`CoC Name`!="Total") | |
## change the integers back to integers | |
HUDdataDF %<>% map_at(c(3:ncol(.)), as.integer) %>% as_data_frame | |
## add a state column based on the 2-letter prefix of CoC Number | |
HUDdataDF %<>% mutate(State=substr(`CoC Number`, 1, 2)) | |
## save a copy so we don't have to do that again | |
save(HUDdataDF, file="HUDdata_data.frame_2016-14.RData") | |
## sum the total homeless within each state | |
## drop the remaining columns, we'll work with those another time | |
HUDdataDF %<>% | |
select(Year, State, `Total Homeless`) %>% | |
group_by(State, Year) %>% | |
summarise(nHomeless=sum(`Total Homeless`)) | |
## use the state populations courtesy of hrbrmstr | |
uspop <- read.csv("../uspop.csv", stringsAsFactors=FALSE) | |
uspop_long <- gather(uspop, year, population, -name, -iso_3166_2) | |
uspop_long$year <- as.integer(sub("X", "", uspop_long$year)) | |
HUDdataDF %<>% merge(uspop_long, by.x=c("State", "Year"), by.y=c("iso_3166_2", "year")) | |
## normalise the total homeless population as a proportion of 1000 persons (per mille) in each state population | |
HUDdataDF %<>% group_by(Year, State) %>% mutate(HomelessProp=1e3L*nHomeless/population) | |
## I needed an excuse to us the albersusa package, this is a good one | |
us <- usa_composite() | |
us_map <- tidy(us, region="name") | |
## merge the us_map data with ours | |
map_with_data <- HUDdataDF %>% merge(us_map, by.x="name", by.y="id") %>% mutate(id=name) | |
## make more than 3x the median value as a 'plus' group | |
## NB: this doesn't affect the median value | |
map_with_data$HomelessProp[map_with_data$HomelessProp > 3*median(map_with_data$HomelessProp, na.rm=TRUE)] <- 3*median(map_with_data$HomelessProp, na.rm=TRUE) | |
## save a copy so we don't have to do that again | |
save(map_with_data, file="map_with_data_2016-14.RData") | |
## build the animated plot | |
gg <- ggplot(map_with_data) | |
gg <- gg + labs(subtitle=paste0("USA Homeless population scaled by state population,\ncapped at 3x national median (", | |
format(3*median(map_with_data$HomelessProp, na.rm=TRUE), digits=3),"/1000)"), | |
caption="Data: https://www.hudexchange.info/resources/documents/2007-2015-PIT-Counts-by-CoC.xlsx ") | |
gg <- gg + geom_map(map=us_map, | |
aes(x=long, y=lat, map_id=id, fill=HomelessProp, frame=Year), | |
color="#2b2b2b", size=0.1) | |
gg <- gg + theme_map() | |
gg <- gg + coord_proj(us_laea_proj) | |
gg <- gg + scale_fill_gradient2(name="Homeless\npopulation \u2030", | |
low="steelblue", high="firebrick", | |
midpoint=median(map_with_data$HomelessProp, na.rm=TRUE), | |
limits=range(map_with_data$HomelessProp, na.rm=TRUE), | |
breaks=c(0,1,2,3,4), | |
labels=c("0","1","2","3","4+")) | |
gg <- gg + theme(legend.position=c(0.86, 0.3), legend.key.size=unit(2,"cm")) | |
gg <- gg + theme(text=element_text(size=30, family="Arial Narrow")) | |
## view the animation | |
gg_animate(gg) | |
## output the animation | |
gg_animate(gg, interval=1, ani.width=1600, ani.height=1200, file="HomelessPopulation.gif") | |
## optimise the gif using Imagemagick | |
system("convert HomelessPopulation.gif -fuzz 10% -layers OptimizePlus -layers OptimizeTransparency HomelessPopulation_optim.gif") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment