Last active
April 11, 2016 23:37
-
-
Save jonocarroll/3f5e6cb61d9fb6c6a8f6afa85b8c6cfb to your computer and use it in GitHub Desktop.
Australian version of the 52Vis Week 2 Challenge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## 52vis challenges, week 2 -- adapted for Australian statistics | |
## otherwise follows procedures found at | |
## github.com/jonocarroll/2016-14 | |
## http://jcarroll.com.au/2016/04/10/52vis-week-2-challenge/ | |
## this version blogged @ http://jcarroll.com.au/2016/04/12/australian-homeless/ | |
## This script produces a chloropleth for the Australian homeless population | |
## as a per mille proportion of each state's population, with the | |
## colorscale set to the same as the USA homeless graph (white at the national | |
## median, blue at the lowest value, and capped at 3x the national median in red). | |
## The graph is repeated with a scale more suitable for the Australian statistics. | |
## The yearly data is looped over in a .gif | |
## load relevant packages | |
pacman::p_load(magrittr, dplyr, tidyr, ggplot2, httr, readxl, purrr) | |
pacman::p_load(data.table, maptools, broom, ggthemes, ggalt, viridis, httr) | |
pacman::p_load_gh("hrbrmstr/albersusa") | |
pacman::p_load_gh("dgrtwo/gganimate") | |
pacman::p_load_gh("hrbrmstr/ggalt") | |
## and new folder created | |
setwd("jonocarroll") | |
## load the data from abs.gov.au (download once) | |
## http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2049.02011?OpenDocument | |
# URL <- "http://abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&20490do001_2011.xls&2049.0&Data%20Cubes&4B192F075234A583CA257AB1001709B0&0&2011&12.11.2012&Previous" | |
# GET(URL, write_disk("homeless_AUS.xls", overwrite=TRUE)) | |
## load the sheets into a list (by sheet name, a.k.a. year) of data.frames via map | |
AUSdata <- read_excel("homeless_AUS.xls", sheet="Table_1", skip=5)[27:35,] %>% setNames(c("State", | |
"n2001", "pc2001", "p10k2001", | |
"n2006", "pc2006", "p10k2006", | |
"n2011", "pc2011", "p10k2011")) | |
## gather, and add a year column | |
AUSdataDF <- AUSdata %>% | |
gather(Year, nHomeless, c(n2001, n2006, n2011)) %>% | |
dplyr::select(-c(pc2001, pc2006, pc2011, p10k2001, p10k2006, p10k2011)) %>% | |
mutate(Year=as.integer(substr(Year,2,5))) | |
## save a copy so we don't have to do that again | |
save(AUSdataDF, file="AUSdata_data.frame_2016-14.RData") | |
## merge with abs.gov.au statep opulation statistics | |
## http://blog.id.com.au/2012/population/australian-population/australian-2011-census-based-population-estimates/ | |
auspop <- read.csv("auspop.csv", stringsAsFactors=FALSE) | |
auspop %<>% gather(Year, Population, -State) %>% mutate(Year=as.integer(substr(Year,2,5))) | |
AUSdataDF %<>% merge(auspop, by=c("State", "Year")) | |
## normalise the total homeless population as a proportion of 1000 persons (per mille) in each state population | |
AUSdataDF %<>% group_by(Year, State) %>% mutate(HomelessProp=1e3L*nHomeless/Population) | |
## http://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&1270055001_ste_2011_aust_shape.zip&1270.0.55.001&Data%20Cubes&1D26EC44E6ABD911CA257801000D8779&0&July%202011&23.12.2010&Latest | |
pacman::p_load(rgdal, maptools, ggplot2) | |
aus <- readOGR(dsn=".", layer="STE_2011_AUST") | |
aus@data$id = rownames(aus@data) | |
aus_map <- tidy(aus, region="id") | |
aus_map.df = merge(aus_map, aus@data, by="id") | |
## merge the aus_map data with ours | |
map_with_data <- AUSdataDF %>% merge(aus_map.df, by.x="State", by.y="STE_NAME11") | |
## make more than 3x the US national median value as a 'plus' group | |
map_with_data$HomelessProp[map_with_data$HomelessProp > 4.89] <- 4.89 | |
## save a copy so we don't have to do that again | |
save(map_with_data, file="map_with_data_aus_2016-14.RData") | |
## build the animated plot | |
gg <- ggplot(map_with_data) | |
gg <- gg + labs(subtitle=paste0("AUS Homeless population scaled by state population,\ncapped at 3x USA national median (4.89/1000)"), | |
caption="Data: http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2049.02011?OpenDocument ") | |
gg <- gg + geom_map(map=aus_map, | |
aes(x=long, y=lat, map_id=id, fill=HomelessProp, frame=Year), | |
color="#2b2b2b", size=0.1) | |
gg <- gg + theme_map() | |
gg <- gg + scale_fill_gradient2(name="Homeless\npopulation \u2030", | |
low="steelblue", high="firebrick", | |
midpoint=4.89/3, | |
limits=c(0,5), | |
breaks=c(0,1,2,3,4), | |
labels=c("0","1","2","3","4+")) | |
gg <- gg + theme(legend.position=c(0.86, 0.3), legend.key.size=unit(2,"cm")) | |
gg <- gg + theme(text=element_text(size=30, family="Arial Narrow")) | |
## view the animation | |
gg_animate(gg) | |
## output the animation | |
gg_animate(gg, interval=1, ani.width=1600, ani.height=1200, file="HomelessPopulation_AUSversion_USscale.gif") | |
## optimise the gif using Imagemagick | |
system("convert HomelessPopulation_AUSversion_USscale.gif -fuzz 10% -layers OptimizePlus -layers OptimizeTransparency HomelessPopulation_optim_AUSversion_USscale.gif") | |
## | |
## now repeat, but for our own median | |
## | |
## merge the aus_map data with ours | |
map_with_data <- AUSdataDF %>% merge(aus_map.df, by.x="State", by.y="STE_NAME11") | |
## make more than 3x the US national median value as a 'plus' group | |
map_with_data$HomelessProp[map_with_data$HomelessProp > 3*median(map_with_data$HomelessProp, na.rm=TRUE)] <- 3*median(map_with_data$HomelessProp, na.rm=TRUE) | |
## save a copy so we don't have to do that again | |
save(map_with_data, file="map_with_data_aus2_2016-14.RData") | |
## build the animated plot | |
gg <- ggplot(map_with_data) | |
gg <- gg + labs(subtitle=paste0("AUS Homeless population scaled by state population,\ncapped at 3x AUS national median (10.57/1000)"), | |
caption="Data: http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2049.02011?OpenDocument ") | |
gg <- gg + geom_map(map=aus_map, | |
aes(x=long, y=lat, map_id=id, fill=HomelessProp, frame=Year), | |
color="#2b2b2b", size=0.1) | |
gg <- gg + theme_map() | |
gg <- gg + scale_fill_gradient2(name="Homeless\npopulation \u2030", | |
low="steelblue", high="firebrick", | |
midpoint=median(map_with_data$HomelessProp, na.rm=TRUE), | |
limits=c(0,12), | |
breaks=seq(0,10,1), | |
labels=c("0","1","2","3","4","5","6","7","8","9","10+")) | |
gg <- gg + theme(legend.position=c(0.86, 0.3), legend.key.size=unit(2,"cm")) | |
gg <- gg + theme(text=element_text(size=30, family="Arial Narrow")) | |
## view the animation | |
gg_animate(gg) | |
## output the animation | |
gg_animate(gg, interval=1, ani.width=1600, ani.height=1200, file="HomelessPopulation_AUSversion_AUSscale.gif") | |
## optimise the gif using Imagemagick | |
system("convert HomelessPopulation_AUSversion_AUSscale.gif -fuzz 10% -layers OptimizePlus -layers OptimizeTransparency HomelessPopulation_optim_AUSversion_AUSscale.gif") | |
## for clarity, add a lollipop graph (courtesy of hrbrmstr) | |
devtools::install_github("hrbrmstr/ggalt") | |
library(ggalt) | |
AUSdataDF %<>% mutate(StateYear=paste0(State,", ",Year)) | |
gg2 <- ggplot(AUSdataDF %>% ungroup %>% filter(State!="Northern Territory") %>% arrange(HomelessProp), aes(y=reorder(StateYear, HomelessProp), x=HomelessProp)) | |
gg2 <- gg2 + geom_lollipop(aes(color=factor(Year)), horizontal=TRUE) | |
gg2 <- gg2 + labs(x="Homeless population \u2030", | |
y="State, Year", | |
title="Australian Homeless Population", | |
subtitle="Excluding Northern Territory", | |
caption="Data: http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2049.02011?OpenDocument ") | |
gg2 <- gg2 + theme_bw() | |
gg2 <- gg2 + theme(legend.key=element_blank()) | |
gg2 <- gg2 + scale_color_discrete(guide=FALSE) | |
gg2 <- gg2 + facet_wrap(~Year, nrow=1) | |
gg2 <- gg2 + theme(text=element_text(family="Arial Narrow")) | |
gg2 | |
ggsave(gg2, filename="HomelessPopulation_AUSbyStateNONT.png", width=7, height=7, units="in", dpi=600) | |
gg2NT <- ggplot(AUSdataDF %>% ungroup %>% arrange(HomelessProp), aes(y=reorder(StateYear, HomelessProp), x=HomelessProp)) | |
gg2NT <- gg2NT + geom_lollipop(aes(color=factor(Year)), horizontal=TRUE) | |
gg2NT <- gg2NT + labs(x="Homeless population \u2030", | |
y="State, Year", | |
title="Australian Homeless Population", | |
# subtitle="Excluding Northern Territory", | |
caption="Data: http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/2049.02011?OpenDocument ") | |
gg2NT <- gg2NT + theme_bw() | |
gg2NT <- gg2NT + theme(legend.key=element_blank()) | |
gg2NT <- gg2NT + scale_color_discrete(guide=FALSE) | |
gg2NT <- gg2NT + theme(text=element_text(family="Arial Narrow")) | |
gg2NT | |
ggsave(gg2NT, filename="HomelessPopulation_AUSbyState.png", width=7, height=7, units="in", dpi=600) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment