Skip to content

Instantly share code, notes, and snippets.

@ttmmghmm
Forked from hrbrmstr/bls_spending.R
Last active August 29, 2015 14:22
Show Gist options
  • Save ttmmghmm/d45538769c51239fe56b to your computer and use it in GitHub Desktop.
Save ttmmghmm/d45538769c51239fe56b to your computer and use it in GitHub Desktop.
dplyr ems
library(rvest)
library(magrittr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
# get page
pg <- html("http://www.bls.gov/opub/ted/2015/consumer-spending-by-age-group-in-2013.htm#tab-2")
# extract table
pg %>%
html_nodes("table") %>%
extract2(1) %>%
html_table(header=TRUE) %>%
rename(spending_category=`Spending category`) %>%
filter(spending_category != "Total") %>%
gather(age_group, value, -spending_category) %>%
mutate(label=percent(value/100)) -> spending
# for facet ordering
spending %>%
group_by(spending_category) %>%
summarise(μ=mean(value)) %>%
arrange(μ) %>% .$spending_category -> cat_levels
# plot
spending %>%
mutate(spending_category=factor(spending_category,
levels=cat_levels, ordered=TRUE)) %>%
ggplot(aes(x=age_group, y=value)) +
geom_bar(stat="identity", aes(fill=spending_category)) +
geom_text(aes(y=value+5, label=label), size=2.5) +
scale_x_discrete(expand=c(0, 0)) +
scale_y_continuous(expand=c(0, 0), limits=c(0, max(spending$value)+10)) +
labs(x=NULL, y=NULL, title="Consumer spending by age group in 2013\n") +
facet_wrap(~spending_category) +
coord_flip() +
theme(panel.background=element_rect(fill="#f0f0f0")) +
theme(panel.grid=element_blank()) +
theme(panel.border=element_blank()) +
theme(panel.margin=unit(1, "lines")) +
theme(strip.background=element_blank()) +
theme(strip.text=element_text(size=6.5)) +
theme(axis.ticks=element_blank()) +
theme(axis.text.x=element_blank()) +
theme(legend.position="none") -> gg
# save
ggsave("bls_spending.svg", width=9, height=6)
# http://www.r-bloggers.com/animated-us-hexbin-map-of-the-avian-flu-outbreak/
# To make an animated map of cumulative flock totals by week
library(rvest) # scraping
library(stringr) # string manipulation
library(lubridate) # date conversion
library(dplyr) # data mjnging
library(zoo) # for locf
library(ggplot2) # plotting
library(rgdal) # map stuff
library(rgeos) # map stuff
"http://www.aphis.usda.gov/wps/portal/aphis/ourfocus/animalhealth/sa_animal_disease_information/sa_avian_health/ct_avian_influenza_disease/!ut/p/a1/lVJbb4IwFP41e1qwFZDLI-oUnGgyswm8kAMUaAaFQNG4X7-ibnEPYtakDz3nO_kupyhAHgoYHGgGnFYMiv4daOFqa8vjKZad5c58wc7mY-Eaa13Z2qoA-AKA7xwL_53fvjpaP_-Gp_Z8jHcK2qMABTHjNc-RD3VO2zCuGCeMhwWNGmhOT7iFsOqaMK3irj2_gNESijAnUPD8tpLQlkBLQsrSqinPJi7tAwX2i4_5tSBgRUfYF_wM9mLqmCbIj2QzxZpMJMUYg6TGkSLBBCaSPEnSJIljXVH0q_kBdw_CO5sXkNnSslV9LQJTDRk7czGumy7GjnYFDOTrCw36XRJTRbt_mlo9VD1FgfuctqrVByA37szNBAPwXOpzR97gPi7tm30gb2AfQkxWVJH4ifvZLavFIsUQrA1JSUOaUV61HHnH43HUtQmMsuqA6vK9NJST9JluNlKwyPr7DT6YvRs!/?1dmy&urile=wcm%3apath%3a%2Faphis_content_library%2Fsa_our_focus%2Fsa_animal_health%2Fsa_animal_disease_information%2Fsa_avian_health%2Fsa_detections_by_states%2Fct_ai_pacific_flyway" %>%
#' read in the data, extract the table and clean up the fields
#' also clean up the column names to since they are fairly nasty
html -> pg
pg %>%
html_nodes("table") %>%
# two tables in the code and we only need the first one.
magrittr::extract2(1) %>%
html_table(header=TRUE) %>%
filter(`Flock size`!="pending") %>%
# inconsistent in the values used for various columns.
# scan the rendered table on the USDA page by eye - the column names are horrible
# commas in the flock counts - handy to have the date as an actual date type
mutate(Species=str_replace(tolower(Species), "s$", ""),
`Avian influenza subtype*`=str_replace_all(`Avian influenza subtype*`, " ", ""),
`Flock size`=as.numeric(str_replace_all(`Flock size`, ",", "")),
`Confirmation date`=as.Date(mdy(`Confirmation date`))) %>%
rename(state=State, county=County, flyway=Flyway, flock_type=`Flock type`,
species=Species, subtype=`Avian influenza subtype*`, date=`Confirmation date`,
flock_size=`Flock size`) -> birds
# To make an animated map of cumulative flock totals by week
birds %>%
mutate(week=as.numeric(format(birds$date, "%Y%U"))) %>%
arrange(week) %>%
group_by(week, state) %>%
tally(flock_size) %>%
group_by(state) %>%
# calculate the cumulative sums
mutate(cum=cumsum(n)) %>%
ungroup %>%
select(week, state, cum) %>%
mutate(week=as.Date(paste(week, 1), "%Y%U %u")) %>%
# fill in the gaps where there are missing state/week combinations
left_join(tidyr::expand(., week, state), .) %>%
# carry the last observations by state/week forward in this expanded data frame
group_by(state) %>%
do(na.locf(.)) %>%
# make breaks for data ranges so we can more intelligently map them to colors
mutate(state_abb=state.abb[match(state, state.name)],
cum=as.numeric(ifelse(is.na(cum), 0, cum)),
brks=cut(cum,
breaks=c(0, 200, 50000, 1000000, 10000000, 50000000),
labels=c("1-200", "201-50K", "50k-1m",
"1m-10m", "10m-50m"))) -> by_state_and_week
# standard animation steps:
# determine where we’re going to break the data up
# feed that into a loop
# partition the data in the loop
# render the plot to a file
# combine all the individual images into an animation
i <- 0
for (wk in unique(by_state_and_week$week)) {
# filter by week
by_state_and_week %>% filter(week==wk) -> this_wk
# hack to let us color the state labels in white or black depending on
# the value of the fill
this_wk %>%
filter(brks %in% c("1m-10m", "10m-50m")) %>%
.$state_abb %>%
unique -> white_states
centers %>%
mutate(txt_col="black") %>%
mutate(txt_col=ifelse(id %in% white_states, "white", "black")) -> centers
# setup the plot
gg <- ggplot()
gg <- gg + geom_map(data=us_map, map=us_map,
aes(x=long, y=lat, map_id=id),
color="white", fill="#dddddd", size=2)
gg <- gg + geom_map(data=this_wk, map=us_map,
aes(fill=brks, map_id=state_abb),
color="white", size=2)
gg <- gg + geom_text(data=centers,
aes(label=id, x=x, y=y, color=txt_col), size=4)
gg <- gg + scale_color_identity()
gg <- gg + scale_fill_brewer(name="Combined flock sizen(all types)",
palette="RdPu", na.value="#dddddd", drop=FALSE)
gg <- gg + guides(fill=guide_legend(override.aes=list(colour=NA)))
gg <- gg + coord_map()
gg <- gg + labs(x=NULL, y=NULL,
title=sprintf("U.S. Avian Flu Total Impact as of %sn", wk))
gg <- gg + theme_bw()
gg <- gg + theme(plot.title=element_text(face="bold", hjust=0, size=24))
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(panel.grid=element_blank())
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(axis.text=element_blank())
gg <- gg + theme(legend.position="bottom")
gg <- gg + theme(legend.direction="horizontal")
gg <- gg + theme(legend.title.align=1)
print(gg)
i <- i + 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment