Skip to content

Instantly share code, notes, and snippets.

@jkeirstead
Last active March 11, 2018 00:17
Show Gist options
  • Star 27 You must be signed in to star a gist
  • Fork 12 You must be signed in to fork a gist
  • Save jkeirstead/df85c839bd8f0026ee05 to your computer and use it in GitHub Desktop.
Save jkeirstead/df85c839bd8f0026ee05 to your computer and use it in GitHub Desktop.
Draw waterfall charts in R. See post at http://www.jameskeirstead.ca/blog/waterfall-plots-in-r
category value sector
UK production emissions 632 UK
Carbon flows from EU 88 EU
Carbon flows to EU -61 EU
Carbon flows from other Annex 1 82 Annex 1
Carbon flows to other Annex 1 -39 Annex 1
Carbon flows from non-Annex 1 104 Other non-Annex 1
Carbon flows from non-Annex 1 64 China
Carbon flows to non-Annex 1 -25 Non-Annex 1
UK consumption emissions 845 UK
source("waterfall.r")
## Load the data and set correct column names
df <- read.csv("data.csv", stringsAsFactors=FALSE)
## ----further-prep--------------------------------------------------------
## Tidy the levels
df$category <- factor(df$category, levels=unique(df$category))
levels(df$category) <- gsub("flows ", "flows \n", levels(df$category))
levels(df$category) <- gsub(" emissions", "\nemissions", levels(df$category))
df$sector <- factor(df$sector, levels=c("UK", "EU", "Annex 1", "Non-Annex 1",
"China", "Other non-Annex 1"))
## ----prepare-plot, echo=TRUE---------------------------------------------
## Determines the spacing between columns in the waterfall chart
offset <- 0.3
gg <- waterfall(df, offset=offset) +
coord_cartesian(ylim=c(600, 900)) +
scale_fill_manual(guide="none", values=c(rgb(81, 34, 112, max=255),
rgb(125, 96, 153, max=255),
rgb(116, 173, 226, max=255),
rgb(17, 135, 146, max=255),
rgb(69, 171, 183, max=255),
rgb(17, 135, 146, max=255))) +
labs(x="", y="Mt CO2",
title="UK embodied emissions balance (import and export) with major regions, 2004") +
theme_classic() +
annotate("text", x=6, y=838, label="China", colour="white") +
annotate("text", x=8 + offset, y=900,
hjust=1, vjust=1,
size=3,
label="Data source: Carbon Trust (2011)",
fontface="italic") +
theme(plot.title=element_text(face="bold", hjust=0, vjust=2))
## ----plot, dev='png', fig.width=12, fig.height=7.5-----------------------
print(gg)
#' Makes a waterfall plot
#'
#' Makes a waterfall plot using ggplot2. The bars will be plotted in
#' the order specified by the factoring of the 'category' column.
#' Values should represent the positive or negative changes relative
#' to the previous bar.
#'
#' @param df a dataframe with columns 'category' (an ordered factor),
#' 'value' (numeric), and 'sector' (character)
#' @param offset the spacing between the columns, default = 0.3
#'
#' @examples
#' raw <- data.frame(category=c("A", "B", "C", "D"),
#' value=c(100, -20, 10, 90),
#' sector=1)
#'
#' df1 <- transform(raw, category=factor(category))
#' waterfall(df1) + theme_bw() + labs(x="", y="Value")
#'
#' df2 <- transform(raw, category=factor(category, levels=c("A", "C", "B", "D")))
#' waterfall(df2) + theme_bw() + labs(x="", y="Value")
#'
#' @return a ggplot2 object
waterfall <- function(df, offset=0.3) {
library(ggplot2)
library(scales)
library(dplyr)
## Add the order column to the raw data frame and order appropriately
df <- df %>% mutate(order=as.numeric(category)) %>% arrange(order)
## The last value needs to be negated so that it goes down to
## zero. Throws a warning if the cumulative sum doesn't match.
last.id <- nrow(df)
df$value[last.id] <- -df$value[last.id]
## Calculate the cumulative sums
df <- df %>% mutate(cs1=cumsum(value))
## Throw a warning if the values don't match zero as expected
final_value <- tail(df$cs1, 1)
if (final_value!=0) {
warning(sprintf("Final value doesn't return to 0. %.2d instead.", final_value))
}
## Calculate the max and mins for each category and sector
df <- transform(df, min.val=c(0, head(cs1, -1)),
max.val=c(head(cs1, -1), 0))
df <- df %>% group_by(order, category, sector, value, cs1) %>%
summarize(min=min(min.val, max.val), max=max(min.val, max.val))
## Create the lines data frame to link the bars
lines <- df %>% group_by(order) %>% summarize(cs=max(cs1))
lines <- with(lines, data.frame(x=head(order, -1),
xend=tail(order, -1),
y=head(cs, -1),
yend=head(cs, -1)))
## Add the offset parameter
df <- transform(df, offset=offset)
## Make the plot
gg <- ggplot() +
geom_segment(data=lines, aes(x=x, y=y, xend=xend, yend=yend), linetype="dashed") +
geom_rect(data=df, aes(xmin=order - offset,
xmax=order + offset,
ymin=min,
ymax=max, fill=sector)) +
scale_x_continuous(breaks=unique(df$order), labels=unique(df$category))
return(gg)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment