Create a gist now

Instantly share code, notes, and snippets.

@araastat /ggkm.R
Last active Sep 2, 2016

Plotting a Kaplan-Meier curve using ggplot. ggkmTable.R adds a table below the plot showing numbers at risk at different times.
#’ Create a Kaplan-Meier plot using ggplot2
#’
#’ @param sfit a \code{\link[survival]{survfit}} object
#’ @param returns logical: if \code{TRUE}, return an ggplot object
#’ @param xlabs x-axis label
#’ @param ylabs y-axis label
#’ @param ystratalabs The strata labels. \code{Default = levels(summary(sfit)$strata)}
#’ @param ystrataname The legend name. Default = “Strata”
#’ @param timeby numeric: control the granularity along the time-axis
#’ @param main plot title
#’ @param pval logical: add the pvalue to the plot?
#’ @return a ggplot is made. if returns=TRUE, then an ggplot object
#’ is returned
#’ @author Abhijit Dasgupta with contributions by Gil Tomas
#’ \url{http://statbandit.wordpress.com/2011/03/08/an-enhanced-kaplan-meier-plot/}
#’ @export
#’ @examples
#’ \dontrun{
#’ data(colon)
#’ fit <- survfit(Surv(time,status)~rx, data=colon)
#' ggkm(fit, timeby=500)
#' }
ggkm <- function(sfit, returns = FALSE,
xlabs = "Time", ylabs = "survival probability",
ystratalabs = NULL, ystrataname = NULL,
timeby = 100, main = "Kaplan-Meier Plot",
pval = TRUE, ...) {
require(plyr)
require(ggplot2)
require(survival)
require(gridExtra)
if(is.null(ystratalabs)) {
ystratalabs <- as.character(levels(summary(sfit)$strata))
}
m <- max(nchar(ystratalabs))
if(is.null(ystrataname)) ystrataname <- "Strata"
times <- seq(0, max(sfit$time), by = timeby)
.df <- data.frame(time = sfit$time, n.risk = sfit$n.risk,
n.event = sfit$n.event, surv = sfit$surv, strata = summary(sfit, censored = T)$strata,
upper = sfit$upper, lower = sfit$lower)
levels(.df$strata) <- ystratalabs
zeros <- data.frame(time = 0, surv = 1, strata = factor(ystratalabs, levels=levels(.df$strata)),
upper = 1, lower = 1)
.df <- rbind.fill(zeros, .df)
d <- length(levels(.df$strata))
p <- ggplot(.df, aes(time, surv, group = strata)) +
geom_step(aes(linetype = strata), size = 0.7) +
theme_bw() +
theme(axis.title.x = element_text(vjust = 0.5)) +
scale_x_continuous(xlabs, breaks = times, limits = c(0, max(sfit$time))) +
scale_y_continuous(ylabs, limits = c(0, 1)) +
theme(panel.grid.minor = element_blank()) +
theme(legend.position = c(ifelse(m < 10, .28, .35), ifelse(d < 4, .25, .35))) +
theme(legend.key = element_rect(colour = NA)) +
labs(linetype = ystrataname) +
theme(plot.margin = unit(c(0, 1, .5, ifelse(m < 10, 1.5, 2.5)), "lines")) +
ggtitle(main)
if(pval) {
sdiff <- survdiff(eval(sfit$call$formula), data = eval(sfit$call$data))
pval <- pchisq(sdiff$chisq, length(sdiff$n)-1, lower.tail = FALSE)
pvaltxt <- ifelse(pval < 0.0001, "p < 0.0001", paste("p =", signif(pval, 3)))
p <- p + annotate("text", x = 0.6 * max(sfit$time), y = 0.1, label = pvaltxt)
}
## Plotting the graphs
print(p)
if(returns) return(p)
}
#’ Create a Kaplan-Meier plot using ggplot2
#’
#’ @param sfit a \code{\link[survival]{survfit}} object
#’ @param table logical: Create a table graphic below the K-M plot, indicating at-risk numbers?
#’ @param returns logical: if \code{TRUE}, return an arrangeGrob object
#’ @param xlabs x-axis label
#’ @param ylabs y-axis label
#’ @param ystratalabs The strata labels. \code{Default = levels(summary(sfit)$strata)}
#’ @param ystrataname The legend name. Default = “Strata”
#’ @param timeby numeric: control the granularity along the time-axis
#’ @param main plot title
#’ @param pval logical: add the pvalue to the plot?
#’ @return a ggplot is made. if return=TRUE, then an arrangeGlob object
#’ is returned
#’ @author Abhijit Dasgupta with contributions by Gil Tomas
#’ \url{http://statbandit.wordpress.com/2011/03/08/an-enhanced-kaplan-meier-plot/}
#’ @export
#’ @examples
#’ \dontrun{
#’ data(colon)
#’ fit <- survfit(Surv(time,status)~rx, data=colon)
#' ggkm(fit, timeby=500)
#' }
ggkmTable <- function(sfit, table=TRUE,returns = FALSE,
xlabs = "Time", ylabs = "survival probability",
ystratalabs = NULL, ystrataname = NULL,
timeby = 100, main = "Kaplan-Meier Plot",
pval = TRUE, ...) {
require(plyr)
require(ggplot2)
require(survival)
require(gridExtra)
if(is.null(ystratalabs)) {
ystratalabs <- as.character(levels(summary(sfit)$strata))
}
m <- max(nchar(ystratalabs))
if(is.null(ystrataname)) ystrataname <- "Strata"
times <- seq(0, max(sfit$time), by = timeby)
.df <- data.frame(time = sfit$time, n.risk = sfit$n.risk,
n.event = sfit$n.event, surv = sfit$surv, strata = summary(sfit, censored = T)$strata,
upper = sfit$upper, lower = sfit$lower)
levels(.df$strata) <- ystratalabs
zeros <- data.frame(time = 0, surv = 1, strata = factor(ystratalabs, levels=levels(.df$strata)),
upper = 1, lower = 1)
.df <- rbind.fill(zeros, .df)
d <- length(levels(.df$strata))
p <- ggplot(.df, aes(time, surv, group = strata)) +
geom_step(aes(linetype = strata), size = 0.7) +
theme_bw() +
theme(axis.title.x = element_text(vjust = 0.5)) +
scale_x_continuous(xlabs, breaks = times, limits = c(0, max(sfit$time))) +
scale_y_continuous(ylabs, limits = c(0, 1)) +
theme(panel.grid.minor = element_blank()) +
theme(legend.position = c(ifelse(m < 10, .28, .35), ifelse(d < 4, .25, .35))) +
theme(legend.key = element_rect(colour = NA)) +
labs(linetype = ystrataname) +
theme(plot.margin = unit(c(0, 1, .5, ifelse(m < 10, 1.5, 2.5)), "lines")) +
ggtitle(main)
if(pval) {
sdiff <- survdiff(eval(sfit$call$formula), data = eval(sfit$call$data))
pval <- pchisq(sdiff$chisq, length(sdiff$n)-1, lower.tail = FALSE)
pvaltxt <- ifelse(pval < 0.0001, "p < 0.0001", paste("p =", signif(pval, 3)))
p <- p + annotate("text", x = 0.6 * max(sfit$time), y = 0.1, label = pvaltxt)
}
## Create a blank plot for place-holding
## .df <- data.frame()
blank.pic <- ggplot(.df, aes(time, surv)) +
geom_blank() +
theme_bw() +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.ticks = element_blank(), panel.grid.major = element_blank(),
panel.border = element_blank())
if(table) {
## Create table graphic to include at-risk numbers
risk.data <- data.frame(strata = summary(sfit, times = times, extend = TRUE)$strata,
time = summary(sfit, times = times, extend = TRUE)$time,
n.risk = summary(sfit, times = times, extend = TRUE)$n.risk)
data.table <- ggplot(risk.data, aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) +
#, color = strata)) +
geom_text(size = 3.5) +
theme_bw() +
scale_y_discrete(breaks = as.character(levels(risk.data$strata)), labels = ystratalabs) +
# scale_y_discrete(#format1ter = abbreviate,
# breaks = 1:3,
# labels = ystratalabs) +
scale_x_continuous("Numbers at risk", limits = c(0, max(sfit$time))) +
theme(axis.title.x = theme_text(size = 10, vjust = 1), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(),
axis.text.x = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_text(face = "bold", hjust = 1))
data.table <- data.table + theme(legend.position = "none") +
xlab(NULL) + ylab(NULL)
data.table <- data.table +
theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5)-0.28 * m), "lines"))
## Plotting the graphs
## p <- ggplotGrob(p)
## p <- addGrob(p, textGrob(x = unit(.8, "npc"), y = unit(.25, "npc"), label = pvaltxt,
## gp = gpar(fontsize = 12)))
grid.arrange(p, blank.pic, data.table,
clip = FALSE, nrow = 3, ncol = 1,
heights = unit(c(2, .1, .25),c("null", "null", "null")))
if(returns) {
a <- arrangeGrob(p, blank.pic, data.table, clip = FALSE,
nrow = 3, ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null")))
return(a)
}
}
else {
## p <- ggplotGrob(p)
## p <- addGrob(p, textGrob(x = unit(0.5, "npc"), y = unit(0.23, "npc"),
## label = pvaltxt, gp = gpar(fontsize = 12)))
print(p)
if(returns) return(p)
}
}
@dmenne

See my fork for some corrections. Also added to https://bitbucket.org/dmenne/dmisc2

@dksamuel

Sorry, the code runs without a hitch in R but no plot is produced !

@KOBonsu

Sorry, the code runs well without error but no plot is produced. Can you please help?

@mark-ubivac

I'm getting a plot but the table fails. It seems theme_text() and a number of other calls are deprecated functions in ggplot2? I'll try and trouble shoot it but the solution to change opts_text() from the original statbandit function to theme_text() here is no longer functional. I tried doing the same thing actually before finding this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment