Skip to content

Instantly share code, notes, and snippets.

@christophergandrud
Last active March 31, 2020 04:58
Show Gist options
  • Save christophergandrud/5675688 to your computer and use it in GitHub Desktop.
Save christophergandrud/5675688 to your computer and use it in GitHub Desktop.
Function for creating nonparamentric multiple change point plots with estimates from the ecp package.
#' Function for creating nonparamentric multiple change point plots with
#' estimates from the ecp package.
#'
#' @param data A data frame with the covariates and time variable.
#' @param Vars A character vector listing the names of the variates from
#' \code{data} to include in the nonparametric multiple change point analysis.
#' @param TimeVar A character string naming the time variable in \code{data}.
#' Must be in a format handled by POSIX, e.g. YYYY-MM-DD.
#' @param sig.lvl The level at which to sequentially test if a proposed change
#' point is statistically significant.
#' @param R The maximum number of random permutations to use in each iteration
#' of the permutaiton test. The permutation test p-value is calculated usign the
#' method outlined in Gandy (2009).
#' @param k Number of change point locations to estimate, surpressing the
#' permutation based testing. If \code{k=NULL} then only the statistically
#' significant estimated change points are returned.
#' @param min.size Minimum number of observations between change points.
#' @param alpha The moment index used for determining the distance between and
#' within segments.
#' @param PlotVars A character string of the variables from the analysis to
#' plot.
#' @param Grid Logical. If \code{TRUE} then each variable will be given its own
#' plot. If \code{FALSE} then values from all of the variables in \code{Var}
#' will be plotted on the same figure.
#' @param base_size numeric. The plot theme's base font size.
#' See \link{ggplot2}.
#' @param palette If a string, it will use that named palette. If a number, will
#' index into the list of palettes of appropriate type,
#' @param leg.name A character string. If \code{Facet = FALSE}, it allows you to
#' specify the legend name.
#' @param Titles A character vector of the same lenght as \code{Vars} giving
#' alternative title names for the plot titles. Only applies if
#' \code{Grid = TRUE}.
#' @param JustGraph logical indicating whether or not to only create a
#' grid-style time-series graph of the variables, without running a change point
#' analysis. When \code{JustGraph = TRUE} all other arguments apart from
#' \code{data}, \code{Vars}, \code{TimeVar}, \code{Titles} are ignored.
#'
#' @details Uses \code{\link{e.divisive}} to run a nonparametric multiple chang
#' point analysis (James and Matteson, 2013) on variables in a data frame. It
#' then uses \code{\link{ggplot2}} to plot the variable values with vertical
#' dashed lines indicating the estimated change points.
#'
#'
#' @seealso \link{ecp} and \link{ggplot2}
#'
#' @source Matteson, David S and Nicholas A James. 2014. ''A Nonparametric
#' Approach for Multiple Change Point Analysis of Multivariate Data.'' Journal
#' of the American Statistical Association 109(505):334-345.
#'
#' Gandy, A. (2009) "Sequential implementation of Monte Carlo tests with
#' uniformly bounded resampling risk." Journal of the American Statistical
#' Association.
#'
e.divGG <- function(data, Vars, TimeVar, sig.lvl = 0.05, R = 199, k = NULL,
min.size = 30, alpha = 1, PlotVars = NULL, Grid = TRUE,
base_size = 12, palette = "Set1", leg.name = "",
Titles = NULL, JustGraph = FALSE)
{
# Load required packages
require(ecp)
require(reshape2)
require(ggplot2)
require(gridExtra)
# Create plot titles if non specified
if (is.null(Titles)) Titles <- Vars
# Create T x d matrix
DataMatrix <- as.matrix(data[, Vars])
# Turn of faceting if DataMatrix only has 1 column
if (ncol(DataMatrix) == 1) Facet <- FALSE
# Function to graph only variables, not change points
NoCPGraph <- function(){
KeepVars <- c(TimeVar, Vars)
SubData <- data[, KeepVars]
SubMolten <- melt(data = SubData, id.vars = TimeVar,
meansure.vars = Vars)
SubMolten[, 1] <- as.POSIXct(SubMolten[, 1])
eachVar <- unique(SubMolten$variable)
VarLabels <- data.frame(eachVar, Titles, stringsAsFactors = FALSE)
names(SubMolten) <- c("Time", "variable", "value")
Rows <- 1:nrow(VarLabels)
p <- list()
for (i in Rows){
SubData <- subset(SubMolten, variable == VarLabels[i, 1])
Title_i <- VarLabels[i, 2]
p[[i]] <- ggplot(data = SubData,
aes(x = Time, y = value)) +
geom_line() +
xlab("") + ylab("") +
ggtitle(paste(Title_i, "\n")) +
theme_bw(base_size = base_size)
}
suppressWarnings(do.call(grid.arrange, p))
}
if (JustGraph) NoCPGraph()
else if (!isTRUE(JustGraph)){
# Estimate change points
CP <- e.divisive(X = DataMatrix, sig.lvl = sig.lvl, R = R, k = k,
min.size = min.size, alpha = alpha)
# Extract change points
CPEstimates <- CP$estimates
# Remove first/last points
# (these are the first and last values of the matrix)
CPEstSub <- CPEstimates[c(-1, -length(CPEstimates))]
if (length(CPEstSub) == 0){
NoCPGraph()
message(paste("\nNo change points found at the", sig.lvl,
"significance level.\n"))
}
else if (length(CPEstSub) != 0){
#Find corresponding TimeVar value
CPTimes <- as.POSIXct(data[CPEstSub, TimeVar])
# Report estimates
CPMessage <- lapply(CPTimes, function(x) paste(x, "\n"))
message("\nChange points estimated at:")
message(CPMessage)
# Melt data frame so that it can be plotted
if (!is.null(PlotVars)){
DataSub <- data[, c(TimeVar, PlotVars)]
DataMolten <- melt(data = DataSub, id.vars = TimeVar,
measure.vars = PlotVars)
}
else if (is.null(PlotVars)){
DataSub <- data[, c(TimeVar, Vars)]
DataMolten <- melt(data = DataSub, id.vars = TimeVar,
measure.vars = Vars)
}
# Clean pre plotting
names(DataMolten) <- c("Time", "GroupVar", "Value")
DataMolten$Time <- as.POSIXct(DataMolten$Time)
DataMolten <- merge(DataMolten, CPTimes, all = TRUE)
names(DataMolten) <- c("Time", "GroupVar", "Value", "Lines")
DataMolten$Lines[DataMolten$Time != DataMolten$Lines] <- NA
# Plot
if (length(unique(DataMolten$GroupVar)) == 1){
ggplot(data = DataMolten, aes(x = Time, y = Value)) +
geom_line() +
geom_vline(aes(xintercept = as.numeric(Lines)),
linetype = "longdash", colour = "#DE2D26") +
xlab("") + ylab("") +
theme_bw(base_size = base_size)
}
else if (length(unique(DataMolten$GroupVar)) > 1) {
if (Grid == FALSE){
ggplot(data = DataMolten, aes(x = Time, y = Value,
group = GroupVar, colour = GroupVar)) +
geom_line() +
geom_vline(aes(xintercept = as.numeric(Lines)),
linetype = "longdash") +
scale_colour_brewer(palette = palette,
name = leg.name) +
xlab("") + ylab("") +
theme_bw(base_size = base_size)
}
else if (Grid == TRUE){
eachVar <- unique(DataMolten$GroupVar)
VarLabels <- data.frame(eachVar, Titles,
stringsAsFactors = FALSE)
Rows <- 1:nrow(VarLabels)
p <- list()
for (i in Rows){
SubData <- subset(DataMolten,
GroupVar == VarLabels[i, 1])
Title_i <- VarLabels[i, 2]
p[[i]] <- ggplot(data = SubData,
aes(x = Time, y = Value)) +
geom_line() +
geom_vline(aes(xintercept =
as.numeric(Lines)),
linetype = "longdash",
colour = "#DE2D26") +
xlab("") + ylab("") +
ggtitle(paste(Title_i, "\n")) +
theme_bw(base_size = base_size)
}
suppressWarnings(do.call(grid.arrange, p))
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment