Skip to content

Instantly share code, notes, and snippets.

@walkerjeffd
Last active March 9, 2018 21:43
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walkerjeffd/6179768 to your computer and use it in GitHub Desktop.
Save walkerjeffd/6179768 to your computer and use it in GitHub Desktop.
R functions for assigning and summarizing storm events based on interevent period and minimum event total precipitation
assign.events <- function(df, datetime.name="DATETIME", value.name="VALUE", interevent=8, threshold=0.1) {
# assigns events to data frame such as storm events or discharge events
require(plyr)
if (!(datetime.name %in% names(df))) {
stop(paste0('Could not find datetime column called ', datetime.name))
}
if (!(value.name %in% names(df))) {
stop(paste0('Could not find value column called ', value.name))
}
if (!is.regular(df[, datetime.name])) {
stop('Dataframe is not regular')
}
# make sure df is sorted by datetime
df <- df[order(df[, datetime.name]),]
# extract datetime and value
x <- data.frame(DATETIME=df[, datetime.name],
VALUE=df[, value.name])
# compute runs
x.rle <- rle(x$VALUE > 0)
# EVENT: logical column indicating if timestep is part of event
# does not yet account for events that do not meet minimum threshold
x$EVENT <- rep(x.rle$lengths*(x.rle$values | x.rle$lengths < interevent), x.rle$lengths)
x$EVENT <- x$EVENT > 0
# EVENT_ID: numeric column assigning unique IDs to each event
x$EVENT_ID <- c(as.numeric(x$EVENT[1]), ifelse(diff(x$EVENT)>0, diff(x$EVENT), 0))
x$EVENT_ID <- cumsum(x$EVENT_ID)*x$EVENT
# compute sum of each event
x.event <- ddply(x, .(EVENT_ID), summarise, SUM=sum(VALUE))
# take subset of events that meet minimum threshold
x.event <- subset(x.event, SUM >= threshold)
# assign events an ID of 0 if they did not meet the threshold
x$EVENT_ID[which(!(x$EVENT_ID %in% x.event$EVENT_ID))] <- 0
# reassign EVENT based on current EVENT_ID
x$EVENT <- x$EVENT_ID > 0
# reassign EVENT_ID based on new EVENT
x$EVENT_ID <- c(as.numeric(x$EVENT[1]), ifelse(diff(x$EVENT)>0, diff(x$EVENT), 0))
x$EVENT_ID <- cumsum(x$EVENT_ID)*x$EVENT
# assign DRY_ID for dry periods
x$DRY_ID <- c(as.numeric(!x$EVENT[1]), ifelse(diff(!x$EVENT)>0, diff(!x$EVENT), 0))
x$DRY_ID <- cumsum(x$DRY_ID)*(!x$EVENT)
# get rle of EVENT_ID
x.rle.event <- rle(x$EVENT_ID)
# add counter for each dry/wet event
x$COUNTER <- unlist(sapply(x.rle.event$lengths, seq))
# set EVENT_ID during dry events and DRY_ID during wet events to NA
x$EVENT_ID[which(x$EVENT_ID==0)] <- NA
x$DRY_ID[which(x$DRY_ID==0)] <- NA
# assign counter for dry events (number of hours since end of last wet event)
x$DRY_HOURS <- ifelse(!is.na(x$DRY_ID), x$COUNTER, 0)
# assign counter for wet events (number of hours since end of last dry event)
x$EVENT_HOURS <- ifelse(!is.na(x$EVENT_ID), x$COUNTER, 0)
# add EVENT_ID, DRY_HOURS, WET_HOURS to df and return
df$EVENT_ID <- x$EVENT_ID
df$EVENT_HOURS <- x$EVENT_HOURS
df$DRY_ID <- x$DRY_ID
df$DRY_HOURS <- x$DRY_HOURS
df
}
events.summary <- function(df, datetime.name="DATETIME", value.name="VALUE") {
# summarizes each event event in dataframe df
# assumes df contains columns EVENT_ID (from assign.events), DATETIME (POSIXct), and VALUE (numeric)
if (!("EVENT_ID" %in% names(df))) {
stop("Unable to find column EVENT_ID in dataframe")
}
if (!(datetime.name %in% names(df))) {
stop("Unable to find datetime column in dataframe")
}
if (!(value.name %in% names(df))) {
stop("Unable to find value column in dataframe")
}
df$DATETIME <- df[, datetime.name]
df$VALUE <- df[, value.name]
event <- ddply(subset(df, !is.na(EVENT_ID)), c("EVENT_ID"), summarise,
DURATION=length(DATETIME),
START=min(DATETIME),
END=max(DATETIME),
TOTAL=sum(VALUE),
PEAK=max(VALUE))
event
}
@stribstrib
Copy link

This is incredibly helpful;
I'm relatively new to R and have been working on trying to write my own version of rain event summary code.

One issue/request:
It looks as though the assumption is that each record is equivalent to a 1 hr timestep. Can you modify the function to incorporate a variable that allows users to define their record length? (For instance, much of my meteorological data is in 30 min steps).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment