Skip to content

Instantly share code, notes, and snippets.

@ivyleavedtoadflax
Last active October 16, 2016 11:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivyleavedtoadflax/8badfe46f34b77678f3b934ae1e42cab to your computer and use it in GitHub Desktop.
Save ivyleavedtoadflax/8badfe46f34b77678f3b934ae1e42cab to your computer and use it in GitHub Desktop.
Getting max run length with associated testing
# Have a look at rle in the console. This function is largely bastardised from
# it. I have used a testing framework testthat to ensure that the outcome is
# what we expect it to be. You can add more tests as required!
# Define the max_rl function ----
max_rl <- function(x, val) {
# x is the target vector, val is the value for which we want to know the longest
# run length.
# Test that we are passing a vector a list to the function
if (!is.vector(x) && !is.list(x)) stop("'x' must be a vector of an atomic type")
# Caclulate length of vector
n <- length(x)
# Offset the vector with itself minus the last value
y <- x[-1L] != x[-n]
# Calculate index
i <- c(which(y | is.na(y)), n)
# Calculate length and values as in rle
lengths <- diff(c(0L, i))
values <- x[i]
# Determine the max which equals val. Need to first check that val is present
# in values, if not return 0.
if (val %in% values) {
max_val <- max(lengths[values == val])
} else {
max_val <- 0
}
# This step appears to be necessary to avoid issues when calling map_int later
max_val <- as.integer(max_val)
return(max_val)
}
# Test max_rl function ----
# Now lets run a suite of tests to check that this function works in the cases
# that we expect it to.
library(testthat)
test_that(
"max_rl behaves as expected",
{
# Case when we have a run of 5 1s, and we want to count the max run of 1s
expect_equal(
5,
max_rl(c(1,1,1,1,1), 1)
)
# Case when we have fewer than the length of the vector
expect_equal(
2,
max_rl(c(1,0,1,1,0), 1)
)
# Case when there are no values that we are interested in at all.
expect_equal(
0,
max_rl(c(1,1,1,1,1), 0)
)
})
# Define the max_rl_df function ----
# This seems to work. Now lets wrap this up into a function that can handle dataframes.
max_rl_df <- function(x, val) {
# First check that x is a dataframe or a matrix (note this encompasses data_frames/
# tibbles).
if (!is.data.frame(x) && !is.matrix(x)) stop("'x' must be a data.frame or matrix")
# Takes a df an val as an argument to pass to max_rl
x <- apply(x, MARGIN = 1, FUN = function(x) max_rl(x, val))
return(x)
}
# Test the max_rl function ----
# Now run obligatory tests on max_rl_df()
test_that(
'Test that max_rl_df works as expected',
{
# Create the dataframe
gap <- data.frame(
year1 = c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE),
year2 = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE),
year3 = c(TRUE, FALSE, TRUE, TRUE, TRUE, FALSE),
year4 = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE),
year5 = c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE)
)
# Do a manual count and check that function returns expected.
expect_equal(
c(5,1,1,4,1,0),
max_rl_df(gap, TRUE)
)
# Do the same for FALSE cases
expect_equal(
c(0,3,1,1,2,5),
max_rl_df(gap, FALSE)
)
# Test that it also works on matrices
expect_equal(
c(0,3,1,1,2,5),
max_rl_df(as.matrix(gap), FALSE)
)
}
)
# Apply the max_rl function ----
# All good, so how might you use this in a pipeline?
# Create the dataframe again
gap <- data.frame(
year1 = c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE),
year2 = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE),
year3 = c(TRUE, FALSE, TRUE, TRUE, TRUE, FALSE),
year4 = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE),
year5 = c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE)
)
# Now bind the dataframe to the max length of the TRUEs.
gap <- cbind(
gap,
len_TRUE = max_rl_df(gap, TRUE),
len_FALSE = max_rl_df(gap, FALSE)
)
gap
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment