Last active
October 16, 2016 11:11
-
-
Save ivyleavedtoadflax/8badfe46f34b77678f3b934ae1e42cab to your computer and use it in GitHub Desktop.
Getting max run length with associated testing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Have a look at rle in the console. This function is largely bastardised from | |
# it. I have used a testing framework testthat to ensure that the outcome is | |
# what we expect it to be. You can add more tests as required! | |
# Define the max_rl function ---- | |
max_rl <- function(x, val) { | |
# x is the target vector, val is the value for which we want to know the longest | |
# run length. | |
# Test that we are passing a vector a list to the function | |
if (!is.vector(x) && !is.list(x)) stop("'x' must be a vector of an atomic type") | |
# Caclulate length of vector | |
n <- length(x) | |
# Offset the vector with itself minus the last value | |
y <- x[-1L] != x[-n] | |
# Calculate index | |
i <- c(which(y | is.na(y)), n) | |
# Calculate length and values as in rle | |
lengths <- diff(c(0L, i)) | |
values <- x[i] | |
# Determine the max which equals val. Need to first check that val is present | |
# in values, if not return 0. | |
if (val %in% values) { | |
max_val <- max(lengths[values == val]) | |
} else { | |
max_val <- 0 | |
} | |
# This step appears to be necessary to avoid issues when calling map_int later | |
max_val <- as.integer(max_val) | |
return(max_val) | |
} | |
# Test max_rl function ---- | |
# Now lets run a suite of tests to check that this function works in the cases | |
# that we expect it to. | |
library(testthat) | |
test_that( | |
"max_rl behaves as expected", | |
{ | |
# Case when we have a run of 5 1s, and we want to count the max run of 1s | |
expect_equal( | |
5, | |
max_rl(c(1,1,1,1,1), 1) | |
) | |
# Case when we have fewer than the length of the vector | |
expect_equal( | |
2, | |
max_rl(c(1,0,1,1,0), 1) | |
) | |
# Case when there are no values that we are interested in at all. | |
expect_equal( | |
0, | |
max_rl(c(1,1,1,1,1), 0) | |
) | |
}) | |
# Define the max_rl_df function ---- | |
# This seems to work. Now lets wrap this up into a function that can handle dataframes. | |
max_rl_df <- function(x, val) { | |
# First check that x is a dataframe or a matrix (note this encompasses data_frames/ | |
# tibbles). | |
if (!is.data.frame(x) && !is.matrix(x)) stop("'x' must be a data.frame or matrix") | |
# Takes a df an val as an argument to pass to max_rl | |
x <- apply(x, MARGIN = 1, FUN = function(x) max_rl(x, val)) | |
return(x) | |
} | |
# Test the max_rl function ---- | |
# Now run obligatory tests on max_rl_df() | |
test_that( | |
'Test that max_rl_df works as expected', | |
{ | |
# Create the dataframe | |
gap <- data.frame( | |
year1 = c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE), | |
year2 = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE), | |
year3 = c(TRUE, FALSE, TRUE, TRUE, TRUE, FALSE), | |
year4 = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE), | |
year5 = c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE) | |
) | |
# Do a manual count and check that function returns expected. | |
expect_equal( | |
c(5,1,1,4,1,0), | |
max_rl_df(gap, TRUE) | |
) | |
# Do the same for FALSE cases | |
expect_equal( | |
c(0,3,1,1,2,5), | |
max_rl_df(gap, FALSE) | |
) | |
# Test that it also works on matrices | |
expect_equal( | |
c(0,3,1,1,2,5), | |
max_rl_df(as.matrix(gap), FALSE) | |
) | |
} | |
) | |
# Apply the max_rl function ---- | |
# All good, so how might you use this in a pipeline? | |
# Create the dataframe again | |
gap <- data.frame( | |
year1 = c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE), | |
year2 = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE), | |
year3 = c(TRUE, FALSE, TRUE, TRUE, TRUE, FALSE), | |
year4 = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE), | |
year5 = c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE) | |
) | |
# Now bind the dataframe to the max length of the TRUEs. | |
gap <- cbind( | |
gap, | |
len_TRUE = max_rl_df(gap, TRUE), | |
len_FALSE = max_rl_df(gap, FALSE) | |
) | |
gap |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment