Weiming-Hu/find.sequences.R

## find.sequences.R

#
#  "`-''-/").___..--''"`-._
# (`6_ 6  )   `-.  (     ).`-.__.`)   WE ARE ...
# (_Y_.)'  ._   )  `._ `. ``-..-'    PENN STATE!
#   _ ..`--'_..-_/  /--'_.' ,'
# (il),-''  (li),'  ((!.-'
#
#
# Author: Weiming Hu (weiming@psu.edu) and Martina Calovi (mxc895@psu.edu)
#         Geoinformatics and Earth Observation Laboratory (http://geolab.psu.edu)
#         Department of Geography and Institute for CyberScience
#         The Pennsylvania State University
#

#' find.sequences
#'
#' find.sequences finds the start and end indices of runs of
#' consecutive elements of `v` that are all strictly greater than
#' `threshold` and that contain at least `min.len` elements.
#' Missing values (`NA`) never satisfy the condition, so they
#' terminate a run.
#'
#' @param v A numeric vector. It may contain `NA` and may be empty.
#' @param min.len The minimum number of consecutive elements a run
#' must contain to be reported. Must be a single non-missing value.
#' @param threshold Elements must be strictly greater than this
#' value to belong to a run. Must be a single non-missing value.
#'
#' @return A data frame with one row per run and two columns,
#' `Start.ID` and `End.ID`, holding the first and last index of the
#' run (both inclusive). If no run qualifies, the data frame has
#' zero rows but still has both columns.
#'
#' @examples
#' v <- sin(seq(1, 20, length.out = 200))
#' v[sample(length(v), floor(length(v) * 0.2))] <- NA
#'
#' plot(v, type = 'b')
#' min.len <- 5
#' threshold <- 0
#' df <- find.sequences(v, min.len, threshold)
#' abline(v = df$Start.ID, col = 'green')
#' abline(v = df$End.ID, col = 'red')
#'
#' @md
#' @export
find.sequences <- function(v, min.len, threshold) {

  # Both controls are used as scalars; reject anything else up front
  # instead of failing half-way through with an obscure error.
  stopifnot(
    length(min.len) == 1L, !is.na(min.len),
    length(threshold) == 1L, !is.na(threshold)
  )

  # TRUE where the element belongs to a run. NA (and NaN) elements are
  # forced to FALSE so that they break a run exactly like a low value.
  # as.vector() drops names/dims so that rle() accepts the mask.
  above <- as.vector(!is.na(v) & v > threshold)

  # Run-length encode the mask; every TRUE run is a candidate sequence.
  runs <- rle(above)
  run.end <- cumsum(runs$lengths)
  run.start <- run.end - runs$lengths + 1L

  # Keep only the runs above the threshold that are long enough.
  keep <- runs$values & runs$lengths >= min.len

  data.frame(Start.ID = run.start[keep],
             End.ID = run.end[keep])
}

	#
	# "`-''-/").___..--''"`-._
	# (`6_ 6 ) `-. ( ).`-.__.`) WE ARE ...
	# (_Y_.)' ._ ) `._ `. ``-..-' PENN STATE!
	# _ ..`--'_..-_/ /--'_.' ,'
	# (il),-'' (li),' ((!.-'
	#
	#
	# Author: Weiming Hu (weiming@psu.edu) and Martina Calovi (mxc895@psu.edu)
	# Geoinformatics and Earth Observation Laboratory (http://geolab.psu.edu)
	# Department of Geography and Institute for CyberScience
	# The Pennsylvania State University
	#

	#' find.sequences
	#'
	#' find.sequences finds the start and end indices of runs of
	#' consecutive elements of `v` that are all strictly greater than
	#' `threshold` and that contain at least `min.len` elements.
	#' Missing values (`NA`) never satisfy the condition, so they
	#' terminate a run.
	#'
	#' @param v A numeric vector. It may contain `NA` and may be empty.
	#' @param min.len The minimum number of consecutive elements a run
	#' must contain to be reported. Must be a single non-missing value.
	#' @param threshold Elements must be strictly greater than this
	#' value to belong to a run. Must be a single non-missing value.
	#'
	#' @return A data frame with one row per run and two columns,
	#' `Start.ID` and `End.ID`, holding the first and last index of the
	#' run (both inclusive). If no run qualifies, the data frame has
	#' zero rows but still has both columns.
	#'
	#' @examples
	#' v <- sin(seq(1, 20, length.out = 200))
	#' v[sample(length(v), floor(length(v) * 0.2))] <- NA
	#'
	#' plot(v, type = 'b')
	#' min.len <- 5
	#' threshold <- 0
	#' df <- find.sequences(v, min.len, threshold)
	#' abline(v = df$Start.ID, col = 'green')
	#' abline(v = df$End.ID, col = 'red')
	#'
	#' @md
	#' @export
	find.sequences <- function(v, min.len, threshold) {

	  # Both controls are used as scalars; reject anything else up front
	  # instead of failing half-way through with an obscure error.
	  stopifnot(
	    length(min.len) == 1L, !is.na(min.len),
	    length(threshold) == 1L, !is.na(threshold)
	  )

	  # TRUE where the element belongs to a run. NA (and NaN) elements are
	  # forced to FALSE so that they break a run exactly like a low value.
	  # as.vector() drops names/dims so that rle() accepts the mask.
	  above <- as.vector(!is.na(v) & v > threshold)

	  # Run-length encode the mask; every TRUE run is a candidate sequence.
	  runs <- rle(above)
	  run.end <- cumsum(runs$lengths)
	  run.start <- run.end - runs$lengths + 1L

	  # Keep only the runs above the threshold that are long enough.
	  keep <- runs$values & runs$lengths >= min.len

	  data.frame(Start.ID = run.start[keep],
	             End.ID = run.end[keep])
	}