mpettis/gist:baca4c428a8ab499b98b513e7fa3255e

## gistfile1.txt
Very procedural code, but works.  If I have a set of time intervals (lubridate intervals), and I want to consolidate any chain of overlapping intervals into a single interval with left end the min of all start times and right side as max of all end times, this is the code to do that.

``` r
## Consolidating intervals

suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(lubridate))

# In this table of intervals, rows 3,4, and 5 form a chain of intervals.  They should be rolled into 1 interval.
# And note rows 3 and 5 do not themselves overlap, but they are chained together by having overlap with row 4.
dat <- read.csv(text="
start,end
2017-09-01 00:00,2017-09-01 00:01
2017-09-01 00:02,2017-09-01 00:03
2017-09-01 00:04,2017-09-01 00:08
2017-09-01 00:07,2017-09-01 00:15
2017-09-01 00:09,2017-09-01 00:16
2017-09-01 00:20,2017-09-01 00:22") %>%
  transmute(
    gtStart = ymd_hm(start)
    , gtEnd = ymd_hm(end))


iv_clean <- list()
iv_process <- interval(dat$gtStart, dat$gtEnd)

while(length(iv_process) > 0) {
  e <- iv_process[1]
  iv_process <- iv_process[-1]

  ## If e is last item in iv_process, add it to iv_clean and stop processing
  if (!length(iv_process)) {
    if (!length(iv_clean)) {
      iv_clean <- e
    } else {
      iv_clean <- c(e, iv_clean)
    }
    break
  }

  ## For every remaining interval that overlaps e, union it with e
  ## And trip a flag that says that we found an overlapping interval
  e_nonoverlapping <- TRUE
  for (i in 1:length(iv_process)) {
    if (int_overlaps(e, iv_process[i])) {
      e_nonoverlapping <- FALSE
      iv_process[i] <- union(e, iv_process[i])
    }
  }

  ## If e did not overlap with any interval, then add it to iv_clean
  ## Otherwise, don't, and continue processing iv_process
  if (e_nonoverlapping) {
    if (!length(iv_clean)) {
      iv_clean <- e
    } else {
      iv_clean <- c(e, iv_clean)
    }
  }
}


## Print result
print(iv_clean)
#> [1] 2017-09-01 00:20:00 UTC--2017-09-01 00:22:00 UTC
#> [2] 2017-09-01 00:04:00 UTC--2017-09-01 00:16:00 UTC
#> [3] 2017-09-01 00:02:00 UTC--2017-09-01 00:03:00 UTC
#> [4] 2017-09-01 00:00:00 UTC--2017-09-01 00:01:00 UTC
```
	Very procedural code, but works. If I have a set of time intervals (lubridate intervals), and I want to consolidate any chain of overlapping intervals into a single interval with left end the min of all start times and right side as max of all end times, this is the code to do that.

	``` r
	## Consolidating intervals

	suppressPackageStartupMessages(library(dplyr))
	suppressPackageStartupMessages(library(lubridate))

	# In this table of intervals, rows 3,4, and 5 form a chain of intervals. They should be rolled into 1 interval.
	# And note rows 3 and 5 do not themselves overlap, but they are chained together by having overlap with row 4.
	dat <- read.csv(text="
	start,end
	2017-09-01 00:00,2017-09-01 00:01
	2017-09-01 00:02,2017-09-01 00:03
	2017-09-01 00:04,2017-09-01 00:08
	2017-09-01 00:07,2017-09-01 00:15
	2017-09-01 00:09,2017-09-01 00:16
	2017-09-01 00:20,2017-09-01 00:22") %>%
	transmute(
	gtStart = ymd_hm(start)
	, gtEnd = ymd_hm(end))


	iv_clean <- list()
	iv_process <- interval(dat$gtStart, dat$gtEnd)

	while(length(iv_process) > 0) {
	e <- iv_process[1]
	iv_process <- iv_process[-1]

	## If e is last item in iv_process, add it to iv_clean and stop processing
	if (!length(iv_process)) {
	if (!length(iv_clean)) {
	iv_clean <- e
	} else {
	iv_clean <- c(e, iv_clean)
	}
	break
	}

	## For every remaining interval that overlaps e, union it with e
	## And trip a flag that says that we found an overlapping interval
	e_nonoverlapping <- TRUE
	for (i in 1:length(iv_process)) {
	if (int_overlaps(e, iv_process[i])) {
	e_nonoverlapping <- FALSE
	iv_process[i] <- union(e, iv_process[i])
	}
	}

	## If e did not overlap with any interval, then add it to iv_clean
	## Otherwise, don't, and continue processing iv_process
	if (e_nonoverlapping) {
	if (!length(iv_clean)) {
	iv_clean <- e
	} else {
	iv_clean <- c(e, iv_clean)
	}
	}
	}


	## Print result
	print(iv_clean)
	#> [1] 2017-09-01 00:20:00 UTC--2017-09-01 00:22:00 UTC
	#> [2] 2017-09-01 00:04:00 UTC--2017-09-01 00:16:00 UTC
	#> [3] 2017-09-01 00:02:00 UTC--2017-09-01 00:03:00 UTC
	#> [4] 2017-09-01 00:00:00 UTC--2017-09-01 00:01:00 UTC
	```