Last active
October 12, 2017 23:49
-
-
Save mpettis/baca4c428a8ab499b98b513e7fa3255e to your computer and use it in GitHub Desktop.
R: Time Interval Consolidation of Overlaps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Very procedural code, but works. If I have a set of time intervals (lubridate intervals), and I want to consolidate any chain of overlapping intervals into a single interval with left end the min of all start times and right side as max of all end times, this is the code to do that. | |
``` r | |
## Consolidating intervals | |
suppressPackageStartupMessages(library(dplyr)) | |
suppressPackageStartupMessages(library(lubridate)) | |
# In this table of intervals, rows 3,4, and 5 form a chain of intervals. They should be rolled into 1 interval. | |
# And note rows 3 and 5 do not themselves overlap, but they are chained together by having overlap with row 4. | |
dat <- read.csv(text=" | |
start,end | |
2017-09-01 00:00,2017-09-01 00:01 | |
2017-09-01 00:02,2017-09-01 00:03 | |
2017-09-01 00:04,2017-09-01 00:08 | |
2017-09-01 00:07,2017-09-01 00:15 | |
2017-09-01 00:09,2017-09-01 00:16 | |
2017-09-01 00:20,2017-09-01 00:22") %>% | |
transmute( | |
gtStart = ymd_hm(start) | |
, gtEnd = ymd_hm(end)) | |
iv_clean <- list() | |
iv_process <- interval(dat$gtStart, dat$gtEnd) | |
while(length(iv_process) > 0) { | |
e <- iv_process[1] | |
iv_process <- iv_process[-1] | |
## If e is last item in iv_process, add it to iv_clean and stop processing | |
if (!length(iv_process)) { | |
if (!length(iv_clean)) { | |
iv_clean <- e | |
} else { | |
iv_clean <- c(e, iv_clean) | |
} | |
break | |
} | |
## For every remaining interval that overlaps e, union it with e | |
## And trip a flag that says that we found an overlapping interval | |
e_nonoverlapping <- TRUE | |
for (i in 1:length(iv_process)) { | |
if (int_overlaps(e, iv_process[i])) { | |
e_nonoverlapping <- FALSE | |
iv_process[i] <- union(e, iv_process[i]) | |
} | |
} | |
## If e did not overlap with any interval, then add it to iv_clean | |
## Otherwise, don't, and continue processing iv_process | |
if (e_nonoverlapping) { | |
if (!length(iv_clean)) { | |
iv_clean <- e | |
} else { | |
iv_clean <- c(e, iv_clean) | |
} | |
} | |
} | |
## Print result | |
print(iv_clean) | |
#> [1] 2017-09-01 00:20:00 UTC--2017-09-01 00:22:00 UTC | |
#> [2] 2017-09-01 00:04:00 UTC--2017-09-01 00:16:00 UTC | |
#> [3] 2017-09-01 00:02:00 UTC--2017-09-01 00:03:00 UTC | |
#> [4] 2017-09-01 00:00:00 UTC--2017-09-01 00:01:00 UTC | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment