Created
September 9, 2017 11:37
-
-
Save JerryWho/1bf919ef73792569eb38f6462c6d7a8e to your computer and use it in GitHub Desktop.
R: Filling missing dates in a grouped time series.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(lubridate) | |
# Fix the RNG seed so the random value columns below are reproducible.
set.seed(1234)

# The time series should run from 2017-01-01 to 2017-01-10.
# The constant column `v` is a dummy key: joining two tables on it pairs
# every row of one with every row of the other (a cross join).
date <- data.frame(
  date = seq.Date(
    from = ymd("2017-01-01"),
    to = ymd("2017-01-10"),
    by = "days"
  ),
  v = 1
)

# Two grouping dimensions
d1 <- data.frame(d1 = c("A", "B", "C", "D"), v = 1)
d2 <- data.frame(d2 = c(1, 2, 3, 4, 5), v = 1)

# Generate the data.frame: every date x d1 x d2 combination (10 * 4 * 5 rows).
# The join key is named explicitly instead of relying on the natural join
# over all shared columns; the dummy key is dropped afterwards.
df <- full_join(date, full_join(d1, d2, by = "v"), by = "v") %>%
  select(date, d1, d2)

# and add two value columns, sized from the data rather than a literal 200
df$v1 <- runif(nrow(df))
df$v2 <- runif(nrow(df))

# group by the dimension columns
df <- df %>%
  group_by(d1, d2)

# create missing dates by dropping every row whose v1 exceeds 0.8
df.missing <- df %>%
  filter(v1 <= 0.8)

# Show what is left of group (A, 5).
# NOTE(review): the original author states that 2017-01-01 and 2017-01-10 are
# missing for this group with this seed -- confirm against the printed output.
df.missing %>%
  filter(d1 == "A" & d2 == 5)

# Date range to fill, taken from the data that remains after the filter.
start <- min(df.missing$date)
end <- max(df.missing$date)
all.dates <- data.frame(date = seq.Date(start, end, by = "day"))
# Fill in the missing dates of a single (d1, d2) group.
#
# Arguments:
#   data   Rows of one group. Must have `date`, `d1` and `d2` columns and at
#          least one row; the d1/d2 values of the first row are used for the
#          whole result.
#   dates  Data frame with a `date` column listing every date the group
#          should cover. Defaults to the global `all.dates`, so existing
#          calls of the form my_join(data) keep working.
#
# Returns `data` full-joined with `dates` on `date`, grouped by d1 and d2.
# Dates that were absent from `data` appear as extra rows whose remaining
# columns (other than d1 and d2) are NA.
my_join <- function(data, dates = all.dates) {
  # An empty group has no d1/d2 value to copy; fail with a clear message.
  stopifnot(nrow(data) > 0)

  # get value of both dimensions
  d1.set <- data$d1[[1]]
  d2.set <- data$d2[[1]]

  # Join on the date column explicitly rather than on whatever columns the
  # two tables happen to share.
  full_join(data, dates, by = "date") %>%
    # First we need to ungroup. Otherwise we can't change d1 and d2 because
    # they are grouping variables.
    ungroup() %>%
    # Rows added by the join have NA in d1/d2; overwrite with the group's
    # values.
    mutate(
      d1 = d1.set,
      d2 = d2.set
    ) %>%
    group_by(d1, d2)
}
# Before: the rows of group (A, 5) as they are in df.missing.
filter(df.missing, d1 == "A", d2 == 5)

# After: run my_join() on every (d1, d2) group, then show group (A, 5) again.
df.missing %>%
  do(my_join(.)) %>%
  filter(d1 == "A", d2 == 5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment