# JerryWho/fill_grouped_time_series.R

## fill_grouped_time_series.R
library(dplyr)
library(lubridate)

set.seed(1234)
# The time series should run from 2017-01-01 to 2017-01-10.
# The constant column v only serves as a join key for the cross product below.
date <- data.frame(
  date = seq.Date(
    from = ymd("2017-01-01"),
    to = ymd("2017-01-10"),
    by = "days"
  ),
  v = 1
)
# Two grouping dimensions
d1 <- data.frame(d1 = c("A", "B", "C", "D"), v = 1)
d2 <- data.frame(d2 = c(1, 2, 3, 4, 5), v = 1)

# Generate the data frame: joining on the constant key v pairs every date
# with every d1 and every d2. The key is named explicitly so the join does
# not depend on whichever columns the inputs happen to share.
df <- full_join(date, full_join(d1, d2, by = "v"), by = "v") %>%
  select(date, d1, d2)
# and add two value columns, sized from the data instead of a hard-coded 200
df$v1 <- runif(nrow(df))
df$v2 <- runif(nrow(df))

# group by the dimension columns
df <- df %>%
  group_by(d1, d2)

# create missing dates by dropping every row whose v1 exceeds 0.8
df.missing <- df %>%
  filter(v1 <= 0.8)

# Show one group that lost rows. The original note names 2017-01-01 and
# 2017-01-10 as missing for (A, 5); that depends on the seed and the RNG.
df.missing %>%
  filter(d1 == "A" & d2 == 5)


# The full date range is taken from the dates that survived the filter.
start <- min(df.missing$date)
end   <- max(df.missing$date)

all.dates <- data.frame(date = seq.Date(start, end, by = "day"))

# Fill in the dates that are missing from the rows of one (d1, d2) group.
#
# Args:
#   data:  rows of a single group; needs the columns date, d1 and d2 and at
#          least one row, because the group's d1/d2 are read from row one.
#   dates: data frame with a date column listing every date the group should
#          contain. Defaults to the global all.dates, so existing calls of
#          the form my_join(data) keep working.
#
# Returns:
#   The full join of data and dates on date, with d1 and d2 set to the
#   group's values on every row, grouped by d1 and d2.
my_join <- function(data, dates = all.dates) {
  if (nrow(data) == 0) {
    stop("my_join() needs at least one row to read d1 and d2 from",
         call. = FALSE)
  }

  # get value of both dimensions
  d1.set <- data$d1[[1]]
  d2.set <- data$d2[[1]]

  # Name the key explicitly so the join cannot silently pick up other
  # columns that data and dates might share.
  full_join(data, dates, by = "date") %>%
    # First we need to ungroup. Otherwise we can't change d1 and d2 because
    # they are grouping variables.
    ungroup() %>%
    # Rows that came only from dates have no d1/d2 yet; set them everywhere.
    mutate(
      d1 = d1.set,
      d2 = d2.set
    ) %>%
    group_by(d1, d2)
}

# Before: the rows that are left for group (A, 5).
filter(df.missing, d1 == "A", d2 == 5)


# After: apply my_join() to every (d1, d2) group and show the same group.
df.missing %>%
  do(my_join(.)) %>%
  filter(d1 == "A", d2 == 5)
	library(dplyr)
	library(lubridate)

	set.seed(1234)
	# The time series should run from 2017-01-01 to 2017-01-10.
	# The constant column v only serves as a join key for the cross product below.
	date <- data.frame(
	  date = seq.Date(
	    from = ymd("2017-01-01"),
	    to = ymd("2017-01-10"),
	    by = "days"
	  ),
	  v = 1
	)
	# Two grouping dimensions
	d1 <- data.frame(d1 = c("A", "B", "C", "D"), v = 1)
	d2 <- data.frame(d2 = c(1, 2, 3, 4, 5), v = 1)

	# Generate the data frame: joining on the constant key v pairs every date
	# with every d1 and every d2. The key is named explicitly so the join does
	# not depend on whichever columns the inputs happen to share.
	df <- full_join(date, full_join(d1, d2, by = "v"), by = "v") %>%
	  select(date, d1, d2)
	# and add two value columns, sized from the data instead of a hard-coded 200
	df$v1 <- runif(nrow(df))
	df$v2 <- runif(nrow(df))

	# group by the dimension columns
	df <- df %>%
	  group_by(d1, d2)

	# create missing dates by dropping every row whose v1 exceeds 0.8
	df.missing <- df %>%
	  filter(v1 <= 0.8)

	# Show one group that lost rows. The original note names 2017-01-01 and
	# 2017-01-10 as missing for (A, 5); that depends on the seed and the RNG.
	df.missing %>%
	  filter(d1 == "A" & d2 == 5)


	# The full date range is taken from the dates that survived the filter.
	start <- min(df.missing$date)
	end <- max(df.missing$date)

	all.dates <- data.frame(date = seq.Date(start, end, by = "day"))

	# Fill in the dates that are missing from the rows of one (d1, d2) group.
	#
	# Args:
	#   data:  rows of a single group; needs the columns date, d1 and d2 and at
	#          least one row, because the group's d1/d2 are read from row one.
	#   dates: data frame with a date column listing every date the group should
	#          contain. Defaults to the global all.dates, so existing calls of
	#          the form my_join(data) keep working.
	#
	# Returns:
	#   The full join of data and dates on date, with d1 and d2 set to the
	#   group's values on every row, grouped by d1 and d2.
	my_join <- function(data, dates = all.dates) {
	  if (nrow(data) == 0) {
	    stop("my_join() needs at least one row to read d1 and d2 from",
	         call. = FALSE)
	  }

	  # get value of both dimensions
	  d1.set <- data$d1[[1]]
	  d2.set <- data$d2[[1]]

	  # Name the key explicitly so the join cannot silently pick up other
	  # columns that data and dates might share.
	  full_join(data, dates, by = "date") %>%
	    # First we need to ungroup. Otherwise we can't change d1 and d2 because
	    # they are grouping variables.
	    ungroup() %>%
	    # Rows that came only from dates have no d1/d2 yet; set them everywhere.
	    mutate(
	      d1 = d1.set,
	      d2 = d2.set
	    ) %>%
	    group_by(d1, d2)
	}

	# Before: the rows that are left for group (A, 5).
	filter(df.missing, d1 == "A", d2 == 5)


	# After: apply my_join() to every (d1, d2) group and show the same group.
	df.missing %>%
	  do(my_join(.)) %>%
	  filter(d1 == "A", d2 == 5)