eibanez/commands.R

## commands.R
# load contents of test.R first

library(RSQLite)
library(data.table)
library(dplyr)
test <- query_test(db, "interval", "Generator", "Generation")

## test.R
query_test <- function(db, time, col, prop, columns = "name", time.range = NULL, filter = NULL, phase = 4) {
  # Open connection
  thesql <- src_sqlite(db$filename)

  # Query interval data
  # Get the table names that store the data
  if (identical(prop, "*")) {
    t.name <- db$properties[[1]]
  } else if(length(prop) == 1L) {
    t.name <- db$properties[[1]] %>% filter(property == prop)
  } else {
    t.name <- db$properties[[1]] %>% filter(property %in% prop)
  }
  t.name <- t.name %>%
    filter(collection == col, phase_id == phase, is_summary == 0) %>%
    select(collection, property, table_name) %>%
    mutate(table_name = gsub("data_interval_", "", table_name))

  # Get max/min time existing in the table to be queried
  #   In case time table has more time stamps than those in the dataset
  time.limit <- t.name %>%
    group_by(collection, property) %>%
    do(
      tbl(thesql, .$table_name) %>%
        filter(phase_id == phase) %>%
        summarize(time_from = min(time_from), time_to = max(time_to)) %>%
        collect
    )
  min.time.data <- min(time.limit$time_from)
  max.time.data <- max(time.limit$time_to)

  message("min.time.data: ", min.time.data)
  message("max.time.data: ", max.time.data)

  # Collect time data
  time.data <- tbl(thesql, "time") %>%
    filter(phase_id == phase) %>%
    filter(between(time, min.time.data, max.time.data)) %>%
    rplexos:::filter_rplexos_time(time.range) %>%
    select(time) %>%
    collect

  write.csv(time.data, "test-timedata.csv", row.names = FALSE)

  # If time data is empty, return an empty data frame
  if (nrow(time.data) == 0L) {
    DBI::dbDisconnect(thesql$con)
    return(data.frame())
  }

  # Convert into R time-data format
  time.data$time <- lubridate::ymd_hms(time.data$time)

  # Get interval data
  out <- t.name %>%
    group_by(collection, property) %>%
    do(tbl(thesql, .$table_name) %>%
         filter(phase_id == phase) %>%
         rplexos:::filter_rplexos(filter) %>%
         rplexos:::filter_rplexos_time(time.range, modified = TRUE) %>%
         select(-time_to) %>%
         rename(time = time_from) %>%
         rplexos:::select_rplexos(columns, add.key = TRUE) %>%
         collect
    ) %>%
    ungroup %>%
    mutate(time = lubridate::ymd_hms(time))

  # Mask out data
  out <- out %>%
    mutate(name = name %>% factor %>% as.numeric,
           value = value %>% factor %>% as.numeric)
  write.csv(out, "test-out.csv", row.names = FALSE)

  # Expand data
  #   This will be easier when dplyr supports rolling joins
  out2 <- data.table(out, key = "key,time")
  cj2 <- CJ(key = unique(out$key), time = time.data$time)

  write.csv(cj2, "test-cj2.csv", row.names = FALSE)

  message("key: \t", length(unique(out$key)))
  message("time:\t", length(time.data$time))
  message("time2:\t", length(unique(time.data$time)))
  message("out2:\t", nrow(out2))
  message("cj2: \t", nrow(cj2))

  out3 <- out2[cj2, roll = TRUE, allow.cartesian = TRUE]
  out <- out3 %>%
    as.data.frame(stringsAsFactors = FALSE) %>%
    select(-key)

  write.csv(out3, "test-out3.csv", row.names = FALSE)
  message("out3:\t", nrow(out3))

  # Restore time zone
  attributes(out$time) <- attributes(time.data$time)

  # Disconnect database
  DBI::dbDisconnect(thesql$con)

  # Return value
  return(out)
}
	# load contents of test.R first

	library(RSQLite)
	library(data.table)
	library(dplyr)
	test <- query_test(db, "interval", "Generator", "Generation")
	query_test <- function(db, time, col, prop, columns = "name", time.range = NULL, filter = NULL, phase = 4) {
	# Open connection
	thesql <- src_sqlite(db$filename)

	# Query interval data
	# Get the table names that store the data
	if (identical(prop, "*")) {
	t.name <- db$properties[[1]]
	} else if(length(prop) == 1L) {
	t.name <- db$properties[[1]] %>% filter(property == prop)
	} else {
	t.name <- db$properties[[1]] %>% filter(property %in% prop)
	}
	t.name <- t.name %>%
	filter(collection == col, phase_id == phase, is_summary == 0) %>%
	select(collection, property, table_name) %>%
	mutate(table_name = gsub("data_interval_", "", table_name))

	# Get max/min time existing in the table to be queried
	# In case time table has more time stamps than those in the dataset
	time.limit <- t.name %>%
	group_by(collection, property) %>%
	do(
	tbl(thesql, .$table_name) %>%
	filter(phase_id == phase) %>%
	summarize(time_from = min(time_from), time_to = max(time_to)) %>%
	collect
	)
	min.time.data <- min(time.limit$time_from)
	max.time.data <- max(time.limit$time_to)

	message("min.time.data: ", min.time.data)
	message("max.time.data: ", max.time.data)

	# Collect time data
	time.data <- tbl(thesql, "time") %>%
	filter(phase_id == phase) %>%
	filter(between(time, min.time.data, max.time.data)) %>%
	rplexos:::filter_rplexos_time(time.range) %>%
	select(time) %>%
	collect

	write.csv(time.data, "test-timedata.csv", row.names = FALSE)

	# If time data is empty, return an empty data frame
	if (nrow(time.data) == 0L) {
	DBI::dbDisconnect(thesql$con)
	return(data.frame())
	}

	# Convert into R time-data format
	time.data$time <- lubridate::ymd_hms(time.data$time)

	# Get interval data
	out <- t.name %>%
	group_by(collection, property) %>%
	do(tbl(thesql, .$table_name) %>%
	filter(phase_id == phase) %>%
	rplexos:::filter_rplexos(filter) %>%
	rplexos:::filter_rplexos_time(time.range, modified = TRUE) %>%
	select(-time_to) %>%
	rename(time = time_from) %>%
	rplexos:::select_rplexos(columns, add.key = TRUE) %>%
	collect
	) %>%
	ungroup %>%
	mutate(time = lubridate::ymd_hms(time))

	# Mask out data
	out <- out %>%
	mutate(name = name %>% factor %>% as.numeric,
	value = value %>% factor %>% as.numeric)
	write.csv(out, "test-out.csv", row.names = FALSE)

	# Expand data
	# This will be easier when dplyr supports rolling joins
	out2 <- data.table(out, key = "key,time")
	cj2 <- CJ(key = unique(out$key), time = time.data$time)

	write.csv(cj2, "test-cj2.csv", row.names = FALSE)

	message("key: \t", length(unique(out$key)))
	message("time:\t", length(time.data$time))
	message("time2:\t", length(unique(time.data$time)))
	message("out2:\t", nrow(out2))
	message("cj2: \t", nrow(cj2))

	out3 <- out2[cj2, roll = TRUE, allow.cartesian = TRUE]
	out <- out3 %>%
	as.data.frame(stringsAsFactors = FALSE) %>%
	select(-key)

	write.csv(out3, "test-out3.csv", row.names = FALSE)
	message("out3:\t", nrow(out3))

	# Restore time zone
	attributes(out$time) <- attributes(time.data$time)

	# Disconnect database
	DBI::dbDisconnect(thesql$con)

	# Return value
	return(out)
	}