Last active
March 16, 2021 23:32
-
-
Save cboettig/a46eba3ad7183e2e9f36265ddd970571 to your computer and use it in GitHub Desktop.
Format fable timeseries, fbl_ts, to EFI flat-file format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Helper functions to turn a fable timeseries, which uses a special "distribution" column, | |
## into a flat-file format. efi_statistic_format uses a 'statistic' column (indicating either mean or sd), | |
## while efi_ensemble_format uses an 'ensemble' column, drawing `n` times from the distribution. | |
efi_statistic_format <- function(df){ | |
## determine variable name | |
var <- attributes(df)$dist | |
## Normal distribution: use distribution mean and variance | |
df %>% | |
dplyr::mutate(sd = sqrt( distributional::variance( .data[[var]] ) ) ) %>% | |
dplyr::rename(mean = .mean) %>% | |
dplyr::select(time, siteID, .model, mean, sd) %>% | |
tidyr::pivot_longer(c(mean, sd), names_to = "statistic", values_to = var) | |
} | |
efi_ensemble_format <- function(df, times = 10) { | |
## determine variable name | |
var <- attributes(df)$dist | |
n_groups <- nrow(df) | |
## Draw `times` samples from distribution using | |
suppressWarnings({ | |
expand <- df %>% | |
dplyr::mutate(sample = distributional::generate( .data[[var]], times) ) | |
}) | |
expand %>% | |
tidyr::unnest(sample) %>% | |
dplyr::mutate(ensemble = rep(1:times, n_groups)) %>% | |
dplyr::select(time, siteID, ensemble, {{var}} := sample) | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(fable) | |
library(distributional) | |
source("as_efi.R") # the above script | |
## Get the latest beetle target data. | |
targets <- read_csv("https://data.ecoforecast.org/targets/beetles/beetles-targets.csv.gz") | |
## Coerce to a "tsibble" time-series-data-table | |
targets <- as_tsibble(targets, index = time, key = siteID) | |
## Compute a simple mean/sd model per site... obviously silly given huge seasonal aspect | |
fc_richness <- targets %>% | |
model(null = MEAN(richness)) %>% | |
forecast(h = "1 year") | |
fc_abundance <- targets %>% | |
model(null = MEAN(abundance)) %>% | |
forecast(h = "1 year") | |
## Combine richness and abundance forecasts. drop the 'model' column since we have only one model here | |
forecast <- inner_join( efi_format(fc_richness), efi_format(fc_abundance) ) %>% select(!.model) | |
## Write out the forecast file | |
filename <- glue::glue("beetles-{date}-{team}.csv.gz", date=Sys.Date(), team = "EFI_null") | |
readr::write_csv(forecast, filename) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment