Last active
August 24, 2017 23:18
-
-
Save mmerce/7ab2a4e952b2cdaa913e6847ca5ff3c6 to your computer and use it in GitHub Desktop.
Creating a dataset containing the forecast prediction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "Forecast dataset", | |
"description": "Creating a dataset that contains the original fields and the forecast prediction for a certain ets-model", | |
"inputs": [ | |
{ | |
"name": "timeseries-id", | |
"type": "timeseries-id", | |
"description": "Select the timeseries" | |
}, | |
{ | |
"name": "horizon", | |
"type": "number", | |
"default": 0, | |
"description": "Integer value that sets the horizon for the forecast. (optional)" | |
}, | |
{ | |
"name": "ets-model", | |
"type": "string", | |
"description": "Name of the ets-model to use to forecast (optional). If empty, the lowest error model is returned.", | |
"default": "" | |
} | |
], | |
"outputs": [ | |
{ | |
"name": "output-dataset", | |
"type": "dataset-id", | |
"description": "Dataset containing the timeseries data and the forecast." | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Key of the per-model error measure used by get-best-models to rank the
;; ets-models of each field (lowest value wins).
(define ERROR_MEASURE "aic") | |
;; Builds a dataset from in-memory rows.
;;
;; data-list: list of rows, each row being a list of cells. Every row is
;;            joined with "," and the rows are joined with newlines; the
;;            source is parsed with a header, so the first row provides
;;            the field names.
;; name:      name given to the inline source.
;;
;; Once the source is ready, all of its fields are forced to the numeric
;; optype. Returns the result of `create-dataset` on the updated source
;; (the non-waiting creation call).
(define (inline-dataset data-list name)
  (let (csv-lines (map (lambda (row) (join "," row)) data-list)
        source-id (create-and-wait-source {"data" (join "\n" csv-lines)
                                           "name" name
                                           "source_parser" {"header" true}})
        field-ids (keys ((fetch source-id) "fields" {}))
        ;; one {"optype" "numeric"} entry per field found in the source
        numeric-fields (reduce (lambda (acc field-id)
                                 (assoc acc field-id {"optype" "numeric"}))
                               {}
                               field-ids)
        updated-id (update-and-wait source-id {"fields" numeric-fields}))
    (create-dataset {"source" updated-id})))
;; Derives a new dataset from `dataset-id` restricted to `input-fields`.
;; Waits for the new dataset to finish and returns what
;; `create-and-wait-dataset` returns.
(define (filter-dataset dataset-id input-fields)
  (let (request {"origin_dataset" dataset-id
                 "input_fields" input-fields})
    (create-and-wait-dataset request)))
;; Picks, for every field in `input-fields`, the name of the ets-model with
;; the lowest ERROR_MEASURE among the models stored in the timeseries
;; (under "time_series" -> "ets_models").
;;
;; Models that lack the ERROR_MEASURE key are given a value one unit above
;; the largest measure seen across all the input fields (missing values
;; count as -1 when computing that maximum), so they sort last.
;;
;; Returns a map from field to the chosen model name.
(define (get-best-models timeseries input-fields)
  (let (by-field (timeseries ["time_series" "ets_models"])
        ;; every model of every input field, flattened into one list
        every-model (reduce (lambda (acc fld) (concat acc (by-field fld)))
                            []
                            input-fields)
        worst (max (map (lambda (m) (m ERROR_MEASURE -1)) every-model))
        penalty (+ worst 1)
        with-error (lambda (m)
                     (if (contains? m ERROR_MEASURE)
                       m
                       (assoc m ERROR_MEASURE penalty)))
        best-name (lambda (fld)
                    (let (ranked (sort-by-key ERROR_MEASURE
                                              (map with-error (by-field fld))))
                      ((ranked 0 {}) "name"))))
    (make-map input-fields (map best-name input-fields))))
;; Reads the forecast stored in the timeseries resource (its "forecast"
;; attribute) and keeps, for each field in `input-fields`, only the entries
;; whose "model" equals the model chosen for that field in `ets-models`.
;;
;; Returns a map from field to the list of matching forecast entries.
(define (get-timeseries-forecast timeseries input-fields ets-models)
  (let (stored (timeseries "forecast")
        matching (lambda (fld)
                   (filter (lambda (entry)
                             (= (entry "model") (ets-models fld)))
                           (stored fld))))
    (make-map input-fields (map matching input-fields))))
;; Creates a new forecast resource for the timeseries, asking for `horizon`
;; points per field and restricting each field to the model named in
;; `ets-models`. Waits for the forecast to finish and returns the value
;; found under "forecast" -> "result" in the fetched resource.
(define (get-forecast-data timeseries input-fields horizon ets-models)
  (let (ts-id (timeseries "resource")
        field-request (lambda (fld)
                        {"horizon" horizon
                         "ets_models" {"names" [(ets-models fld)]}})
        input-data (make-map input-fields (map field-request input-fields))
        forecast-id (create-and-wait-forecast {"timeseries" ts-id
                                               "input_data" input-data})
        resource (fetch forecast-id))
    (resource ["forecast" "result"])))
;; Turns a per-field forecast into a list of rows, one row per horizon step.
;;
;; forecast:     map from field to a list of forecast entries; only the
;;               first entry of each list is used, through its
;;               "point_forecast" list.
;; input-fields: fields to include, in column order.
;; max-horizon:  number of rows to generate (steps 0 .. max-horizon - 1).
;;
;; Returns a list of rows; a cell is "" when the field has no point for
;; that step. Raises an error when none of the fields has any forecast
;; entry. The message is self-contained: it does not read the script-level
;; `ets-model` input, which is not a parameter of this function.
(define (get-forecast-rows forecast input-fields max-horizon)
  (let (models (for (f input-fields)
                 (forecast f))
        points (map (lambda (m) ((m 0 {}) "point_forecast" [])) models))
    (when (= (count (filter empty? models)) (count models))
      (raise {"message" (str "The forecast has no information for the"
                             " requested ets-model. Change your input data"
                             " to include that model")}))
    (for (h (range 0 max-horizon))
      (iterate (row [] p points)
        (append row (p h ""))))))
;; Lists the ets-model name associated with each field of `input-fields`,
;; in the same order as the fields, by looking each one up in `ets-models`.
(define (get-model-names input-fields ets-models)
  (map (lambda (fld) (ets-models fld)) input-fields))
;; Creating a dataset that stores the original data plus the forecast for a
;; concrete ets-model.
;;
;; timeseries-id: ID of the timeseries resource to forecast with.
;; horizon:       number of points to forecast; any value that is not a
;;                positive integer falls back to the timeseries horizon.
;; ets-model:     name of the ets-model to use for every objective field;
;;                when empty, the lowest-ERROR_MEASURE model of each field
;;                is used instead.
;;
;; Returns the result of updating the merged dataset (training data
;; restricted to the objective fields, followed by the forecast rows) with
;; a label per field that names the model used to forecast it.
(define (forecast-dataset timeseries-id horizon ets-model)
  (let (timeseries (fetch timeseries-id)
        input-fields (timeseries "objective_fields" [])
        fields (timeseries ["time_series" "fields"])
        input-field-names (map (lambda (id) ((find-field fields id) "name"))
                               input-fields)
        f-id-fn (lambda (f) ((find-field fields f) "id"))
        input-fields (map f-id-fn input-fields)
        ;; getting the timeseries training dataset
        dataset-id (timeseries "dataset")
        filtered-dataset-id (filter-dataset dataset-id input-fields)
        ;; if no ets-model is provided, we choose the ones with lowest aic
        ;; in the timeseries
        ets-models (if (empty? ets-model)
                     (get-best-models timeseries input-fields)
                     (make-map input-fields (repeat (count input-fields)
                                                    ets-model)))
        ;; if horizon is empty, equal to or smaller than the one in the
        ;; timeseries, we use the forecast stored in the timeseries.
        ;; We create a forecast otherwise
        ts-horizon (timeseries "horizon")
        horizon (if (and (integer? horizon) (> horizon 0))
                  horizon
                  ts-horizon)
        ;; `>=` (not `>`): an empty horizon is replaced by ts-horizon, so
        ;; the equal case must reuse the stored forecast rather than
        ;; create a new forecast resource
        forecast (if (>= ts-horizon horizon)
                   (get-timeseries-forecast timeseries
                                            input-fields
                                            ets-models)
                   (get-forecast-data timeseries
                                      input-fields
                                      horizon
                                      ets-models))
        forecast-rows (get-forecast-rows forecast input-fields
                                         horizon)
        model-names (get-model-names input-fields ets-models)
        ;; the field names become the header row of the inline dataset
        forecast-rows (concat [input-field-names] forecast-rows)
        forecast-dataset-id (inline-dataset forecast-rows
                                            (str (timeseries "name")
                                                 " - forecast"))
        merged-dataset-id (create-and-wait-dataset {"origin_datasets" [filtered-dataset-id
                                                                       forecast-dataset-id]})
        fields (resource-fields merged-dataset-id)
        field-labels (iterate (f-l {} n input-field-names mn model-names)
                       (assoc f-l
                              ((find-field fields n) "id")
                              {"label" (str "forecast with model: " mn)})))
    (update-and-wait merged-dataset-id {"fields" field-labels})))
;; Script entry point: binds the declared "output-dataset" output to the
;; result of forecast-dataset applied to the script inputs
;; (timeseries-id, horizon, ets-model).
(define output-dataset (forecast-dataset timeseries-id horizon ets-model)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment