Script to weight instances using cross-validation batch predictions
{
  "name": "Comparing model to batch-prediction-weighted model",
  "description": "Comparing evaluation of the usual model with a model built using a weight field. The field is defined by using k-fold batch predictions to take into account which instances are predicted correctly and the confidence of this prediction. It also balances the imbalanced classes.",
  "inputs": [
    {
      "name": "dataset-id",
      "type": "dataset-id",
      "description": "Select the dataset"
    },
    {
      "name": "k-folds",
      "type": "number",
      "description": "Number of folds for the cross-validation batch predictions",
      "default": 5
    },
    {
      "name": "objective-name",
      "type": "string",
      "description": "Name of the field to be predicted",
      "default": ""
    },
    {
      "name": "model-options",
      "type": "map",
      "description": "Settings used to create the model",
      "default": {}
    },
    {
      "name": "batch-prediction-options",
      "type": "map",
      "description": "Settings used to create the batch predictions",
      "default": {}
    }
  ],
  "outputs": [
    {
      "name": "output-eval",
      "type": "map",
      "description": "Evaluation measures for the reference model (no batch predictions used) and the weighted one."
    }
  ]
}
;;Balancing with cost function example
;;
;;Builds a model for imbalanced datasets by assigning a weight to each
;;instance that balances the classes and selectively gives more importance
;;to the instances that improve the model's performance.
;;
;;The evaluation that decides whether an instance is weighted more or less
;;is done in a k-fold cross-validation fashion, but instead of aggregating
;;the results, each prediction is compared to the actual value of the
;;instance and an associated weight is assigned depending on the result of
;;this comparison, much like a cost function.
;;
;; Inputs:
;; dataset-id: (string) Dataset ID that contains the imbalanced data
;; k-folds: (integer) Number of parts the dataset is divided into for the
;; cross-validation batch predictions
;; objective-name: (string) Name of the field to be predicted
;; model-options: (map) Attributes that will be used in the model creation
;; calls
;; batch-prediction-options: (map) Attributes that will be used in the
;; batch prediction creation calls
;;
;; k-fold cross-validation code
;; This code will eventually be defined as a library.
(define MODEL_OPTIONS ["balance_objective"
"missing_splits"
"pruning"
"weight_field"
"objective_weights"
"node_threshold"
"seed"])
(define ENSEMBLE_OPTIONS (concat MODEL_OPTIONS
["sample_rate"
"replacement"
"randomize"
"number_of_models"]))
(define BATCH_PREDICTION_OPTIONS ["sample_rate"
"out_of_bag"
"range"
"replacement"
"ordering"
"seed"
"missing_strategy"
"combiner"])
;; batch datasets using k-folds
;;
;; creates k-fold batch datasets for a dataset
;; Inputs:
;; dataset-id: (string) Dataset ID
;; k-folds: (integer) Number of folds
;; model-options: (map) Options to use in model/ensemble
;; batch-prediction-options: (map) Options to use in batch predictions
;; creation
;;
;; Output: (list) batch predicted datasets
;;
;; Raises:
;; 101: The dataset-id argument is not a string
;; 102: The dataset-id is not a valid dataset ID
;; 103: The k-folds argument is not an integer
;; 104: The k-folds argument is not >= 2
;; 105: The k-folds argument is higher than the maximum
;; 106: The objective field ID is not in the selectable IDs list
;; 107: The k-folds argument is too high compared to the number of rows
;;
;; check-resource-id
;;
;; Validates that the argument is a resource ID of the expected type. Raises
;; an error otherwise.
;;
;; Inputs:
;; resource-id: (string) Resource ID
;; type: (string) Type of resource
;;
;; Output: (string) Checked resource ID
(define (check-resource-id resource-id type)
(when (not (string? resource-id))
(raise {"message" (str "Resource ID string expected. Found "
resource-id " instead.")
"code" 101}))
(when (not (= (resource-type resource-id) type))
(raise {"message" (str "Failed to find a correct " type " ID.")
"code" 102}))
resource-id)
;; check-integer
;;
;; Validates that the argument is an integer. Raises an error otherwise.
;;
;; Inputs:
;; value: (number) Integer to be checked
;; minimum: (number) Minimum value (false if not set)
;; maximum: (number) Maximum value (false if not set)
;;
;; Output: (number) Checked integer
(define (check-integer value minimum maximum)
(when (not (integer? value))
(raise {"message" (str "Integer value expected. Found " value " instead.")
"code" 103}))
(when (and minimum (< value minimum))
(raise {"message" (str "Minimum accepted value is " minimum ". " value
" found.")
"code" 104}))
(when (and maximum (> value maximum))
(raise {"message" (str "Maximum accepted value is " maximum ". " value
" found.")
"code" 105}))
value)
;; check-k-folds-rows
;;
;; Validates that the number of rows in a dataset is at least twice the
;; number of k-folds. Raises error otherwise.
;;
;; Inputs:
;; k-folds: (number) Integer to be checked
;; dataset: (map) Dataset info
;;
(define (check-k-folds-rows k-folds dataset)
(when (> k-folds (/ (dataset "rows" 0) 2))
(raise {"message" (str "The dataset has too few rows to be split in "
k-folds
" parts.")
"code" 107})))
;; choosable-objective-ids
;;
;; List of IDs of the fields in the dataset that can be chosen as objective
;; field.
;;
;; Inputs:
;; fields: (map) Fields structure
;; Output: (list) list of field IDs
(define (choosable-objective-ids fields)
(let (field-val (lambda (fid k) (fields [fid k] false))
objective-types ["categorical", "numeric"]
pref? (lambda (k) (field-val k "preferred"))
pred? (lambda (k) (member? (field-val k "optype") objective-types)))
(filter (lambda (x) (and (pref? x) (pred? x))) (keys fields))))
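;; Illustrative example (hypothetical field IDs): given a fields map like
;; {"000000" {"optype" "numeric" "preferred" true}
;;  "000001" {"optype" "text" "preferred" true}
;;  "000002" {"optype" "categorical" "preferred" false}}
;; only "000000" is selectable, since the other fields are either not
;; preferred or not categorical/numeric.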
;; check-dataset-objective-id
;;
;; Validates that the argument is a valid objective id in the reference
;; dataset.
;;
;; Inputs:
;; objective-id: (string) ID of the objective field
;; dataset: (map) Dataset resource information
;;
;; Output: (string) Checked objective field ID
(define (check-dataset-objective-id objective-id dataset)
(let (fields (dataset "fields" {})
objective-ids (choosable-objective-ids fields))
(when (not (member? objective-id objective-ids))
(raise {"message" (str "Failed to find the objective ID in the dataset"
" choosable fields.")
"code" 106}))))
;; get-objective-name
;;
;; Returns the name of the field used as objective field
;;
;; Inputs:
;; dataset: (map) Dataset resource info
;; objective-id: (string) ID of the objective field
;;
;; Outputs: (string) Name of the objective field
(define (get-objective-name dataset objective-id)
(let (fields (dataset "fields" {}))
(fields [objective-id "name"] false)))
;; get-objective-id
;;
;; Returns the ID of the field used as objective field
;;
;; Inputs:
;; dataset: (map) Dataset resource info
;; objective-name: (string) Name of the objective field
;;
;; Outputs: (string) ID of the objective field
(define (get-objective-id dataset objective-name)
(let (fields (dataset "fields" {})
objective-field (find-field fields objective-name))
(if (not objective-field)
(raise {"message" (str "Failed to find the "
objective-name
" field"
" in this dataset.")
"code" 106})
(objective-field "id" false))))
;; create-k-folds
;;
;; Creates k-fold splits from a dataset
;;
;; Inputs:
;; dataset-id: (string) Dataset ID
;; k-folds: (integer) Number of folds
;;
;; Output: (list) List of dataset IDs
;;
(define (create-k-folds dataset-id k-folds)
(let (k-fold-fn (lambda (x)
(create-dataset {"origin_dataset" dataset-id
"row_offset" x
"row_step" k-folds
"new_fields" [{"name" "k_fold"
"field" (str x)}]}))
dataset-ids (map k-fold-fn (range 0 k-folds)))
(wait* dataset-ids)))
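;; For instance, with k-folds = 3 this creates three datasets: fold 0 holds
;; rows 0, 3, 6, ..., fold 1 holds rows 1, 4, 7, ..., and fold 2 holds rows
;; 2, 5, 8, ... Each dataset gets a "k_fold" field labeling its fold index.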
;; pair-k-folds
;;
;; Builds a list of pairs of hold-out and complementary datasets for all
;; the k-fold dataset IDs.
;;
;; Inputs:
;; dataset-ids: (list) List of the k-fold dataset IDs
;;
;; Output: (list) List of pairs [hold-out dataset, multidataset with the rest]
;;
(define (pair-k-folds dataset-ids)
(map (lambda(x)
[(nth dataset-ids x)
(concat (take x dataset-ids)
(drop (+ x 1) dataset-ids))])
(range 0 (count dataset-ids))))
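;; For example (hypothetical dataset IDs):
;; (pair-k-folds ["ds/1" "ds/2" "ds/3"]) =>
;; [["ds/1" ["ds/2" "ds/3"]]
;;  ["ds/2" ["ds/1" "ds/3"]]
;;  ["ds/3" ["ds/1" "ds/2"]]]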
;; select-map-keys
;;
;; Filters the keys in a map, keeping only the ones that appear in the list.
;;
;; Inputs:
;; map: (map) Key, value maps
;; keys-list: (list) List of keys to be kept in the map
;; Output: (map) filtered map with only the keys in the keys-list
;;
(define (select-map-keys a-map keys-list)
(reduce (lambda (x y) (let (value (a-map y false))
(cond value (assoc x y value) x)))
{}
keys-list))
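;; For example:
;; (select-map-keys {"seed" "bigml" "foo" 42} ["seed" "pruning"]) =>
;; {"seed" "bigml"}
;; Note that keys whose value is false or missing are dropped as well.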
;; create-k-models
;;
;; Creates the models for a set of k-fold datasets
;;
;; Inputs:
;; type: (string) type of model (model or ensemble)
;; multidatasets: (list) List of lists of dataset IDs once a k-fold is
;; excluded
;; objective-name: (string) name of the objective field
;; model-options: (map) Options for the model or ensemble
;;
;; Output: (list) model IDs
;;
(define (create-k-models type multidatasets objective-name model-options)
(let (models (map (lambda (x)
(create type
(merge {"datasets" x
"objective_field" objective-name}
model-options)))
multidatasets))
(wait* models)))
;; end of k-fold cross-validation code
;; create-k-bp-datasets
;;
;; Creates the models/ensembles and the batch predictions' output datasets
;; for a set of k-fold datasets
;;
;; Inputs:
;; dataset-ids: (list) List of the k-fold dataset IDs
;; objective-name: (string) Objective field name
;; dataset-name: (string) Name of the origin dataset
;; model-options: (map) Options used to build the models/ensembles
;; batch-prediction-options: (map) Options used to build batch predictions
;;
;; Output: (list) List of dataset IDs
;;
(define (create-k-bp-datasets dataset-ids
objective-name
dataset-name
model-options
batch-prediction-options)
(let (number-of-models (model-options "number_of_models" 1)
k-fold-pairs (pair-k-folds dataset-ids)
options (if (> number-of-models 1)
(select-map-keys model-options ENSEMBLE_OPTIONS)
(select-map-keys model-options MODEL_OPTIONS))
type (if (> number-of-models 1) "ensemble" "model")
multidatasets (map last k-fold-pairs)
batch-predictions-options (select-map-keys batch-prediction-options
BATCH_PREDICTION_OPTIONS)
models (create-k-models type
multidatasets
objective-name
options)
batch-predictions (iterate (es []
id dataset-ids
mid models
idx (range 1 (+ 1 (count dataset-ids))))
(let (name (str idx
"-fold batch-prediction "
dataset-name)
opts (assoc batch-predictions-options
"name" name
"all_fields" true
"output_dataset" true
"prediction_name" "__prediction__"
"confidence" true
"confidence_name" "__confidence__"
"tags" ["script_garbage"]))
(append es (create-batchprediction id
mid
opts))))
batch-predictions (wait* batch-predictions))
(wait* (for (bp-id batch-predictions)
(let (bp (fetch bp-id))
(bp "output_dataset_resource"))))))
;; confidence-eval-weight
;;
;; Adds a weight field by using the following formula:
;; - when the prediction is correct, the confidence is multiplied by the
;; inverse frequency of the class (total number of instances in the
;; dataset over the number of instances of the class)
;; - when the prediction is not correct, the inverse of the confidence is
;; multiplied by the frequency of the class
;;
;; Inputs:
;; dataset-id: (string) ID of the training dataset
;; objective-id: (string) Objective field ID
;; ds-ids: (list) List of the dataset IDs generated by the batch predictions
;;
;; Output: (list) List of dataset IDs
(define (confidence-eval-weight dataset-id objective-id ds-ids)
(let (dataset (fetch dataset-id)
distr (dataset ["fields" objective-id "summary" "categories"])
total (apply + (for (item distr) (item 1)))
class-inst (for (item distr) (flatline " (list @{{item}})"))
class-inst (flatline "(real (head (tail (head (filter "
"(= (f {{objective-id}}) (nth _ 0)) "
"(list @{class-inst}))))))")
weight (flatline "(if (= (f {{objective-id}}) (f \"__prediction__\")) "
"(/ (* (f \"__confidence__\") {total}) "
"{class-inst}) "
"(* (/ 1 (* (f \"__confidence__\") {total})) "
"{class-inst}))"))
(for (ds-id ds-ids)
(create-dataset ds-id
{"new_fields" [{"field" weight
"name" "weight"}]
"tags" ["script_garbage"]}))))
;;k-fold-bp-w-model
;;
;;Creates the weighted model or ensemble from the original dataset
;;by doing a k-fold cross-validation and generating batch predictions
;;for every part of the dataset. The batch prediction results are used
;;to generate a weight per instance that is then used as the final
;;model's weight field.
;;
;; Inputs:
;; dataset-id: (string) ID of the training dataset
;; k-folds: (integer) Number of parts to use in the cross-validation
;; objective-name: (string) Objective field name
;; model-options: (map) Options used to build the models/ensembles
;; batch-prediction-options: (map) Options used to build batch predictions
;;
;; Output: (string) ID of the weighted model or ensemble
(define (k-fold-bp-w-model dataset-id
k-folds
objective-name
model-options
batch-prediction-options)
(check-resource-id dataset-id "dataset")
(check-integer k-folds 2 false)
(let (dataset (fetch dataset-id)
dataset-name (dataset "name" false)
objective-id (get-objective-id dataset objective-name))
(check-dataset-objective-id objective-id dataset)
(check-k-folds-rows k-folds dataset)
(let (k-fold-datasets (create-k-folds dataset-id k-folds)
ds-ids (create-k-bp-datasets k-fold-datasets
objective-name
dataset-name
model-options
batch-prediction-options)
ds-ids (confidence-eval-weight dataset-id objective-id ds-ids)
weighted-ds (create-dataset {"origin_datasets" ds-ids}))
(if (> (model-options "number_of_models" 1) 1)
(create-ensemble {"dataset" weighted-ds
"weight_field" "weight"
"objective_field" objective-name
"excluded_fields" ["k_fold"
"__prediction__"
"__confidence__"]})
(create-model {"dataset" weighted-ds
"weight_field" "weight"
"objective_field" objective-name
"excluded_fields" ["k_fold"
"__prediction__"
"__confidence__"]}))))
;;evaluate-weighted
;;
;;Main procedure that:
;; - splits the original data into training and test datasets
;; - creates a default model and evaluates it to use as a reference
;; - creates the weights to be associated to each instance in the training
;; dataset and generates a model using this weight field
;; - evaluates this weighted model
;; - builds a map with the basic evaluation measures
;;
;; Inputs:
;; dataset-id: (string) ID of the training dataset
;; k-folds: (integer) Number of parts the training dataset is divided into
;; for the cross-validation batch predictions
;; objective-name: (string) Objective field name
;; model-options: (map) Options used to build the models/ensembles
;; batch-prediction-options: (map) Options used to build batch predictions
;;
;; Output: (map) Basic evaluation metrics for the reference model and
;; the weighted one
(define (evaluate-weighted dataset-id
k-folds
objective-name
model-options
batch-prediction-options)
(let (seed (model-options "seed" "bigml")
[ds-train ds-test] (create-dataset-split dataset-id 0.8 seed)
model (create-model ds-train model-options)
eval-id (create-and-wait-evaluation model
ds-test)
weighted-model (k-fold-bp-w-model ds-train
k-folds
objective-name
model-options
batch-prediction-options)
weighted-eval-id (create-and-wait-evaluation weighted-model
ds-test)
weighted-eval (fetch weighted-eval-id)
eval (fetch eval-id))
{"phi" (eval ["result" "model" "average_phi"])
"accuracy" (eval ["result" "model" "accuracy"])
"precision" (eval ["result" "model" "average_precision"])
"recall" (eval ["result" "model" "average_recall"])
"evaluation" eval-id
"weighted phi" (weighted-eval ["result" "model" "average_phi"])
"weighted accuracy" (weighted-eval ["result" "model" "accuracy"])
"weighted precision" (weighted-eval ["result" "model" "average_precision"])
"weighted recall" (weighted-eval ["result" "model" "average_recall"])
"weighted evaluation" weighted-eval-id}))
;;output-eval
;;output variable for the script. Contains the result of the evaluate-weighted
;;procedure
(define output-eval (evaluate-weighted dataset-id
k-folds
objective-name
model-options
batch-prediction-options))