jaor/metadata.json

## readme.md

      
    Raw
  

              readme.md
            
          
    Mark predictions made using missing values

This script takes a supervised model and an input dataset and performs
a batch prediction with them.  It then generates a new dataset,
returned as the output corrected-predictions, which contains an
additional prediction column (named "corrected-prediction").  That
column coincides with the original prediction when it was made without
any missing inputs, and otherwise holds the fallback category given by
the script parameter denied-class ("denied" by default).  A second
dataset, filtered-predictions, contains only those rows with
non-denied predictions (and drops the "corrected-prediction" column).
One can also specify the list of fields that are checked for missing
values, using the fields input; when it is empty (the default), all
fields are checked.  Other properties, such as the output column
names, could easily be turned into parameters.  Also, when the model
is a regression, the additional column will contain string values: one
could easily introduce an accepted-class for those cases.

  
## metadata.json
{
  "name": "mark-missing-in-predictions",
  "kind": "script",
  "description": "Perform a batch prediction and produce an additional dataset with missings marked",
  "source_code": "script.whizzml",
  "imports":[
    ],
  "inputs":[
    {
      "name": "model",
      "type": "supervised-model-id",
      "description": "The supervised model to use for predictions"
    },
    {
      "name": "dataset",
      "type": "dataset-id",
      "description": "The input dataset"
    },
    {
      "name": "denied-class",
      "type": "string",
      "description": "Class used to denote denied predictions",
      "default": "denied"
    },
    {
      "name": "fields",
      "description": "List of fields to check for missings, or empty for all",
      "type": "list",
      "default": []
    }],
  "outputs":[
    {
      "name": "batch-prediction",
      "type": "batchprediction-id",
      "description": "The full batch prediction"
    },
    {
      "name": "corrected-predictions",
      "description": "The dataset with all predictions, some marked as denied",
      "type": "dataset-id"
    },
    {
      "name": "filtered-predictions",
      "description": "The dataset only with predictions not marked as denied",
      "type": "dataset-id"
    }]
}

## script.whizzml

;; Batch prediction of `model` over `dataset`.
;; "output_dataset" asks for the predictions to be materialized as a
;; dataset (its id is read later from "output_dataset_resource");
;; "all_fields" is passed through to the batch prediction request.
;; The definition is bound only once the resource has finished.
(define batch-prediction
  (let (options {"all_fields" true
                 "output_dataset" true}
        request (create-batchprediction model dataset options))
    (wait request)))

;; Dataset holding every row of the batch prediction's output dataset
;; plus a new "corrected-prediction" column.  For each row the column is
;; `denied-class` when at least one of the checked fields is missing;
;; otherwise it is the value of the field named after the model's
;; objective field, converted to a string.
(define corrected-predictions
  (let (bp (fetch batch-prediction)
        ;; Wait for the output dataset before reading its fields.
        scored (wait (bp "output_dataset_resource"))
        ;; An empty `fields` input means: check every field of that dataset.
        fds (if (not (empty? fields))
              fields
              (keys (resource-fields scored)))
        obj (resource-property model "objective_field_name")
        ;; Flatline expression computing the new column's value per row.
        marker (flatline "(if (> (count (filter (missing? _) (list @{{fds}}))) 0)"
                         "  {{denied-class}} (str (f {{obj}})))")
        new-column {"field" marker
                    "name" "corrected-prediction"})
    (wait (create-dataset scored {"new_fields" [new-column]}))))

;; Dataset derived from `corrected-predictions` that keeps only the rows
;; whose "corrected-prediction" value differs from `denied-class`.  The
;; helper column itself is excluded from the result.
(define filtered-predictions
  (let (row-filter (flatline "(!= {{denied-class}} (f \"corrected-prediction\"))")
        options {"excluded_fields" ["corrected-prediction"]
                 "lisp_filter" row-filter}
        request (create-dataset corrected-predictions options))
    (wait request)))
	{
	"name": "mark-missing-in-predictions",
	"kind": "script",
	"description": "Perform a batch prediction and produce an additional dataset with missings marked",
	"source_code": "script.whizzml",
	"imports":[
	],
	"inputs":[
	{
	"name": "model",
	"type": "supervised-model-id",
	"description": "The supervised model to use for predictions"
	},
	{
	"name": "dataset",
	"type": "dataset-id",
	"description": "The input dataset"
	},
	{
	"name": "denied-class",
	"type": "string",
	"description": "Class used to denote denied predictions",
	"default": "denied"
	},
	{
	"name": "fields",
	"description": "List of fields to check for missings, or empty for all",
	"type": "list",
	"default": []
	}],
	"outputs":[
	{
	"name": "batch-prediction",
	"type": "batchprediction-id",
	"description": "The full batch prediction"
	},
	{
	"name": "corrected-predictions",
	"description": "The dataset with all predictions, some marked as denied",
	"type": "dataset-id"
	},
	{
	"name": "filtered-predictions",
	"description": "The dataset only with predictions not marked as denied",
	"type": "dataset-id"
	}]
	}

	;; Batch prediction of `model` over `dataset`.
	;; "output_dataset" asks for the predictions to be materialized as a
	;; dataset (its id is read later from "output_dataset_resource");
	;; "all_fields" is passed through to the batch prediction request.
	;; The definition is bound only once the resource has finished.
	(define batch-prediction
	  (let (options {"all_fields" true
	                 "output_dataset" true}
	        request (create-batchprediction model dataset options))
	    (wait request)))

	;; Dataset holding every row of the batch prediction's output dataset
	;; plus a new "corrected-prediction" column.  For each row the column is
	;; `denied-class` when at least one of the checked fields is missing;
	;; otherwise it is the value of the field named after the model's
	;; objective field, converted to a string.
	(define corrected-predictions
	  (let (bp (fetch batch-prediction)
	        ;; Wait for the output dataset before reading its fields.
	        scored (wait (bp "output_dataset_resource"))
	        ;; An empty `fields` input means: check every field of that dataset.
	        fds (if (not (empty? fields))
	              fields
	              (keys (resource-fields scored)))
	        obj (resource-property model "objective_field_name")
	        ;; Flatline expression computing the new column's value per row.
	        marker (flatline "(if (> (count (filter (missing? _) (list @{{fds}}))) 0)"
	                         " {{denied-class}} (str (f {{obj}})))")
	        new-column {"field" marker
	                    "name" "corrected-prediction"})
	    (wait (create-dataset scored {"new_fields" [new-column]}))))

	;; Dataset derived from `corrected-predictions` that keeps only the rows
	;; whose "corrected-prediction" value differs from `denied-class`.  The
	;; helper column itself is excluded from the result.
	(define filtered-predictions
	  (let (row-filter (flatline "(!= {{denied-class}} (f \"corrected-prediction\"))")
	        options {"excluded_fields" ["corrected-prediction"]
	                 "lisp_filter" row-filter}
	        request (create-dataset corrected-predictions options))
	    (wait request)))