aficionado/JSON PML schemas.md

## callbacks-schema.json
{
  "description":"A list of callback specifications.  They're invoked upon resource state changes.",
  "type":"array",
  "items":{
    "description":"Each callback specification contains an HTTP method, a URL, a list of triggering states and a synchronicity flag.",
    "type":"object",
    "properties":{
      "method":{
        "description":"The HTTP method to use.",
        "optional":true,
        "default":"GET",
        "enum":["GET", "POST", "PUT", "DELETE", "get", "post", "put", "delete"]
      },
      "url":{
        "description":"The URL to invoke with the method above.",
        "type":"string"
      },
      "states":{
        "description":"A list of states for which the callback should be called.  If not given, the callback will be called for all state changes.",
        "optional":true,
        "type":"array",
        "items":{"type":"string"}
      },
      "synchronous":{
        "description":"A flag specifying whether the callback is synchronous and we need to wait for a remote response before proceeding.",
        "optional":true,
        "default":false,
        "type":"boolean"
      }
    }
  }
}

## field-coll-schema.json
{
  "type":"object",
  "description":"A collection of fields",
  "additionalProperties":{
    "$ref":"field-schema.json"
  }
}

## field-schema.json
{
  "description":"A field from the dataset",
  "type":[
    {
      "type":"object",
      "extends":{
        "$ref":"generic-field-schema.json"
      },
      "description":"A text field from the dataset",
      "properties":{
        "optype":{
          "description":"Operational type of this field",
          "optional":true,
          "enum":["text"]
        },
        "datatype":{
          "description":"The storage type of the field",
          "optional":true,
          "enum":["string","boolean"]
        },
        "summary":{
          "type":"object",
          "optional":true,
          "properties":{
            "missing_count":{
              "description":"# of instances missing this field",
              "type":"integer"
            }
          }
        }
      }
    },
    {
      "type":"object",
      "extends":{
        "$ref":"generic-field-schema.json"
      },
      "description":"A categorical field from the dataset",
      "properties":{
        "optype":{
          "description":"Operational type of this field",
          "optional":true,
          "enum":["categorical"]
        },
        "datatype":{
          "description":"The storage type of the field",
          "optional":true,
          "enum":["string"]
        },
        "summary":{
          "type":"object",
          "optional":true,
          "properties":{
            "missing_count":{
              "description":"# of instances missing this field",
              "type":["integer","null"]
            },
            "categories":{
              "description":"The possible categories for categorical fields",
              "type":"array",
              "items":{
                "type":"array",
                "items":[
                  {"type":["string", "number"]},
                  {"type":["integer","null"]}
                ]
              }
            }
          }
        }
      }
    },
    {
      "type":"object",
      "extends":{
        "$ref":"generic-field-schema.json"
      },
      "description":"A numeric field from the dataset",
      "properties":{
        "optype":{
          "description":"Operational type of this field",
          "optional":true,
          "enum":["numeric"]
        },
        "datatype":{
          "description":"The storage type of the field",
          "optional":true,
          "enum":[
            "integer","int8","int16","int32","int64",
            "float","double","day","month","year","hour",
            "minute","second","millisecond","day-of-week","day-of-month"
          ]
        },
        "summary":{
          "optional":true,
          "properties":{
            "missing_count":{
              "description":"# of instances missing this field",
              "type":"integer"
            },
            "maximum":{
              "description":"Maximum value for numeric fields",
              "optional":true,
              "type":"number"
            },
            "minimum":{
              "description":"Minimum value for numeric fields",
              "optional":true,
              "type":"number"
            },
            "median":{
              "description":"The approximate median for numeric fields",
              "optional":true,
              "type":"number"
            },
            "sum":{
              "description":"Sum of values (for mean calculation)",
              "type":"number"
            },
            "sum_squares":{
              "description":"Sum of squared values (for variance calculation)",
              "type":"number"
            },
            "population":{
              "description":"# of instances containing data for this field",
              "type":"integer"
            },
            "mean":{
              "description":"The sample mean for numeric fields",
              "optional":true,
              "type":"number"
            },
            "variance":{
              "description":"The sample variance for numeric fields",
              "optional":true,
              "type":"number"
            },
            "standard_deviation":{
              "description":"The sample standard deviation for numeric fields",
              "optional":true,
              "type":"number"
            },
            "splits":{
              "description":"DEPRECATED - Histogram split points for this field",
              "optional":true,
              "type":"array",
              "items":{
                "type":"number"
              }
            },
            "counts":{
              "description":"Captures the distribution for the field.  Contains
                             tuples of the unique values and their occurrence counts.
                             Used when there are 32 or less unique numeric values",
              "optional":true,
              "type":"array",
              "items":{
                "type":"array",
                "items":[
                  {"type":"number"},
                  {"type":"integer"}
                ]
              }
            },
            "bins":{
              "description":"Captures the distribution for the field.
                             Each tuple represents a bin from an approximate
                             histogram.  Each bin contains the bin mean and a
                             membership count.  Used when there are more than 32
                             unique numeric values.",
              "optional":true,
              "type":"array",
              "items":{
                "type":"array",
                "items":[
                  {"type":"number"},
                  {"type":"integer"}
                ]
              }
            }
          }
        }
      }
    },
    {
      "type":"object",
      "extends":{
        "$ref":"generic-field-schema.json"
      },
      "description":"A datetime field from the dataset",
      "properties":{
        "optype":{
          "description":"Operational type of this field",
          "optional":true,
          "enum":["datetime"]
        },
        "datatype":{
          "description":"The storage type of the field",
          "optional":true,
          "enum":["string"]
        },
        "time_formats":{
          "description":"Formats of times in this field from clj-time",
          "optional":true,
          "type":"array",
          "items":{
            "type":"string"
          }
        },
        "summary":{
          "type":"object",
          "optional":true,
          "properties":{
            "missing_count":{
              "description":"# of instances missing this field",
              "type":"integer"
            }
          }
        }
      }
    }
  ]
}

## generic-field-schema.json
{
  "description":"A field from the dataset",
  "type":"object",
  "properties":{
    "name":{
      "description":"Name for the field",
      "optional":true,
      "type":"string"
    },
    "description":{
      "description":"Free text description of the field",
      "optional":true,
      "type":"string"
    },
    "label":{
      "description":"A label for the field (free text for use by clients)",
      "optional":true,
      "type":"string"
    },
    "column_number":{
      "description":"Column from the data source",
      "optional":true,
      "type":"integer",
      "minimum":0
    },
    "parent_ids":{
      "description":"IDs of the parents if this is a generated field",
      "optional":true,
      "type":"array",
      "items":{"type":"string"}
    },
    "child_ids":{
      "description":"IDs of the children if this generates other fields",
      "optional":true,
      "type":"array",
      "items":{"type":"string"}
    },
    "optype":{
      "description":"Type describing how the field will be used in the model",
      "type":"string",
      "optional":true,
    },
    "auto_generated":{
      "description":"True if the field was generated from another field",
      "type":"boolean",
      "default":false,
      "optional":true
    },
    "datatype":{
      "description":"The storage type of the field",
      "type":"string",
      "optional":true
    },
    "preferred":{
      "description":"Whether the field is used by default for model creation",
      "optional":true,
      "default":true,
      "type":"boolean"
    },
    "missing_tokens":{
      "description":"Overrides global missing tokens",
      "optional":true,
      "type":"array",
      "items":{"type":"string"}
    },
    "locale":{
      "description":"Overrides global locale",
      "optional":true,
      "type":"string"
    }
  }
}

## JSON PML schemas.md

      
    Raw
  

              JSON PML schemas.md
            
          
    JSON PML: schemas for models

model-schema.json A generic ML model, containing fields shared by most models regardless of their concrete type.  It uses:

sample-schema.json The schema for dataset sampling specifications
field-coll-schema.json Auxiliary schema describing a collection of field (or "properties") descriptors
generic-field-schema.json Properties shared by all fields, regardless of their type.
field-schema.json The union schema of all field descriptor types, with their specific properties.


tree-model-schema.json A specialization of the model schema to decision tree models.  It uses:

node-schema.json The schema for the nodes in a decision tree


## model-schema.json
{
  "description":"Metadata specifying a generic model",
  "type":"object",
  "properties":{
    "model":{
      "type":"object",
      "properties":{
        "kind":{
          "description":"Identifier of this model's kind (e.g., stree)",
          "optional":true,
          "type":"string"
        },
        "type":{
          "description":"Identifier of this model's type (e.g., regression)",
          "optional":true,
          "enum":["classification", "regression"]
        },
        "dataset_id":{
          "description":"Identifier of this model's dataset",
          "type":"string"
        },
        "row_range":{
          "description":"Rows used to build this model.",
          "type":"object",
          "optional":true,
          "properties":{
            "start":{
              "type":"number",
              "minimum":0
            },
            "size":{
              "type":"number",
              "minimum":0
            }
          }
        },
        "ordering":{
            "enum":["linear", "random", "deterministic"],
            "optional":"true",
            "default":"deterministic"
        },
        "sample":{
          "optional":true,
          "$ref":"sample-schema.json"
        },
        "locale":{
          "description":"Default locale for field values",
          "optional":true,
          "type":"string"
        },
        "callbacks":{
          "description":"A list of callbacks.",
          "optional":true,
          "$ref":"callbacks-schema.json"
        },
        "missing_tokens":{
          "description":"Default tokens that represent a missing value",
          "type":"array",
          "optional":true,
          "default":[],
          "items":{
            "type":"string"
          }
        },
        "fields":{
          "description":"Possibly partial list of descriptors for fields
                         used in the model",
          "optional":true,
          "$ref":"field-coll-schema.json"
        },
        "model_fields":{
          "description":"Map of descriptors for fields actually appearing
                         in the model, without summaries",
          "optional":true,
          "$ref":"field-coll-schema.json"
        },
        "input_fields":{
          "description":"List of input field identifiers (features)",
          "optional":true,
          "type":"array",
          "items":{"type":"string"}
        },
        "excluded_input_fields":{
          "description":"List of field identifiers to exclude from input",
          "optional":true,
          "type":"array",
          "items":{"type":"string"}
        },
        "objective_fields":{
          "description":"Collection of objective field identifiers (targets)",
          "optional":true,
          "type":"array",
          "items":{"type":"string"}
        },
        "objective_field":{
          "description":"Objective field (target) identifier.  Gets added to
                         objective_fields.  Useful for models with a single
                         objective field.",
          "optional":true,
          "type":"string"
        }
      }
    }
  }
}

## node-schema.json
{
  "type":"object",
  "properties":{
    "count":{
      "description":"Number of training instances at this node",
      "type":"number"
    },
    "predicate":{
      "type":[
        "boolean",
        {
          "type":"object",
          "properties":{
            "field":{
              "description":"Field used for this decision"
              "type":"string"
            },
            "operator":{
              "description":"Type of test used for the field"
              "type":"string"
            },
            "value":{
              "description":"Field used for this decision"
              "type":["number","string"]
            }
          }
        }
      ]
    },
    "output":{
      "description":"Prediction given at this node",
      "type":["number","string"]
    },
    "confidence":{
      "optional":true,
      "description":"Probability of correctness for classification and an
                     estimate of the error for regression.",
      "type":["number"],
      "minimum":0
    },
    "objective_summary":{
      "type":"object",
      "optional":true,
      "properties":{
        "categories":{
          "description":"Captures the distribution for the objective field.
                         Each tuple contains a category and the category
                         occurrence count.  Used when the objective is
                         categorical.

                         For partial models, counts are an estimation and,
                         as such, they won't in general be integers.  But for
                         finished models the pairs will always consist of
                         a string and an integer.",
          "optional":true,
          "type":"array",
          "items":{
            "type":"array",
            "items":[
              {"type":["string", "number"]},
              {"type":"number"}]
          }
        },
        "counts":{
          "description":"Captures the distribution for the objective field.
                         Contains tuples of the unique values and their
                         occurrence counts.  Used when there are 32 or
                         less unique numeric values.

                         During model construction, these counts can be
                         fractional for partial models.  When the model is
                         finished, however, all counts will be integers.",
          "optional":true,
          "type":"array",
          "items":{
            "type":"array",
            "items":["number", "number"],
          }
        },
        "bins":{
          "description":"Captures the distribution for the objective field.
                         Each tuple represents a bin from an approximate
                         histogram.  Each bin contains the bin mean and a
                         membership count.  Used when there are more than
                         32 unique numeric values

                         During model construction, the counts can be
                         fractional for partial models.  When the model is
                         finished, however, all counts will be integers.",
          "optional":true,
          "type":"array",
          "items":{
            "type":"array",
            "items":["number", "number"]
          }
        },
        "maximum":{
          "description":"Maximum value for numeric fields, used when 'bins'
                         are present.",
          "optional":true,
          "type":"number"
        },
        "minimum":{
          "description":"Minimum value for numeric fields, used when 'bins'
                         are present.",
          "optional":true,
          "type":"number"
        },
      }
    },
    "distribution":{
      "description":"DEPRECATED - Distribution of the target at this node",
      "optional":true,
      "type":"array",
      "items":{
        "type":"array"
        "items":[
          {"type":["string", "number"]},
          {"type":"number"}
        ]
      }
    },
    "children":{
      "description":"Children of this node",
      "optional":true,
      "type":"array",
      "items":{"$ref":"node-schema.json"},
      "minItems":1
    }
  }
}

## sample-schema.json
{
  "description":"Sampling specification. If not given,
                 no sampling is performed.",
  "type":"object",
  "properties":{
    "rate":{
      "description":"The rate: fraction of rows we pick.",
      "type":"number",
      "minimum":0
    },
    "replace":{
      "description":"Whether we sample with replacement or not.",
      "type":"boolean",
      "optional":true,
      "default":false
    },
    "out_of_bag":{
      "description":"Is the sampling out of bag?",
      "type":"boolean",
      "optional":true,
      "default":false
    },
    "seed":{
      "description":"A string to feed the random number generator
                     used for sampling.  The same seed produces
                     always the same sample (if all other parameters
                     stay the same).  If not specified, we choose a
                     seed at random.",
      "type":"string",
      "optional":true
    }
  }
}

## tree-model-schema.json
{
  "description":"A model consisting of a decision tree",
  "type":"object",
  "extends":{
    "$ref":"model-schema.json"
  },
  "properties":{
    "model":{
      "type":"object",
      "properties":{
        "kind":{
          "description":"The kind of tree model.",
          "enum":["stree"]
        },
        "missing_strategy":{
          "description":"Action to take on missing data.",
          "optional":true,
          "enum":["last_prediction"]
        },
        "split_criterion":{
          "description":"Method of choosing best attribute and split point for
                         a given node.",
          "optional":true,
          "enum":["information_gain", "information_gain_ratio",
                  "information_gain_mix", "squared_error",
                  "squared_error_ratio"]
        },
        "stat_pruning":{
          "description":"Eliminates low confidence leaf nodes from tree using
                         statistical tests.",
          "optional":true,
          "default":true,
          "type":"boolean"
        },
        "support_threshold":{
          "description":"For a split to be valid, each child's support
                         (instances / total instances) must be greater than this
                         threshold.",
          "optional":true,
          "default":0.001,
          "type":"number",
          "minimum":0,
          "maximum":1
        },
        "depth_threshold":{
          "description":"The depth, or generation, limit for a tree.",
          "optional":true,
          "default":20,
          "type":"integer",
          "minimum":1
        },
        "prune_holdout":{
          "description":"DEPRECATED - Pruning with a holdout set.",
          "optional":true,
          "type":"number",
          "minimum":0,
          "maximum":1
        },
        "freeze_threshold":{
          "description":"PRIVATE - Once a field histogram exceeds this many
                         inserts, its bin locations are 'frozen' into place
                         to improve the performance of future inserts.",
          "optional":true,
          "default":4096,
          "type":"integer",
          "minimum":0
        },
        "objective_histogram_size":{
          "description":"PRIVATE - Size of the histograms for capturing the
                         objective field.",
          "optional":true,
          "default":32,
          "type":"integer"
        },
        "field_histogram_size":{
          "description":"PRIVATE - Size of the histograms for capturing
                         input fields.",
          "optional":true,
          "default":64,
          "type":"integer"
        },
        "similarity_threshold":{
          "description":"PRIVATE - The threshold for early splitting
                         (lower requires more similarity).",
          "optional":true,
          "default":0.15,
          "type":"number",
          "minimum":0,
          "maximum":1
        },
        "split_score_threshold":{
          "description":"PRIVATE - The minimum score required for a split to be
                         vaild.",
          "optional":true,
          "default":1.0E-12,
          "type":"number",
          "minimum":0,
          "maximum":1
        },
        "node_threshold":{
          "description":"PRIVATE - The soft limit for number of nodes in
                         the tree.",
          "optional":true,
          "default":1024,
          "type":"integer"
        },
        "selective_pruning":{
          "description":"PRIVATE - When true, stat pruning will have less effect
                         for small datasets.",
          "optional":true,
          "type":"boolean"
        },
        "z_statistic":{
          "description":"PRIVATE - Parameter for stat pruning.",
          "optional":true,
          "default":2,
          "type":"number"
        },
        "split_early":{
          "description":"PRIVATE - Allows early splits.",
          "optional":true,
          "default":true,
          "type":"boolean"
        },
        "importance":{
          "description":"Contains pairs of field ids and importance scores (one
                         for each input field). The higher the score, the more
                         the field helps reduce error on the training set.",
          "optional":true,
          "type":"array",
          "items":{
            "type":"array",
            "items":[
              {"type":"string"},
              {"type":"number"}
            ]
          }
        },
        "root":{
          "description":"The root node of the decision tree.",
          "optional":true,
          "$ref":"node-schema.json"
        }
      }
    }
  }
}
	{
	"description":"A list of callback specifications. They're invoked
	upon resource state changes.",
	"type":"array",
	"items":{
	"description":"Each callback specification contains at an HTTP method,
	a URL, a list of triggering states and a synchronicity flag.",
	"type":"object",
	"properties":{
	"method":{
	"description":"The HTTP method to use.",
	"optional":true,
	"default":"GET",
	"enum":["GET", "POST", "PUT", "DELETE", "get", "post", "put", "delete"],
	},
	"url":{
	"description":"The URL to invoke with the method above.",
	"type":"string"
	},
	"states":{
	"description":"A list of states for which the callback should be
	called. If not given, the callback will be called
	for all state changes.",
	"optional":true,
	"type":"array",
	"items":["string"]
	},
	"synchronous":{
	"description":"A flag specifying wether the callback is synchronous and
	we need to wait for a remote response before proceeding.",
	"optional":true,
	"default":false,
	"type":"boolean"
	}
	}
	}
	}
	{
	"description":"A collection of fields",
	"type":"object",
	"additionalProperties":{"$ref":"field-schema.json"}
	}
	{
	"description":"A field from the dataset",
	"type":[
	{
	"type":"object",
	"extends":{
	"$ref":"generic-field-schema.json"
	},
	"description":"A text field from the dataset",
	"properties":{
	"optype":{
	"description":"Operational type of this field",
	"optional":true,
	"enum":["text"]
	},
	"datatype":{
	"description":"The storage type of the field",
	"optional":true,
	"enum":["string","boolean"]
	},
	"summary":{
	"type":"object",
	"optional":true,
	"properties":{
	"missing_count":{
	"description":"# of instances missing this field",
	"type":"integer"
	}
	}
	}
	}
	},
	{
	"type":"object",
	"extends":{
	"$ref":"generic-field-schema.json"
	},
	"description":"A categorical field from the dataset",
	"properties":{
	"optype":{
	"description":"Operational type of this field",
	"optional":true,
	"enum":["categorical"]
	},
	"datatype":{
	"description":"The storage type of the field",
	"optional":true,
	"enum":["string"]
	},
	"summary":{
	"type":"object",
	"optional":true,
	"properties":{
	"missing_count":{
	"description":"# of instances missing this field",
	"type":["integer","null"]
	},
	"categories":{
	"description":"The possible categories for categorical fields",
	"type":"array",
	"items":{
	"type":"array",
	"items":[
	{"type":["string", "number"]},
	{"type":["integer","null"]}
	]
	}
	}
	}
	}
	}
	},
	{
	"type":"object",
	"extends":{
	"$ref":"generic-field-schema.json"
	},
	"description":"A numeric field from the dataset",
	"properties":{
	"optype":{
	"description":"Operational type of this field",
	"optional":true,
	"enum":["numeric"]
	},
	"datatype":{
	"description":"The storage type of the field",
	"optional":true,
	"enum":[
	"integer","int8","int16","int32","int64",
	"float","double","day","month","year","hour",
	"minute","second","millisecond","day-of-week","day-of-month"
	]
	},
	"summary":{
	"optional":true,
	"properties":{
	"missing_count":{
	"description":"# of instances missing this field",
	"type":"integer"
	},
	"maximum":{
	"description":"Maximum value for numeric fields",
	"optional":true,
	"type":"number"
	},
	"minimum":{
	"description":"Minimum value for numeric fields",
	"optional":true,
	"type":"number"
	},
	"median":{
	"description":"The approximate median for numeric fields",
	"optional":true,
	"type":"number"
	},
	"sum":{
	"description":"Sum of values (for mean calculation)",
	"type":"number"
	},
	"sum_squares":{
	"description":"Sum of squared values (for variance calculation)",
	"type":"number"
	},
	"population":{
	"description":"# of instances containing data for this field",
	"type":"integer"
	},
	"mean":{
	"description":"The sample mean for numeric fields",
	"optional":true,
	"type":"number"
	},
	"variance":{
	"description":"The sample variance for numeric fields",
	"optional":true,
	"type":"number"
	},
	"standard_deviation":{
	"description":"The sample standard deviation for numeric fields",
	"optional":true,
	"type":"number"
	},
	"splits":{
	"description":"DEPRECATED - Histogram split points for this field",
	"optional":true,
	"type":"array",
	"items":{
	"type":"number"
	}
	},
	"counts":{
	"description":"Captures the distribution for the field. Contains
	tuples of the unique values and their occurrence counts.
	Used when there are 32 or less unique numeric values",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array",
	"items":[
	{"type":"number"},
	{"type":"integer"}
	]
	}
	},
	"bins":{
	"description":"Captures the distribution for the field.
	Each tuple represents a bin from an approximate
	histogram. Each bin contains the bin mean and a
	membership count. Used when there are more than 32
	unique numeric values.",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array",
	"items":[
	{"type":"number"},
	{"type":"integer"}
	]
	}
	}
	}
	}
	}
	},
	{
	"type":"object",
	"extends":{
	"$ref":"generic-field-schema.json"
	},
	"description":"A datetime field from the dataset",
	"properties":{
	"optype":{
	"description":"Operational type of this field",
	"optional":true,
	"enum":["datetime"]
	},
	"datatype":{
	"description":"The storage type of the field",
	"optional":true,
	"enum":["string"]
	},
	"time_formats":{
	"description":"Formats of times in this field from clj-time",
	"optional":true,
	"type":"array",
	"items":{
	"type":"string"
	}
	},
	"summary":{
	"type":"object",
	"optional":true,
	"properties":{
	"missing_count":{
	"description":"# of instances missing this field",
	"type":"integer"
	}
	}
	}
	}
	}
	]
	}
	{
	"description":"Metadata specifying a generic model",
	"type":"object",
	"properties":{
	"model":{
	"type":"object",
	"properties":{
	"kind":{
	"description":"Identifier of this model's kind (e.g., stree)",
	"optional":true,
	"type":"string"
	},
	"type":{
	"description":"Identifier of this model's type (e.g., regression)",
	"optional":true,
	"enum":["classification", "regression"]
	},
	"dataset_id":{
	"description":"Identifier of this model's dataset",
	"type":"string"
	},
	"row_range":{
	"description":"Rows used to build this model.",
	"type":"object",
	"optional":true,
	"properties":{
	"start":{
	"type":"number",
	"minimum":0
	},
	"size":{
	"type":"number",
	"minimum":0
	}
	}
	},
	"ordering":{
	"enum":["linear", "random", "deterministic"],
	"optional":"true",
	"default":"deterministic"
	},
	"sample":{
	"optional":true,
	"$ref":"sample-schema.json"
	},
	"locale":{
	"description":"Default locale for field values",
	"optional":true,
	"type":"string"
	},
	"callbacks":{
	"description":"A list of callbacks.",
	"optional":true,
	"$ref":"callbacks-schema.json"
	},
	"missing_tokens":{
	"description":"Default tokens that represent a missing value",
	"type":"array",
	"optional":true,
	"default":[],
	"items":{
	"type":"string"
	}
	},
	"fields":{
	"description":"Possibly partial list of descriptors for fields
	used in the model",
	"optional":true,
	"$ref":"field-coll-schema.json"
	},
	"model_fields":{
	"description":"Map of descriptors for fields actually appearing
	in the model, without summaries",
	"optional":true,
	"$ref":"field-coll-schema.json"
	},
	"input_fields":{
	"description":"List of input field identifiers (features)",
	"optional":true,
	"type":"array",
	"items":{"type":"string"}
	},
	"excluded_input_fields":{
	"description":"List of field identifiers to exclude from input",
	"optional":true,
	"type":"array",
	"items":{"type":"string"}
	},
	"objective_fields":{
	"description":"Collection of objective field identifiers (targets)",
	"optional":true,
	"type":"array",
	"items":{"type":"string"}
	},
	"objective_field":{
	"description":"Objective field (target) identifier. Gets added to
	objective_fields. Useful for models with a single
	objective field.",
	"optional":true,
	"type":"string"
	}
	}
	}
	}
	}
	{
	"type":"object",
	"properties":{
	"count":{
	"description":"Number of training instances at this node",
	"type":"number"
	},
	"predicate":{
	"type":[
	"boolean",
	{
	"type":"object",
	"properties":{
	"field":{
	"description":"Field used for this decision"
	"type":"string"
	},
	"operator":{
	"description":"Type of test used for the field"
	"type":"string"
	},
	"value":{
	"description":"Field used for this decision"
	"type":["number","string"]
	}
	}
	}
	]
	},
	"output":{
	"description":"Prediction given at this node",
	"type":["number","string"]
	},
	"confidence":{
	"optional":true,
	"description":"Probability of correctness for classification and an
	estimate of the error for regression.",
	"type":["number"],
	"minimum":0
	},
	"objective_summary":{
	"type":"object",
	"optional":true,
	"properties":{
	"categories":{
	"description":"Captures the distribution for the objective field.
	Each tuple contains a category and the category
	occurrence count. Used when the objective is
	categorical.

	For partial models, counts are an estimation and,
	as such, they won't be in general integers. But
	finished models the pairs will always consist of
	a string and an integer.",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array",
	"items":[
	{"type":["string", "number"]},
	{"type":"number"}]
	}
	},
	"counts":{
	"description":"Captures the distribution for the objective field.
	Contains tuples of the unique values and their
	occurrence counts. Used when there are 32 or
	less unique numeric values.

	During model construction, these counts can be
	fractional for partial models. When the model is
	finished, however, all counts will be integers.",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array",
	"items":["number", "number"],
	}
	},
	"bins":{
	"description":"Captures the distribution for the objective field.
	Each tuple represents a bin from an approximate
	histogram. Each bin contains the bin mean and a
	membership count. Used when there are more than
	32 unique numeric values

	During model construction, the counts can be
	fractional for partial models. When the model is
	finished, however, all counts will be integers.",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array",
	"items":["number", "number"]
	}
	},
	"maximum":{
	"description":"Maximum value for numeric fields, used when 'bins'
	are present.",
	"optional":true,
	"type":"number"
	},
	"minimum":{
	"description":"Minimum value for numeric fields, used when 'bins'
	are present.",
	"optional":true,
	"type":"number"
	},
	}
	},
	"distribution":{
	"description":"DEPRECATED - Distribution of the target at this node",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array"
	"items":[
	{"type":["string", "number"]},
	{"type":"number"}
	]
	}
	},
	"children":{
	"description":"Children of this node",
	"optional":true,
	"type":"array",
	"items":{"$ref":"node-schema.json"},
	"minItems":1
	}
	}
	}
	{
	"description":"Sampling specification. If not given,
	no sampling is performed.",
	"type":"object",
	"properties":{
	"rate":{
	"description":"The rate: fraction of rows we pick.",
	"type":"number",
	"minimum":0
	},
	"replace":{
	"description":"Whether we sample with replacement or not.",
	"type":"boolean",
	"optional":true,
	"default":false
	},
	"out_of_bag":{
	"description":"Is the sampling out of bag?",
	"type":"boolean",
	"optional":true,
	"default":false
	},
	"seed":{
	"description":"A string to feed the random number generator
	used for sampling. The same seed produces
	always the same sample (if all other parameters
	stay the same). If not specified, we choose a
	seed at random.",
	"type":"string",
	"optional":true
	}
	}
	}
	{
	"description":"A model consisting of a decision tree",
	"type":"object",
	"extends":{
	"$ref":"model-schema.json"
	},
	"properties":{
	"model":{
	"type":"object",
	"properties":{
	"kind":{
	"description":"The kind of tree model.",
	"enum":["stree"]
	},
	"missing_strategy":{
	"description":"Action to take on missing data.",
	"optional":true,
	"enum":["last_prediction"]
	},
	"split_criterion":{
	"description":"Method of choosing best attribute and split point for
	a given node.",
	"optional":true,
	"enum":["information_gain", "information_gain_ratio",
	"information_gain_mix", "squared_error",
	"squared_error_ratio"]
	},
	"stat_pruning":{
	"description":"Eliminates low confidence leaf nodes from tree using
	statistical tests.",
	"optional":true,
	"default":true,
	"type":"boolean"
	},
	"support_threshold":{
	"description":"For a split to be valid, each child's support
	(instances / total instances) must be greater than this
	threshold.",
	"optional":true,
	"default":0.001,
	"type":"number",
	"minimum":0,
	"maximum":1
	},
	"depth_threshold":{
	"description":"The depth, or generation, limit for a tree.",
	"optional":true,
	"default":20,
	"type":"integer",
	"minimum":1
	},
	"prune_holdout":{
	"description":"DEPRECATED - Pruning with a holdout set.",
	"optional":true,
	"type":"number",
	"minimum":0,
	"maximum":1
	},
	"freeze_threshold":{
	"description":"PRIVATE - Once a field histogram exceeds this many
	inserts, its bin locations are 'frozen' into place
	to improve the performance of future inserts.",
	"optional":true,
	"default":4096,
	"type":"integer",
	"minimum":0
	},
	"objective_histogram_size":{
	"description":"PRIVATE - Size of the histograms for capturing the
	objective field.",
	"optional":true,
	"default":32,
	"type":"integer"
	},
	"field_histogram_size":{
	"description":"PRIVATE - Size of the histograms for capturing
	input fields.",
	"optional":true,
	"default":64,
	"type":"integer"
	},
	"similarity_threshold":{
	"description":"PRIVATE - The threshold for early splitting
	(lower requires more similarity).",
	"optional":true,
	"default":0.15,
	"type":"number",
	"minimum":0,
	"maximum":1
	},
	"split_score_threshold":{
	"description":"PRIVATE - The minimum score required for a split to be
	vaild.",
	"optional":true,
	"default":1.0E-12,
	"type":"number",
	"minimum":0,
	"maximum":1
	},
	"node_threshold":{
	"description":"PRIVATE - The soft limit for number of nodes in
	the tree.",
	"optional":true,
	"default":1024,
	"type":"integer"
	},
	"selective_pruning":{
	"description":"PRIVATE - When true, stat pruning will have less effect
	for small datasets.",
	"optional":true,
	"type":"boolean"
	},
	"z_statistic":{
	"description":"PRIVATE - Parameter for stat pruning.",
	"optional":true,
	"default":2,
	"type":"number"
	},
	"split_early":{
	"description":"PRIVATE - Allows early splits.",
	"optional":true,
	"default":true,
	"type":"boolean"
	},
	"importance":{
	"description":"Contains pairs of field ids and importance scores (one
	for each input field). The higher the score, the more
	the field helps reduce error on the training set.",
	"optional":true,
	"type":"array",
	"items":{
	"type":"array",
	"items":[
	{"type":"string"},
	{"type":"number"}
	]
	}
	},
	"root":{
	"description":"The root node of the decision tree.",
	"optional":true,
	"$ref":"node-schema.json"
	}
	}
	}
	}
	}