Skip to content

Instantly share code, notes, and snippets.

@mmerce
Created October 4, 2016 22:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmerce/d04a8a15f7088c1d7f2e2a0c91cc1455 to your computer and use it in GitHub Desktop.
Save mmerce/d04a8a15f7088c1d7f2e2a0c91cc1455 to your computer and use it in GitHub Desktop.
restricting SMACdown to use a balanced objective
;; Here's a custom generator for creating BigML ensembles. As
;; "random_candidate_ratio" tends towards 1, the ensemble becomes a
;; bag.
(define (smacdown-ensemble--model-params-generator objective-type)
  ;; Returns a zero-argument procedure. Every call draws a fresh, random
  ;; map of ensemble creation parameters.
  ;;
  ;; objective-type: optype of the objective field. The value "numeric"
  ;;   is treated as a regression problem; anything else is treated as
  ;;   classification.
  (lambda ()
    (let (max-trees 127
          max-nodes 1999
          regression (= "numeric" objective-type))
      {"random_candidate_ratio" (rand)
       "stat_pruning" (if (< (rand) 0.5) false true)
       ;; Always balance the objective for classification. Balancing
       ;; weights instances by class, so it is switched off when the
       ;; objective is numeric.
       ;; NOTE(review): assumes BigML only supports class balancing for
       ;; categorical objectives -- confirm against the ensemble API docs.
       "balance_objective" (not regression)
       ;; Log-uniform draw, so small ensembles are sampled as often as
       ;; large ones; the result lies between 2 and max-trees + 1.
       "number_of_models" (+ 1 (round (exp (* (log max-trees) (rand)))))
       "randomize" true
       "node_threshold" (round (rand-range 4 max-nodes))})))
;; This function takes a training and test set (and an objective field
;; id) and evaluates each set of parameters by training an ensemble
;; with those parameters and evaluating it on the test set. The
;; requested metric (phi, for example) is the quantity we'd like to
;; optimize, so we pull 1 - metric out of each evaluation to return as
;; the objective, as the algorithm seeks to *minimize* a value and we
;; want to *maximize* the metric.
(define (smacdown-ensemble--evaluator train test obj metric name)
  ;; Builds the evaluation procedure for the search.
  ;;
  ;; train, test: ids of the training and test datasets.
  ;; obj:         id of the objective field.
  ;; metric:      key looked up under ["result" "model"] in each
  ;;              evaluation resource.
  ;; name:        prefix used when naming the candidate ensembles.
  ;;
  ;; The returned procedure takes a list of candidate parameter maps and
  ;; an iteration number, and returns one value of (1 - metric) per
  ;; candidate. It raises an error with code -30 when the metric is not
  ;; a number in an evaluation.
  (lambda (params itr)
    (log-info "Evaluating " (count params) " candidates...")
    (let (train-params {"dataset" train
                        "objective_field" obj
                        "seed" "SMACdown"
                        "name" (str name " smacdown itr " itr " test model")}
          ;; The fixed training settings win over any clashing candidate key.
          mod-fn (lambda (p) (merge p train-params))
          eval-fn (lambda (m) {"model" m "dataset" test})
          mod-ids (create* "ensemble" (map mod-fn params))
          eval-ids (create* "evaluation" (map eval-fn mod-ids))
          phi (lambda (ev)
                (let (metric-value (ev ["result" "model" metric] false))
                  (if (not (number? metric-value))
                    (raise {"message" (str metric " is not a valid metric!")
                            "code" -30})
                    (- 1 metric-value))))
          ;; Wait for and score every evaluation *before* reporting
          ;; completion, so the log line reflects what actually happened.
          scores (map (lambda (eid) (phi (fetch (wait eid)))) eval-ids))
      (log-info "Evaluation complete.")
      scores)))
;; Find optimal parameters using SMACdown
(define (find-optimal-parameters train-params objective-id objective-type)
  ;; Creates the training dataset from train-params and its out-of-bag
  ;; complement as the test dataset, runs smacdown-optimize over them,
  ;; and returns the optimizer's output with each entry's
  ;; smacdown--actual value replaced by (1 - value) stored under the
  ;; metric name.
  ;;
  ;; Note: `metric` is not a parameter of this procedure; it is read
  ;; from the enclosing (global) scope.
  (let (label "smacdown-ensemble"
        holdout-params (assoc train-params "out_of_bag" true)
        train-ds (create-dataset train-params)
        test-ds (create-dataset holdout-params)
        _ (wait* [train-ds test-ds])
        evaluate (smacdown-ensemble--evaluator train-ds
                                               test-ds
                                               objective-id
                                               metric
                                               label)
        propose (smacdown-ensemble--model-params-generator objective-type)
        ranked (smacdown-optimize propose evaluate label)
        ;; The optimizer minimizes 1 - metric; convert back to the metric.
        restore (lambda (entry)
                  (assoc (dissoc entry smacdown--actual)
                         metric (- 1 (entry smacdown--actual)))))
    (map restore ranked)))
;; Delete resources ignoring errors
(define (safe-delete id)
  ;; Best-effort delete: a failure is logged at info level and
  ;; otherwise ignored, so one bad resource never aborts a cleanup pass.
  (try
    (delete id)
    (catch err
      (let (msg (str "Error deleting resource " id " ignored"))
        (log-info msg)))))
;; Take a dataset, create a training and test set, and find the
;; optimal parameters. The function returns a list of parameters
;; ranked by objective.
(define (optimize-ensemble dataset-id objective-id metric)
  ;; Runs the full search on an 80/20 split of dataset-id, then trains
  ;; the top-ranked configuration twice: once on the full dataset and
  ;; once on a fresh training split, which is evaluated on its test
  ;; split.
  ;;
  ;; dataset-id:   source dataset.
  ;; objective-id: objective field id, or "default" to use the dataset's
  ;;               own objective field.
  ;; metric:       accepted for the caller's benefit, but not forwarded;
  ;;               find-optimal-parameters reads `metric` from the
  ;;               enclosing scope instead.
  ;;
  ;; Returns the ranked parameter list with "full_model", "model" and
  ;; "evaluation" ids added to its first entry. Raises when the
  ;; objective field's optype cannot be found in the dataset.
  (let (split-params {"origin_dataset" dataset-id
                      "sample_rate" 0.8
                      "replacement" false
                      "seed" "SMACdown"}
        holdout-params (assoc split-params "out_of_bag" true)
        target (if (not (= objective-id "default"))
                 objective-id
                 (dataset-get-objective-id dataset-id))
        found-type ((fetch dataset-id) ["fields" target "optype"] false)
        target-type (if found-type
                      found-type
                      (raise {"message" "Invalid objective field"}))
        ranked (find-optimal-parameters split-params target target-type)
        _ (log-info "SMACdown search complete")
        ;; Cleanup happens before the final resources are created, so
        ;; those are never swept up by it.
        _ (when delete-resources
            (log-info "Deleting intermediate resources...")
            (map safe-delete (created-resources)))
        _ (log-info "Training model on full dataset...")
        winner (head ranked)
        final-params (merge (winner "parameters" {})
                            {"objective_field" target "seed" "SMACdown"})
        whole-model (create-ensemble dataset-id final-params)
        ;; Same seed and sampling settings as the search split.
        train-ds (create-dataset split-params)
        test-ds (create-dataset holdout-params)
        split-model (create-ensemble train-ds final-params)
        split-eval (create-evaluation split-model test-ds))
    (wait* [split-eval whole-model])
    (cons (assoc winner
                 "full_model" whole-model
                 "model" split-model
                 "evaluation" split-eval)
          (tail ranked))))
;; Script entry point: run the optimization and bind the ranked
;; parameter list to `result`.
;; NOTE(review): dataset-id, objective-id and metric are not defined in
;; this file -- presumably they are script inputs; confirm against the
;; script's metadata.
(define result (optimize-ensemble dataset-id objective-id metric))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment