Created
October 4, 2016 22:02
-
-
Save mmerce/d04a8a15f7088c1d7f2e2a0c91cc1455 to your computer and use it in GitHub Desktop.
restricting SMACdown to use a balanced objective
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Custom generator for candidate BigML ensemble parameters.
;;
;; Takes the optype of the objective field (e.g. "numeric") and returns
;; a zero-argument procedure; each call yields a fresh map of randomly
;; drawn ensemble parameters. As "random_candidate_ratio" tends towards
;; 1, the ensemble becomes a bag.
;;
;; "balance_objective" is requested only when the objective is not
;; numeric: class balancing is a classification concept, so it is
;; switched off for regression instead of being sent unconditionally.
(define (smacdown-ensemble--model-params-generator objective-type)
  ;; These values do not change between candidates, so they are bound
  ;; once here rather than on every call of the returned procedure.
  (let (max-trees 127
        max-nodes 1999
        regression (= "numeric" objective-type))
    (lambda ()
      {"random_candidate_ratio" (rand)
       "stat_pruning" (if (< (rand) 0.5) false true)
       "balance_objective" (not regression)
       ;; exp(log(max-trees) * u), with u drawn by (rand), spreads the
       ;; candidate tree counts on a log scale.
       "number_of_models" (+ 1 (round (exp (* (log max-trees) (rand)))))
       "randomize" true
       "node_threshold" (round (rand-range 4 max-nodes))})))
;; Builds the evaluation procedure handed to the optimizer.
;;
;; Arguments: a training dataset id, a test dataset id, the objective
;; field id, the name of the evaluation metric to optimize and a name
;; prefix for the created resources.
;;
;; The returned procedure takes a list of candidate parameter maps and
;; an iteration number. For every candidate it trains an ensemble on
;; the training set, evaluates it against the test set and reads
;; `metric` from the evaluation's ["result" "model"] section. Because
;; the algorithm seeks to *minimize* a value and we want to *maximize*
;; the metric, the value returned for each candidate is 1 - metric.
;;
;; Raises {"message" ... "code" -30} when the metric is absent from an
;; evaluation or is not a number.
(define (smacdown-ensemble--evaluator train test obj metric name)
  (lambda (params itr)
    (log-info "Evaluating " (count params) " candidates...")
    (let (train-params {"dataset" train
                        "objective_field" obj
                        "seed" "SMACdown"
                        "name" (str name " smacdown itr " itr " test model")}
          ;; The fixed training settings win over any clashing key in
          ;; the candidate, since they are the last argument to merge.
          mod-fn (lambda (p) (merge p train-params))
          eval-fn (lambda (m) {"model" m "dataset" test})
          mod-ids (create* "ensemble" (map mod-fn params))
          eval-ids (create* "evaluation" (map eval-fn mod-ids))
          phi (lambda (ev)
                (let (metric-value (ev ["result" "model" metric] false))
                  (if (not (number? metric-value))
                    (raise {"message" (str metric " is not a valid metric!")
                            "code" -30})
                    (- 1 metric-value))))
          ;; Wait for and score every evaluation *before* announcing
          ;; completion, so the log line below is truthful.
          scores (map (lambda (eid) (phi (fetch (wait eid)))) eval-ids))
      (log-info "Evaluation complete.")
      scores)))
;; Search for good ensemble parameters with SMACdown.
;;
;; Two datasets are created: one from `train-params` as given and one
;; from the same map with "out_of_bag" set to true. Once both are
;; ready, an evaluator and a parameter generator are built and handed
;; to `smacdown-optimize`. Each entry of its output is then rewritten:
;; the value stored under `smacdown--actual` is removed and 1 minus that
;; value is stored under `metric` instead.
;;
;; NOTE(review): `metric`, `smacdown--actual` and `smacdown-optimize`
;; are not defined in this function; presumably they come from the
;; script inputs / the SMACdown library -- confirm.
(define (find-optimal-parameters train-params objective-id objective-type)
  (let (holdout-params (assoc train-params "out_of_bag" true)
        train-ds (create-dataset train-params)
        test-ds (create-dataset holdout-params)
        _ (wait* [train-ds test-ds])
        score-fn (smacdown-ensemble--evaluator train-ds
                                               test-ds
                                               objective-id
                                               metric
                                               "smacdown-ensemble")
        sample-fn (smacdown-ensemble--model-params-generator objective-type)
        ranked (smacdown-optimize sample-fn score-fn "smacdown-ensemble"))
    (map (lambda (entry)
           (assoc (dissoc entry smacdown--actual)
                  metric
                  (- 1 (entry smacdown--actual))))
         ranked)))
;; Best-effort deletion of a resource: any error raised by `delete` is
;; caught and replaced by an informational log line naming the id.
(define (safe-delete id)
  (try
    (delete id)
    (catch err
      (let (notice (str "Error deleting resource " id " ignored"))
        (log-info notice)))))
;; Entry point of the optimization: split a dataset into an 80% sample
;; and its out-of-bag complement, search for the best ensemble
;; parameters and return the list of parameters ranked by objective.
;;
;; `objective-id` may be the string "default", in which case the
;; dataset's own objective field is looked up. An error is raised when
;; the objective field has no "optype" in the dataset's fields.
;;
;; The head of the returned list is extended with three resource ids:
;; "full_model" (ensemble trained on the whole dataset), "model"
;; (ensemble trained on the training sample) and "evaluation" (that
;; ensemble evaluated on the out-of-bag sample).
;;
;; NOTE(review): `delete-resources` is not a parameter; presumably it
;; is a script-level input -- confirm.
(define (optimize-ensemble dataset-id objective-id metric)
  (let (sampling {"origin_dataset" dataset-id
                  "sample_rate" 0.8
                  "replacement" false
                  "seed" "SMACdown"}
        holdout (assoc sampling "out_of_bag" true)
        target (if (= objective-id "default")
                 (dataset-get-objective-id dataset-id)
                 objective-id)
        optype (or ((fetch dataset-id) ["fields" target "optype"] false)
                   (raise {"message" "Invalid objective field"}))
        ranked (find-optimal-parameters sampling target optype))
    (log-info "SMACdown search complete")
    ;; Everything created so far is search scaffolding; drop it on request.
    (when delete-resources
      (log-info "Deleting intermediate resources...")
      (map safe-delete (created-resources)))
    (log-info "Training model on full dataset...")
    (let (winner (head ranked)
          settings (merge (winner "parameters" {})
                          {"objective_field" target "seed" "SMACdown"})
          full-ens (create-ensemble dataset-id settings)
          ;; The split is created again here because the search-time
          ;; datasets may have been deleted above.
          train-ds (create-dataset sampling)
          test-ds (create-dataset holdout)
          best-ens (create-ensemble train-ds settings)
          best-ev (create-evaluation best-ens test-ds))
      (wait* [best-ev full-ens])
      (cons (assoc winner
                   "full_model" full-ens
                   "model" best-ens
                   "evaluation" best-ev)
            (tail ranked)))))
;; Run the optimization and bind the ranked parameter list to `result`.
;; NOTE(review): `dataset-id`, `objective-id` and `metric` are not
;; defined in this file; presumably they are script inputs -- confirm.
(define result
  (optimize-ensemble dataset-id objective-id metric))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment