Created
January 17, 2012 19:42
-
-
Save lspector/1628406 to your computer and use it in GitHub Desktop.
Clojure code for tree-based genetic programming with tags (see http://hampshire.edu/lspector/tags-gecco-2011/)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Lee Spector (lspector@hampshire.edu) 20120106-20120117 | |
;; Clojure code for tree-based genetic programming with tags (see http://hampshire.edu/lspector/tags-gecco-2011/) | |
;; REQUIRES Clojure 1.3 for the concurrency to work (set single-thread-mode to true otherwise) | |
(ns tag-regression | |
(:require [clojure.zip :as zip])) | |
(def single-thread-mode false) | |
(def absolute-depth-limit 17) | |
(def population-size 1000) | |
(def trivial-geography-radius 500) | |
(def maximum-generations 50) | |
(def reproductive-tournament-size 7) | |
(def allow-tagging true) | |
(def tag-limit 1000) | |
(def execution-limit 1000) | |
(def penalty-for-exceeding-limit 10000000000000N) | |
(def int-erc-range [-10 10]) | |
(def node-selection-method :koza) ;; should be :koza or :tournament | |
(def node-tournament-size 2) | |
;; 8x^3 + y^3 + 27 | |
;(def target-data | |
; (for [x (range -10 11) | |
; y (range -10 11)] | |
; [{'x x 'y y} | |
; (+ (* 8 x x x) | |
; (* y y y) | |
; 27)])) | |
; ((xy)^4 + (xy)^2) + (16y^4 + 4y^2) + (81x^4 + 9x^2) | |
;(def target-data | |
; (for [x (range -10 11) | |
; y (range -10 11)] | |
; [{'x x 'y y} | |
; (+ (* (* x y) (* x y) (* x y) (* x y)) | |
; (* (* x y) (* x y)) | |
; (* 16 y y y y) | |
; (* 4 y y) | |
; (* 81 x x x x) | |
; (* 9 x x))])) | |
;; Tom's problem: (x+1)^5 with the only integer constant being 1 | |
(def target-data | |
(for [x (range -10 11)] | |
[{'x x} | |
(* (inc x) (inc x) (inc x) (inc x) (inc x))])) | |
(def function-table | |
(if allow-tagging | |
(zipmap '(+' -' *' pd :tagged-erf :tag-erf ) | |
'(2 2 2 2 0 1)) | |
(zipmap '(+' -' *' pd noop0 noop1) | |
'(2 2 2 2 0 1)))) | |
(def terminal-set | |
;'(x y :int-erc) | |
;'(x :int-erc) | |
'(x 1) | |
) | |
;;; stuff from eval_with_tagging.clj here | |
(def tagdo-semantics true) | |
(defn closest-association | |
"Returns the value for the closest match to the given tag in the given tag space, with | |
default-value returned if the tag-space is empty." | |
[tag tag-space default-value] | |
(if (empty? tag-space) | |
default-value | |
(loop [associations (conj (vec tag-space) (first tag-space))] ;; conj does wrap | |
(if (or (empty? (rest associations)) | |
(<= tag (ffirst associations))) | |
(second (first associations)) | |
(recur (rest associations)))))) | |
(defn eval-with-tagging | |
"Returns the result of evaluating expression with the provided step-limit and | |
constants (which should be a map of symbols to values). The provided default-value | |
is returned both for tag references that occur before any values have been tagged | |
and for tagging operations (unless tagdo-semantics is true, in which case the | |
argument to the tagging operation is evaluated and its value is returned). If the | |
step-limit is exceeded then :limit-exceeded is returned. Tagging is accomplished | |
by means of an item in function position of the form {:tag n} where n is an integer, | |
and where the single argument paired with this 'function' is the item to be tagged. | |
Tag references look like zero-argument function calls but with a function of the | |
form {:tagged n} where n is an integer. In the context of boolean values the | |
evaluator supports an 'if' form that takes three arguments: a condition, an if-true | |
clause, and an if-false clause." | |
([expression step-limit constants default-value] | |
(first (eval-with-tagging expression (sorted-map) step-limit constants default-value))) | |
([expression tag-space step-limit constants default-value] | |
;; these calls return [value tag-space steps-remaining] | |
(if (<= step-limit 0) | |
[:limit-exceeded tag-space step-limit] | |
(let [step-limit (dec step-limit)] | |
(if (not (seq? expression)) | |
[(get constants expression expression) tag-space step-limit] | |
(if (= 1 (count expression)) | |
(if (map? (first expression)) | |
(eval-with-tagging | |
(closest-association (:tagged (first expression)) tag-space default-value) | |
tag-space step-limit constants default-value) | |
[((resolve (first expression))) tag-space step-limit]) | |
(if (map? (first expression)) | |
(if tagdo-semantics | |
(eval-with-tagging (second expression) | |
(assoc tag-space (:tag (first expression)) (second expression)) | |
step-limit | |
constants | |
default-value) | |
[default-value | |
(assoc tag-space (:tag (first expression)) (second expression)) | |
step-limit]) | |
(if (= 'if (first expression)) | |
(let [condition-eval-result | |
(eval-with-tagging (second expression) tag-space step-limit constants default-value)] | |
(if (first condition-eval-result) | |
(eval-with-tagging (nth expression 2) | |
(nth condition-eval-result 1) | |
(nth condition-eval-result 2) | |
constants | |
default-value) | |
(eval-with-tagging (nth expression 3) | |
(nth condition-eval-result 1) | |
(nth condition-eval-result 2) | |
constants | |
default-value))) | |
(let [arg-evaluation-results | |
(loop [remaining (rest expression) | |
ts tag-space | |
lim step-limit | |
results []] | |
(if (empty? remaining) | |
results | |
(if (<= lim 0) | |
(recur (rest remaining) ts lim (conj results [:limit-exceeded ts lim])) | |
(let [first-result (eval-with-tagging | |
(first remaining) ts lim constants default-value)] | |
(recur (rest remaining) | |
(nth first-result 1) | |
(nth first-result 2) | |
(conj results first-result)))))) | |
vals (map first arg-evaluation-results) | |
ending-limit (nth (last arg-evaluation-results) 2) | |
ending-ts (nth (last arg-evaluation-results) 1)] | |
(if (<= ending-limit 0) | |
[:limit-exceeded ending-ts ending-limit] | |
[(apply (resolve (first expression)) vals) ending-ts ending-limit])))))))))) | |
;;; end of stuff from eval_with_tagging.clj | |
(defn expand-erc | |
[item] | |
(cond (= item :int-erc) (+ (first int-erc-range) | |
(rand-int (inc (- (second int-erc-range) | |
(first int-erc-range))))) | |
:else item)) | |
(defn expand-erf | |
[item] | |
(cond (= item :tag-erf) {:tag (rand-int tag-limit)} | |
(= item :tagged-erf) {:tagged (rand-int tag-limit)} | |
:else item)) | |
;; random code generator from the GP field guide p 14 | |
(def function-set (keys function-table)) | |
(def terminal-proportion (/ (count terminal-set) | |
(+ (count terminal-set) (count function-set)))) | |
(defn random-code | |
[depth-limit method] ;; method should be :grow or :full | |
(if (or (= depth-limit 0) | |
(and (= method :grow) | |
(< (rand) terminal-proportion))) | |
(expand-erc (rand-nth terminal-set)) | |
(let [f (expand-erf (rand-nth function-set))] | |
(cons f (repeatedly (if (map? f) | |
(if (:tag f) 1 0) | |
(get function-table f)) | |
#(random-code (dec depth-limit) method)))))) | |
(defn pd | |
"Protected biginteger division; returns 0 if the denominator is zero." | |
[num denom] | |
(if (zero? denom) | |
0 | |
(bigint (quot num denom)))) | |
(defn noop0 [] 0) | |
(defn noop1 [a] a) | |
(defn abs ;; works even for bigints | |
[x] | |
(if (< x 0) (- x) x)) | |
(defn error | |
[individual] | |
(reduce +' (map (fn [[bindings target]] | |
(let [result (eval-with-tagging individual execution-limit bindings 0)] | |
(if (= result :limit-exceeded) | |
penalty-for-exceeding-limit | |
(abs (-' result target))))) | |
target-data))) | |
(defn ramp-depth [] (rand-nth (range 2 6))) | |
;; We can now generate and evaluate random small programs, as with: | |
;; (let [i (random-code 3 :full)] (println (error i) "from individual" i)) | |
(defn codesize [c] | |
(if (seq? c) | |
(count (flatten c)) | |
1)) | |
(defn at-index | |
"Returns a subtree of tree indexed by point-index in a depth first traversal." | |
[tree point-index] | |
(let [index (mod (Math/abs point-index) (codesize tree)) | |
zipper (zip/seq-zip tree)] | |
(loop [z zipper i index] | |
(if (zero? i) | |
(zip/node z) | |
(if (seq? (zip/node z)) | |
(recur (zip/next (zip/next z)) (dec i)) | |
(recur (zip/next z) (dec i))))))) | |
(defn insert-at-index | |
"Returns a copy of tree with the subtree formerly indexed by | |
point-index (in a depth-first traversal) replaced by new-subtree." | |
[tree point-index new-subtree] | |
(let [index (mod (Math/abs point-index) (codesize tree)) | |
zipper (zip/seq-zip tree)] | |
(loop [z zipper i index] | |
(if (zero? i) | |
(zip/root (zip/replace z new-subtree)) | |
(if (seq? (zip/node z)) | |
(recur (zip/next (zip/next z)) (dec i)) | |
(recur (zip/next z) (dec i))))))) | |
(defn annotate-points | |
"Returns a sequence of [index kind] pairs where kind is :internal or :leaf." | |
[tree] | |
(let [limit (codesize tree)] | |
(loop [z (zip/seq-zip tree) index 0 results []] | |
(if (= index limit) | |
results | |
(if (seq? (zip/node z)) | |
(recur (zip/next (zip/next z)) | |
(inc index) | |
(conj results [index :internal])) | |
(recur (zip/next z) | |
(inc index) | |
(conj results [index :leaf]))))))) | |
(defn select-node-90-10 | |
"returns an index" | |
[tree] | |
(let [annotated (annotate-points tree) | |
internals (map first (filter #(= (second %) :internal) annotated)) | |
leaves (map first (filter #(= (second %) :leaf) annotated))] | |
(cond (empty? internals) (rand-nth leaves) | |
(empty? leaves) (rand-nth internals) | |
(< (rand) 0.9) (rand-nth internals) | |
:else (rand-nth leaves)))) | |
(defn select-node-by-tournament | |
"returns an index" | |
[tree] | |
(let [num-nodes (codesize tree) | |
tournament-set (repeatedly node-tournament-size #(rand-int num-nodes))] | |
(ffirst (sort #(> (first %1) (first %2)) | |
(map #(vector % (codesize (at-index tree %))) tournament-set))))) | |
(defn select-node | |
[tree] | |
(case node-selection-method | |
:tournament (select-node-by-tournament tree) | |
:koza (select-node-90-10 tree) | |
)) | |
(defn depth | |
[expression] | |
(if (not (seq? expression)) | |
0 | |
(let [subsequences (filter seq? expression)] | |
(if (empty? subsequences) | |
1 | |
(inc (apply max (map depth subsequences))))))) | |
(defn mutate | |
[i] | |
(let [child (insert-at-index | |
i | |
(select-node i) | |
(random-code (ramp-depth) (if (< (rand) 0.5) :grow :full)))] | |
(if (> (depth child) absolute-depth-limit) | |
i | |
child))) | |
(defn crossover | |
[i j] | |
(let [child (insert-at-index | |
i | |
(select-node i) | |
(at-index j (select-node j)))] | |
(if (> (depth child) absolute-depth-limit) | |
i | |
child))) | |
(defn pmapall | |
"Like pmap but: 1) coll should be finite, 2) the returned sequence | |
will not be lazy, 3) calls to f may occur in any order, to maximize | |
multicore processor utilization, and 4) takes only one coll so far." | |
[f coll] | |
(if single-thread-mode | |
(map f coll) | |
(let [agents (map #(agent % :error-handler (fn [agnt except] (println except))) coll)] | |
(dorun (map #(send % f) agents)) | |
(apply await agents) | |
(doall (map deref agents))))) | |
;; During evolution we'll maintain the population as a sequence of | |
;; [program error] pairs. | |
(defn pair-with-errors | |
"Returns a vector of [program error] pairs." | |
[programs] | |
(println "Computing errors...") | |
(vec (pmapall #(vector % (error %)) programs))) | |
(defn select | |
"Returns the program of the best [program error] pair in a tournament | |
set with the specified size, location, and radius." | |
[prog-err-pairs tournament-size location radius] | |
(let [limit (count prog-err-pairs) | |
tournament-set (repeatedly | |
tournament-size | |
#(nth prog-err-pairs | |
(mod (+ location | |
(- (rand-int (inc (* radius 2))) | |
radius)) | |
limit)))] | |
(first (first (sort #(< (second %1) (second %2)) (vec tournament-set)))))) | |
;; Now we can evolve a solution by starting with a random population and | |
;; repeatedly evaluating, checking for a solution, and producing a new | |
;; population. | |
(defn evolve | |
[popsize] | |
(println "Starting evolution...") | |
(loop [generation 0 | |
population (pair-with-errors (concat (repeatedly (/ popsize 2) #(random-code (ramp-depth) :grow)) | |
(repeatedly (/ popsize 2) #(random-code (ramp-depth) :full))))] | |
(let [sorted (sort #(< (second %1) (second %2)) population)] | |
(println "Generation:" generation) | |
(println "Best error:" (second (first sorted))) | |
(println "Best program:" (first (first sorted))) | |
(println "Best program size:" (codesize (first (first sorted)))) | |
(println "Best program depth:" (depth (first (first sorted)))) | |
(println " Median error:" (second (nth sorted | |
(int (/ popsize 2))))) | |
(println " Average program size:" | |
(float (/ (reduce + (map codesize (map first population))) | |
(count population)))) | |
(println " Average program depth:" | |
(float (/ (reduce + (map depth (map first population))) | |
(count population)))) | |
(println " Tag call ratio:" | |
(float (/ (count (filter :tag (filter map? (flatten (map first population))))) | |
(count (flatten (map first population)))))) | |
(println " Tagged call ratio:" | |
(float (/ (count (filter :tagged (filter map? (flatten (map first population))))) | |
(count (flatten (map first population)))))) | |
(println " Unique error values in population:" | |
(count (distinct (map second population)))) | |
(if (< (second (first sorted)) 0.1) ;; good enough to count as success | |
(println "Success:" (first (first sorted))) | |
(if (>= generation maximum-generations) | |
(println "Failure") | |
(recur | |
(inc generation) | |
(pair-with-errors | |
(for [i (range popsize)] | |
(let [operator (rand) | |
tsize reproductive-tournament-size | |
radius trivial-geography-radius] | |
(cond (< operator 0.05) (mutate (select population tsize i radius)) | |
(< operator 0.95) (crossover (select population tsize i radius) | |
(select population tsize i radius)) | |
:else (select population tsize i radius))))))))))) | |
(defn -main | |
[& args] | |
(evolve population-size) | |
;(System/exit 0) | |
) | |
(-main) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment