Created
November 19, 2009 18:33
-
-
Save making/238952 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(require :komainu) | |
(use-package :nurarihyon) | |
(use-package :nurikabe) | |
(use-package :chimi) | |
(use-package :komainu) | |
(defclass* <pgrl> | |
() | |
((environment nil) | |
(n 0) | |
(theta nil) ; 方策(適応するパラメータ) | |
(eps nil) ; 変動ステップサイズ | |
(eta 0.8) ; 学習率? | |
(time 5) ; 1ステップで試行する方策数 | |
(debugp nil) | |
) | |
) | |
(defmethod init ((pgrl <pgrl>) env &key (eta 0.8) (time 5) (debugp nil)) | |
(setf (environment-of pgrl) env) | |
(setf (theta-of pgrl) (initial-param-set env)) | |
(setf (eps-of pgrl) (param-step env)) | |
(setf (n-of pgrl) (length (theta-of pgrl))) | |
(setf (eta-of pgrl) eta) | |
(setf (time-of pgrl) time) | |
(setf (debugp-of pgrl) debugp) | |
env | |
) | |
(defmethod get-result ((pgrl <pgrl>)) | |
(theta-of pgrl) | |
) | |
(defmethod debugp ((pgrl <pgrl>) debugp) | |
(setf (debugp-of pgrl) debugp) | |
) | |
(defmethod process ((pgrl <pgrl>) &optional (max-num 10)) | |
(with-slots (environment theta n eps eta time debugp) | |
pgrl | |
(let ((R (make-array time)) | |
(A (make-float-vector n)) | |
(rewards (make-array time)) | |
) | |
(dotimes (k max-num) | |
(if (finishp environment) (return t)) | |
(if debugp (format t ";; theta = ~a~%" theta)) | |
(dotimes (i time) | |
(setf (elt R i) (make-float-vector n)) | |
(dotimes (j n) | |
(let ((rc (1- (random 3)))) ; unbiased random choice from (-1, 0, 1) | |
(setf (elt (elt R i) j) (+ (elt theta j) (* (elt eps j) rc))) | |
) | |
) | |
) | |
(dotimes (i time) | |
;; run system using parameter set R_i, evaluate rewards | |
(setf (elt rewards i) (run environment (elt R i))) | |
) | |
(dotimes (j n) | |
(let ((avg+ 0.0) (num+ 0) | |
(avg0 0.0) (num0 0) | |
(avg- 0.0) (num- 0) | |
) | |
(dotimes (i time) | |
(cond | |
((> (elt (elt R i) j) (elt theta j)) | |
;; positive perturbation | |
(incf avg+ (elt rewards i)) | |
(incf num+) | |
) | |
((< (elt (elt R i) j) (elt theta j)) | |
;; negative perturbation | |
(incf avg- (elt rewards i)) | |
(incf num-) | |
) | |
(t | |
;; zero perturbation | |
(incf avg0 (elt rewards i)) | |
(incf num0) | |
) | |
) | |
) | |
;; average | |
(if (plusp num+) (setf avg+ (/ avg+ (float num+)))) | |
(if (plusp num0) (setf avg0 (/ avg0 (float num0)))) | |
(if (plusp num-) (setf avg- (/ avg- (float num-)))) | |
(if debugp (format t ";; +:~7,3f 0:~7,3f -:~7,3f~%" avg+ avg0 avg-)) | |
;; calculate gradient | |
(setf (elt A j) (if (and (> avg0 avg+) (> avg0 avg-)) 0.0 (- avg+ avg-))) | |
) | |
) | |
(if debugp (format t ";; A = ~a~%" A)) | |
;; normalize and weight | |
(if (plusp (norm A)) (setf A (scale eta (normalize-vector A)))) | |
(dotimes (j n) | |
(setf (elt A j) (* (elt A j) (elt eps j))) | |
) | |
(v+ theta A theta) | |
) | |
) | |
) | |
) | |
(defclass* <pgrl-environment> | |
() () | |
) | |
(defmethod initial-param-set ((env <pgrl-environment>)) | |
(float-vector 0.0) | |
) | |
(defmethod param-step ((env <pgrl-environment>)) | |
(float-vector 0.0) | |
) | |
(defmethod run ((env <pgrl-environment>) theta) | |
0.0 | |
) | |
(defmethod finishp ((env <pgrl-environment>)) | |
nil | |
) | |
;; test-env1 | |
;; f(x, y) = - x^4 - y^4 + x^2 + y^2 - 2xy の極大値を求める | |
;; 解: f(1, -1) = f(-1, 1) = 2 | |
(defclass* <test-env1> (<pgrl-environment>) | |
() | |
) | |
(defmethod initial-param-set ((env <test-env1>)) | |
(float-vector 0.0 0.0) | |
) | |
(defmethod param-step ((env <test-env1>)) | |
(float-vector 0.3 0.3) | |
) | |
(defmethod run ((env <test-env1>) theta) | |
(let ((x (elt theta 0)) (y (elt theta 1))) | |
(+ (* -1 (expt x 4)) (* -1 (expt y 4)) (expt x 2) (expt y 2) (* -2 x y)) | |
) | |
) | |
(defun test1 () | |
(let* ((env (make-instance '<test-env1>)) | |
(theta (initial-param-set env)) | |
(agn (make-instance '<pgrl> | |
:environment env | |
:theta theta | |
:eps (param-step env) | |
:n (length theta) | |
:time 10 | |
:eta 0.8 | |
:debugp nil)) | |
) | |
(process agn 100) | |
(format t "~%;; result : ~a = ~a~%" (get-result agn) (run env (get-result agn))) | |
) | |
) | |
;; test-env2 | |
(defconstant +gravity+ 9.8) | |
(defvar *viewer*) | |
(defvar *target*) | |
(defvar *ball*) | |
(defclass* <test-env2> (<pgrl-environment>) | |
() | |
) | |
(defmethod initial-param-set ((env <test-env2>)) | |
;; v0 theta phi | |
(float-vector 100.0 30.0 30.0) | |
) | |
(defmethod param-step ((env <test-env2>)) | |
(float-vector 15.0 5.0 5.0) | |
) | |
(defmethod run ((env <test-env2>) param) | |
;; kick | |
(let ((time 0.0) | |
(v0 (elt param 0)) | |
(theta (elt param 1)) | |
(phi (elt param 2)) | |
(pos (float-vector 0.0 0.0 0.0)) | |
(nearest-dist 1000000) ;; tekitou | |
(nearest-point (float-vector 0.0 0.0 0.0)) | |
) | |
(locate *ball* pos :world) | |
(objects *viewer* (list *ball* *target* *world-coords*)) | |
(while (>= (elt pos 2) 0.0) | |
(incf time 0.5) | |
(setf pos (float-vector (* v0 (cos (deg2rad theta)) (cos (deg2rad phi)) time) | |
(* v0 (cos (deg2rad theta)) (sin (deg2rad phi)) time) | |
(+ (* v0 (sin (deg2rad theta)) time) | |
(* -0.5 +gravity+ time time)))) | |
(let ((dist (distance pos (worldpos-of (coords-of *target*))))) | |
(when (< dist nearest-dist) | |
(setf nearest-dist dist) | |
(setf nearest-point pos) | |
) | |
) | |
(locate *ball* pos :world) | |
(let ((dist (distance (worldpos-of (coords-of *ball*)) | |
(worldpos-of (coords-of *target*))))) | |
(when (< dist 25.0) | |
;; intersect!! | |
(let ((reward (* 100 (+ (/ 1.0 time) | |
(/ 1.0 (+ 1.0 (expt (* 0.01 dist) 2))))))) | |
(format t ";; hit ~a !!!!~%" reward) | |
(return-from run reward) | |
) | |
) | |
) | |
(draw-objects *viewer*) | |
) | |
(* -0.1 nearest-dist) | |
) | |
) | |
(defun test2 () | |
(unless (boundp '*viewer*) | |
(setq *viewer* (make-komainu-viewer :loggingp nil))) | |
(unless (boundp '*target*) | |
(setq *target* (make-cube 100 100 100 :color :green)) | |
(transparent *target* 0.5) | |
) | |
(unless (boundp '*ball*) | |
(setq *ball* (make-cube 50 50 50 :color :yellow)) | |
;; (transparent *ball* 0.5) | |
) | |
(locate *target* (float-vector (+ 500 (random 500)) (+ 500 (random 500)) (+ 500 (random 500)))) | |
(objects *viewer* (list *target* *ball* *world-coords*)) | |
(let* ((env (make-instance '<test-env2>)) | |
(theta (initial-param-set env)) | |
(agn (make-instance '<pgrl> | |
:environment env | |
:theta theta | |
:eps (param-step env) | |
:n (length theta) | |
:time 10 | |
:eta 0.8 | |
:debugp nil)) | |
) | |
(process agn 100) | |
(format t "~%;; result : ~a = ~a~%" (get-result agn) (run env (get-result agn))) | |
) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment