Skip to content

Instantly share code, notes, and snippets.

@making
Created November 19, 2009 18:33
Show Gist options
  • Save making/238952 to your computer and use it in GitHub Desktop.
Save making/238952 to your computer and use it in GitHub Desktop.
(require :komainu)
(use-package :nurarihyon)
(use-package :nurikabe)
(use-package :chimi)
(use-package :komainu)
(defclass* <pgrl>
()
((environment nil)
(n 0)
(theta nil) ; 方策(適応するパラメータ)
(eps nil) ; 変動ステップサイズ
(eta 0.8) ; 学習率?
(time 5) ; 1ステップで試行する方策数
(debugp nil)
)
)
(defmethod init ((pgrl <pgrl>) env &key (eta 0.8) (time 5) (debugp nil))
(setf (environment-of pgrl) env)
(setf (theta-of pgrl) (initial-param-set env))
(setf (eps-of pgrl) (param-step env))
(setf (n-of pgrl) (length (theta-of pgrl)))
(setf (eta-of pgrl) eta)
(setf (time-of pgrl) time)
(setf (debugp-of pgrl) debugp)
env
)
(defmethod get-result ((pgrl <pgrl>))
(theta-of pgrl)
)
(defmethod debugp ((pgrl <pgrl>) debugp)
(setf (debugp-of pgrl) debugp)
)
(defmethod process ((pgrl <pgrl>) &optional (max-num 10))
(with-slots (environment theta n eps eta time debugp)
pgrl
(let ((R (make-array time))
(A (make-float-vector n))
(rewards (make-array time))
)
(dotimes (k max-num)
(if (finishp environment) (return t))
(if debugp (format t ";; theta = ~a~%" theta))
(dotimes (i time)
(setf (elt R i) (make-float-vector n))
(dotimes (j n)
(let ((rc (1- (random 3)))) ; unbiased random choice from (-1, 0, 1)
(setf (elt (elt R i) j) (+ (elt theta j) (* (elt eps j) rc)))
)
)
)
(dotimes (i time)
;; run system using parameter set R_i, evaluate rewards
(setf (elt rewards i) (run environment (elt R i)))
)
(dotimes (j n)
(let ((avg+ 0.0) (num+ 0)
(avg0 0.0) (num0 0)
(avg- 0.0) (num- 0)
)
(dotimes (i time)
(cond
((> (elt (elt R i) j) (elt theta j))
;; positive perturbation
(incf avg+ (elt rewards i))
(incf num+)
)
((< (elt (elt R i) j) (elt theta j))
;; negative perturbation
(incf avg- (elt rewards i))
(incf num-)
)
(t
;; zero perturbation
(incf avg0 (elt rewards i))
(incf num0)
)
)
)
;; average
(if (plusp num+) (setf avg+ (/ avg+ (float num+))))
(if (plusp num0) (setf avg0 (/ avg0 (float num0))))
(if (plusp num-) (setf avg- (/ avg- (float num-))))
(if debugp (format t ";; +:~7,3f 0:~7,3f -:~7,3f~%" avg+ avg0 avg-))
;; calculate gradient
(setf (elt A j) (if (and (> avg0 avg+) (> avg0 avg-)) 0.0 (- avg+ avg-)))
)
)
(if debugp (format t ";; A = ~a~%" A))
;; normalize and weight
(if (plusp (norm A)) (setf A (scale eta (normalize-vector A))))
(dotimes (j n)
(setf (elt A j) (* (elt A j) (elt eps j)))
)
(v+ theta A theta)
)
)
)
)
(defclass* <pgrl-environment>
() ()
)
(defmethod initial-param-set ((env <pgrl-environment>))
(float-vector 0.0)
)
(defmethod param-step ((env <pgrl-environment>))
(float-vector 0.0)
)
(defmethod run ((env <pgrl-environment>) theta)
0.0
)
(defmethod finishp ((env <pgrl-environment>))
nil
)
;; test-env1
;; f(x, y) = - x^4 - y^4 + x^2 + y^2 - 2xy の極大値を求める
;; 解: f(1, -1) = f(-1, 1) = 2
(defclass* <test-env1> (<pgrl-environment>)
()
)
(defmethod initial-param-set ((env <test-env1>))
(float-vector 0.0 0.0)
)
(defmethod param-step ((env <test-env1>))
(float-vector 0.3 0.3)
)
(defmethod run ((env <test-env1>) theta)
(let ((x (elt theta 0)) (y (elt theta 1)))
(+ (* -1 (expt x 4)) (* -1 (expt y 4)) (expt x 2) (expt y 2) (* -2 x y))
)
)
(defun test1 ()
(let* ((env (make-instance '<test-env1>))
(theta (initial-param-set env))
(agn (make-instance '<pgrl>
:environment env
:theta theta
:eps (param-step env)
:n (length theta)
:time 10
:eta 0.8
:debugp nil))
)
(process agn 100)
(format t "~%;; result : ~a = ~a~%" (get-result agn) (run env (get-result agn)))
)
)
;; test-env2
(defconstant +gravity+ 9.8)
(defvar *viewer*)
(defvar *target*)
(defvar *ball*)
(defclass* <test-env2> (<pgrl-environment>)
()
)
(defmethod initial-param-set ((env <test-env2>))
;; v0 theta phi
(float-vector 100.0 30.0 30.0)
)
(defmethod param-step ((env <test-env2>))
(float-vector 15.0 5.0 5.0)
)
(defmethod run ((env <test-env2>) param)
;; kick
(let ((time 0.0)
(v0 (elt param 0))
(theta (elt param 1))
(phi (elt param 2))
(pos (float-vector 0.0 0.0 0.0))
(nearest-dist 1000000) ;; tekitou
(nearest-point (float-vector 0.0 0.0 0.0))
)
(locate *ball* pos :world)
(objects *viewer* (list *ball* *target* *world-coords*))
(while (>= (elt pos 2) 0.0)
(incf time 0.5)
(setf pos (float-vector (* v0 (cos (deg2rad theta)) (cos (deg2rad phi)) time)
(* v0 (cos (deg2rad theta)) (sin (deg2rad phi)) time)
(+ (* v0 (sin (deg2rad theta)) time)
(* -0.5 +gravity+ time time))))
(let ((dist (distance pos (worldpos-of (coords-of *target*)))))
(when (< dist nearest-dist)
(setf nearest-dist dist)
(setf nearest-point pos)
)
)
(locate *ball* pos :world)
(let ((dist (distance (worldpos-of (coords-of *ball*))
(worldpos-of (coords-of *target*)))))
(when (< dist 25.0)
;; intersect!!
(let ((reward (* 100 (+ (/ 1.0 time)
(/ 1.0 (+ 1.0 (expt (* 0.01 dist) 2)))))))
(format t ";; hit ~a !!!!~%" reward)
(return-from run reward)
)
)
)
(draw-objects *viewer*)
)
(* -0.1 nearest-dist)
)
)
(defun test2 ()
(unless (boundp '*viewer*)
(setq *viewer* (make-komainu-viewer :loggingp nil)))
(unless (boundp '*target*)
(setq *target* (make-cube 100 100 100 :color :green))
(transparent *target* 0.5)
)
(unless (boundp '*ball*)
(setq *ball* (make-cube 50 50 50 :color :yellow))
;; (transparent *ball* 0.5)
)
(locate *target* (float-vector (+ 500 (random 500)) (+ 500 (random 500)) (+ 500 (random 500))))
(objects *viewer* (list *target* *ball* *world-coords*))
(let* ((env (make-instance '<test-env2>))
(theta (initial-param-set env))
(agn (make-instance '<pgrl>
:environment env
:theta theta
:eps (param-step env)
:n (length theta)
:time 10
:eta 0.8
:debugp nil))
)
(process agn 100)
(format t "~%;; result : ~a = ~a~%" (get-result agn) (run env (get-result agn)))
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment