Recursive Neural Networks and Online Backpropagation Through Structure (BPTS)
(* recursiveNeuralNetwork.ml --- Recursive Neural Networks and
                                 Online Backpropagation Through Structure (BPTS)

   [MIT License] Copyright (C) 2015 Akinori ABE

   Compilation:

     $ ocamlfind ocamlopt -linkpkg -package slap recursiveNeuralNetwork.ml

   This program requires Sized Linear Algebra Library (SLAP), a linear algebra
   library for OCaml with static size checking for matrix operations (see
   http://akabe.github.io/slap/ for details). *)
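(* Running the resulting executable (assuming the default output name used by
   ocamlfind ocamlopt when no -o option is given):

     $ ./a.out *)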
open Format
open Slap.Io
open Slap.D
open Slap.Common
module Size = Slap.Size
(* A tree whose every node carries an ['n]-dimensional feature vector: a leaf
   holds an input vector, while the vector of a non-leaf node is overwritten
   by the forward pass. *)
type 'n tree = Node of ('n, Slap.cnt) vec * 'n tree list
(* Parameters of a recursive neural network (the same record is reused to
   store gradients). *)
type 'n params = {
  wleft  : ('n, 'n, Slap.cnt) mat; (* weights for the left side of a node *)
  wright : ('n, 'n, Slap.cnt) mat; (* weights for the right side of a node *)
  bias   : ('n, Slap.cnt) vec;     (* bias vector *)
}
let make_params feature_dim =
  {
    wleft = Mat.random feature_dim feature_dim;
    wright = Mat.random feature_dim feature_dim;
    bias = Vec.random feature_dim;
  }
(* Activation function (logistic sigmoid) and its derivative *)
let sigm a = 1.0 /. (1.0 +. exp (~-. a))
let actv_f ?y x = Vec.map ?y sigm x
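(* [actv_f' y] below evaluates the derivative of the logistic sigmoid in terms
   of its own output: if y = sigm a, then d(sigm a)/da = y *. (1.0 -. y),
   element-wise. *)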
let actv_f' y =
  let ones = Vec.make1 (Vec.dim y) in
  Vec.mul y (Vec.sub ones y)
(** [feedforward params tree] returns an output of a recursive neural network. *)
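(* Summary of the per-node computation, reconstructed from the code below
   rather than from any external reference: a non-leaf node with children
   x_0, ..., x_{k-1} computes

     y = f (bias + sum_i (cl_i * Wleft + cr_i * Wright) * x_i)

   where cr_i = i / (k - 1) (0.5 when k = 1), cl_i = 1 - cr_i, and f is the
   element-wise logistic sigmoid; a leaf simply returns its feature vector. *)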
let feedforward {wleft; wright; bias} tree =
  let rec calc_vec = function
    | Node (x, []) -> x (* a leaf *)
    | Node (y, children) -> (* a non-leaf node *)
      let last_i = List.length children - 1 in
      let add_child_vec i child =
        let x = calc_vec child in
        let cr = if last_i = 0 then 0.5 else (float i) /. (float last_i) in
        let cl = 1.0 -. cr in
        ignore (gemv ~trans:normal ~alpha:cl wleft x ~beta:1.0 ~y);
        ignore (gemv ~trans:normal ~alpha:cr wright x ~beta:1.0 ~y)
      in
      ignore (Vec.copy bias ~y);
      List.iteri add_child_vec children;
      actv_f ~y y
  in
  calc_vec tree
(** [feedback grads params tree target] computes gradients by backpropagation
    through structure (BPTS) and stores them into [grads]. *)
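(* Sketch of the recursion implemented below (reconstructed from the code):
   starting from delta = (y - target) .* f'(y) at the root, every non-leaf
   node with error signal delta and children x_0, ..., x_{k-1} accumulates

     dE/dWleft += cl_i * delta * x_i^T,  dE/dWright += cr_i * delta * x_i^T,
     dE/dbias  += delta,

   and propagates delta_i = ((cl_i * Wleft^T + cr_i * Wright^T) delta) .* f'(x_i)
   to each non-leaf child. *)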
let feedback grads {wleft; wright; bias} tree target =
  Mat.fill grads.wleft 0.0;
  Mat.fill grads.wright 0.0;
  Vec.fill grads.bias 0.0;
  let rec add_grads delta = function
    | [] -> () (* a leaf *)
    | children -> (* a non-leaf node *)
      let last_i = List.length children - 1 in
      let add_grads_child i (Node (x, c_children)) =
        let cr = if last_i = 0 then 0.5 else float i /. float last_i in
        let cl = 1.0 -. cr in
        (* Compute gradients of weights *)
        ignore (ger ~alpha:cl delta x grads.wleft);
        ignore (ger ~alpha:cr delta x grads.wright);
        (* Compute delta for a child *)
        let z = gemv ~trans:trans ~alpha:cl wleft delta in
        ignore (gemv ~trans:trans ~alpha:cr wright delta ~beta:1.0 ~y:z);
        let c_delta = Vec.mul z (actv_f' x) in
        add_grads c_delta c_children
      in
      List.iteri add_grads_child children;
      axpy ~alpha:1.0 ~x:delta grads.bias (* Compute gradients of biases *)
  in
  let Node (y, children) = tree in
  let root_delta = Vec.mul (Vec.sub y target) (actv_f' y) in
  add_grads root_delta children
(** [check_gradient grads params tree target] checks whether the given
    gradients [grads] are correct by comparing them with the results of naive
    numerical differentiation. This routine is only for checking the
    implementation; the numerical differentiation is much slower than
    backpropagation.
    cf. http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
 *)
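(* The check below uses the symmetric difference quotient with eps = 1e-4,

     dE/dp ~= (E(p + eps) - E(p - eps)) / (2 * eps),
     E      = ||feedforward params tree - target||^2 / 2,

   and prints a warning whenever the first four significant digits disagree
   with the backpropagated gradient. *)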
let check_gradient grads params tree target =
  let epsilon = 1e-4 in
  let check_digits dE1 dE2 = (* Check 4 significant digits *)
    let abs_dE1 = abs_float dE1 in
    if abs_dE1 < 1e-9
    then abs_float dE2 < 1e-9 (* true if both `dE1' and `dE2' are nearly zero *)
    else let diff = (dE1 -. dE2) *. (0.1 ** (floor (log10 abs_dE1) +. 1.0)) in
         abs_float diff < epsilon (* true if 4 significant digits are the same *)
  in
  let calc_error () = Vec.ssqr_diff (feedforward params tree) target /. 2.0 in
  let check_vec label x dx =
    let check i dE2 =
      let elm = Vec.get_dyn x i in
      Vec.set_dyn x i (elm +. epsilon);
      let pos_err = calc_error () in
      Vec.set_dyn x i (elm -. epsilon);
      let neg_err = calc_error () in
      Vec.set_dyn x i elm; (* restore *)
      let dE1 = (pos_err -. neg_err) /. (2.0 *. epsilon) in
      if not (check_digits dE1 dE2)
      then eprintf "WARNING: %s[%d] naive diff = %.6g, backprop = %.6g@."
             label i dE1 dE2
    in
    Vec.iteri check dx
  in
  let check_mat label a da =
    Mat.fold_topi (fun i () ai ->
        let label' = label ^ "[" ^ (string_of_int i) ^ "]" in
        check_vec label' ai (Mat.row_dyn da i)) () a
  in
  check_vec "dE/db" params.bias grads.bias;
  check_mat "dE/dWleft" params.wleft grads.wleft;
  check_mat "dE/dWright" params.wright grads.wright
(** Online training for a recursive neural network. *)
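(* One online step, as implemented below: forward pass, BPTS backward pass,
   a (slow) gradient check, and a plain gradient-descent update
   p <- p -. eta *. dE/dp applied to every parameter. *)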
let train ~eta grads params tree target =
  ignore (feedforward params tree); (* feedforward *)
  feedback grads params tree target; (* feedback *)
  check_gradient grads params tree target; (* gradient checking *)
  (* Update parameters *)
  let alpha = ~-. eta in
  Mat.axpy ~alpha ~x:grads.wleft params.wleft;
  Mat.axpy ~alpha ~x:grads.wright params.wright;
  axpy ~alpha ~x:grads.bias params.bias
let main () =
  Random.self_init ();
  let module N = (val Size.of_int_dyn 5 : Size.SIZE) in
  let node children = Node (Vec.create N.value, children) in
  let leaf feature_vec = Node (feature_vec, []) in
  let tree = node [
      node [
        leaf (Vec.make N.value 1.0);
        node [leaf (Vec.make N.value 2.0);
              leaf (Vec.make N.value 3.0)]];
      leaf (Vec.make N.value 4.0);
      node [leaf (Vec.make N.value 5.0);
            leaf (Vec.make N.value 6.0);
            leaf (Vec.make N.value 7.0)];
    ] in
  let target = Vec.make N.value 0.6 in
  let params = make_params N.value in
  let grads = make_params N.value in
  let eta = ref 0.02 in
  for i = 1 to 1000 do
    train ~eta:!eta grads params tree target;
    eta := !eta *. 0.99
  done;
  printf "target = [ %a]; prediction = [ %a]@."
    pp_rfvec target pp_rfvec (feedforward params tree)

let () = main ()