# Kajiyu/dep.py

## dep.py
import numpy as np

class DEPNetwork:
    '''
    Single-layer tanh controller trained with Differential extrinsic plasticity.
    [1] Novel plasticity rule can explain the development of sensorimotor intelligence. PNAS. 2015
    [2] Behavior as broken symmetry in embodied self-organizing robots. ECAL. 2013

    The state and the action vector share one dimension (`action_size`), so the
    weight matrix is square.

    - attributes:
        action_size: number of elements of the state / action vector (int)
        w: weight matrix, shape (action_size, action_size)
        b: bias column vector, shape (action_size, 1)
        M: matrix applied to the state difference in the update (identity at construction)
        lr: step size shared by the weight and the bias update (float)
    '''

    def __init__(self, action_size, lr=0.1):
        '''
        - args:
            action_size: dimension of the state / action vector (int)
            lr: learning rate used by the updates in __call__ (float)
        '''
        self.action_size = action_size
        self.reset_params()
        self.M = np.eye(self.action_size)
        self.lr = lr

    def reset_params(self):
        '''Draw fresh Gaussian parameters: w scaled by 0.9, b scaled by 0.1.'''
        self.w = np.random.randn(self.action_size, self.action_size) * 0.9
        self.b = np.random.randn(self.action_size, 1) * 0.1

    def init_train_info(self, init_states):
        '''
        Build the history dict expected by __call__ for the very first step.

        - args:
            init_states: currently unused; the history always starts at zero.
        - outs:
            train_info: {prev_x, prev_grad_x, prev_y}, each a zero column
                vector of shape (action_size, 1)
        '''
        return {
            "prev_x": np.zeros((self.action_size, 1)),
            "prev_grad_x": np.zeros((self.action_size, 1)),
            "prev_y": np.zeros((self.action_size, 1)),
        }

    def __call__(self, x, train_flag=True, train_info=None):
        r'''
        Compute the motor command for `x` and, in train mode, update w and b.

        - args:
            x: current state vector of robots (array-like with action_size
                elements; reshaped to a column vector internally)
            train_flag: whether enabling the train mode or not. Any truthy
                value (bool, numpy.bool_, ...) enables training.
            train_info: {prev_x, prev_grad_x, prev_y} (dict) from
                init_train_info or from the previous call. When None, no
                update is performed.
        - outs:
            y: next motor commands (numpy 1d array), computed with the
                parameters as they were before this call's update
            train_info: history for the next call; the input object is
                returned unchanged when no update was performed
        - forward:
            y_{i} = tanh( \sum_{j=1}^{n} w_{ij}x_{j} + b_{i})
        - Update (as implemented, \dot{x} is a one-step difference):
            \dot{x}(t) = x(t) - x(t-1)
            F(\dot{x}(t)) = M\dot{x}(t)
            w_{ij} -> w_{ij} - lr * F(\dot{x}(t))_{i} * \dot{x}(t-1)_{j}
            b_{i} -> b_{i} - lr * y(t-1)_{i}
        '''
        x = np.asarray(x).reshape(-1, 1)
        y = np.tanh(np.dot(self.w, x) + self.b).reshape(-1, 1)

        # Truthiness instead of `is True`, so numpy.bool_ / int flags also train.
        if train_flag and train_info is not None:
            grad_x = x - train_info["prev_x"]
            f_x = np.dot(self.M, grad_x)
            # NOTE(review): the correlation term is subtracted here; confirm
            # the intended sign against the plasticity rule in [1].
            self.w = self.w - self.lr * np.dot(f_x, train_info["prev_grad_x"].T)
            # The bias is driven by the previous step's output, not by `y`.
            self.b = self.b - self.lr * train_info["prev_y"]
            train_info = {
                "prev_x": x,
                "prev_grad_x": grad_x,
                "prev_y": y,
            }
        return y.reshape(-1), train_info


if __name__ == '__main__':
    # Closed-loop demo: the motor output, scaled by 0.8, is fed back as the
    # next state while the network keeps training on every step.
    action_size = 10
    dep_net = DEPNetwork(action_size=action_size)
    init_states = np.random.randn(action_size, 1)
    train_info = dep_net.init_train_info(init_states)

    x = init_states
    for step in range(1000):
        y, train_info = dep_net(x, train_info=train_info)
        print("time", step, ":", y)
        x = 0.8 * y
	import numpy as np

	class DEPNetwork:
	    '''
	    Single-layer tanh controller trained with Differential extrinsic plasticity.
	    [1] Novel plasticity rule can explain the development of sensorimotor intelligence. PNAS. 2015
	    [2] Behavior as broken symmetry in embodied self-organizing robots. ECAL. 2013

	    The state and the action vector share one dimension (`action_size`), so the
	    weight matrix is square.

	    - attributes:
	        action_size: number of elements of the state / action vector (int)
	        w: weight matrix, shape (action_size, action_size)
	        b: bias column vector, shape (action_size, 1)
	        M: matrix applied to the state difference in the update (identity at construction)
	        lr: step size shared by the weight and the bias update (float)
	    '''

	    def __init__(self, action_size, lr=0.1):
	        '''
	        - args:
	            action_size: dimension of the state / action vector (int)
	            lr: learning rate used by the updates in __call__ (float)
	        '''
	        self.action_size = action_size
	        self.reset_params()
	        self.M = np.eye(self.action_size)
	        self.lr = lr

	    def reset_params(self):
	        '''Draw fresh Gaussian parameters: w scaled by 0.9, b scaled by 0.1.'''
	        self.w = np.random.randn(self.action_size, self.action_size) * 0.9
	        self.b = np.random.randn(self.action_size, 1) * 0.1

	    def init_train_info(self, init_states):
	        '''
	        Build the history dict expected by __call__ for the very first step.

	        - args:
	            init_states: currently unused; the history always starts at zero.
	        - outs:
	            train_info: {prev_x, prev_grad_x, prev_y}, each a zero column
	                vector of shape (action_size, 1)
	        '''
	        return {
	            "prev_x": np.zeros((self.action_size, 1)),
	            "prev_grad_x": np.zeros((self.action_size, 1)),
	            "prev_y": np.zeros((self.action_size, 1)),
	        }

	    def __call__(self, x, train_flag=True, train_info=None):
	        r'''
	        Compute the motor command for `x` and, in train mode, update w and b.

	        - args:
	            x: current state vector of robots (array-like with action_size
	                elements; reshaped to a column vector internally)
	            train_flag: whether enabling the train mode or not. Any truthy
	                value (bool, numpy.bool_, ...) enables training.
	            train_info: {prev_x, prev_grad_x, prev_y} (dict) from
	                init_train_info or from the previous call. When None, no
	                update is performed.
	        - outs:
	            y: next motor commands (numpy 1d array), computed with the
	                parameters as they were before this call's update
	            train_info: history for the next call; the input object is
	                returned unchanged when no update was performed
	        - forward:
	            y_{i} = tanh( \sum_{j=1}^{n} w_{ij}x_{j} + b_{i})
	        - Update (as implemented, \dot{x} is a one-step difference):
	            \dot{x}(t) = x(t) - x(t-1)
	            F(\dot{x}(t)) = M\dot{x}(t)
	            w_{ij} -> w_{ij} - lr * F(\dot{x}(t))_{i} * \dot{x}(t-1)_{j}
	            b_{i} -> b_{i} - lr * y(t-1)_{i}
	        '''
	        x = np.asarray(x).reshape(-1, 1)
	        y = np.tanh(np.dot(self.w, x) + self.b).reshape(-1, 1)

	        # Truthiness instead of `is True`, so numpy.bool_ / int flags also train.
	        if train_flag and train_info is not None:
	            grad_x = x - train_info["prev_x"]
	            f_x = np.dot(self.M, grad_x)
	            # NOTE(review): the correlation term is subtracted here; confirm
	            # the intended sign against the plasticity rule in [1].
	            self.w = self.w - self.lr * np.dot(f_x, train_info["prev_grad_x"].T)
	            # The bias is driven by the previous step's output, not by `y`.
	            self.b = self.b - self.lr * train_info["prev_y"]
	            train_info = {
	                "prev_x": x,
	                "prev_grad_x": grad_x,
	                "prev_y": y,
	            }
	        return y.reshape(-1), train_info


	if __name__ == '__main__':
	    # Closed-loop demo: the motor output, scaled by 0.8, is fed back as the
	    # next state while the network keeps training on every step.
	    action_size = 10
	    dep_net = DEPNetwork(action_size=action_size)
	    init_states = np.random.randn(action_size, 1)
	    train_info = dep_net.init_train_info(init_states)
	    x = init_states
	    for i in range(1000):
	        y, train_info = dep_net(x, train_info=train_info)
	        x = 0.8 * y
	        print("time", i, ":", y)