Skip to content

Instantly share code, notes, and snippets.

@Kajiyu
Created March 7, 2019 13:47
Show Gist options
  • Save Kajiyu/abae8e73089b2319ba3caf58188367d3 to your computer and use it in GitHub Desktop.
Save Kajiyu/abae8e73089b2319ba3caf58188367d3 to your computer and use it in GitHub Desktop.
Differential extrinsic plasticity
import numpy as np
class DEPNetwork:
    """Single-layer tanh controller trained with differential extrinsic plasticity.

    The network maps a state vector with ``action_size`` entries to a motor
    command of the same size (the weight matrix is square).

    References:
        [1] Novel plasticity rule can explain the development of sensorimotor
            intelligence. PNAS. 2015
        [2] Behavior as broken symmetry in embodied self-organizing robots.
            ECAL. 2013
    """

    def __init__(self, action_size, lr=0.1):
        """Create a network with randomly initialised weights.

        Args:
            action_size: number of state inputs and of motor outputs (int).
            lr: step size used for both the weight and the bias update.
        """
        self.action_size = action_size
        # Draws self.w and self.b; kept first so the RNG call order is stable.
        self.reset_params()
        # Model matrix F applied to the state velocity; identity by default.
        self.M = np.eye(self.action_size)
        self.lr = lr

    def reset_params(self):
        """Re-draw the weight matrix ``w`` and the bias column ``b`` at random."""
        self.w = np.random.randn(self.action_size, self.action_size) * 0.9
        self.b = np.random.randn(self.action_size, 1) * 0.1

    def init_train_info(self, init_states):
        """Return the zero-initialised history dict expected by ``__call__``.

        Args:
            init_states: accepted for interface compatibility; currently unused
                (the history always starts from zeros).

        Returns:
            dict with keys ``prev_x``, ``prev_grad_x`` and ``prev_y``, each an
            ``(action_size, 1)`` array of zeros.
        """
        return {
            "prev_x": np.zeros((self.action_size, 1)),
            "prev_grad_x": np.zeros((self.action_size, 1)),
            "prev_y": np.zeros((self.action_size, 1)),
        }

    def __call__(self, x, train_flag=True, train_info=None):
        r"""Compute the motor command for ``x`` and optionally adapt ``w``/``b``.

        Forward pass (evaluated with the parameters *before* this call's
        update):

            y_{i} = tanh( \sum_{j=1}^{n} w_{ij} x_{j} + b_{i} )

        Update, applied only when ``train_flag`` is truthy and ``train_info``
        is given:

            grad_x = x - prev_x
            w <- w - lr * (M grad_x) prev_grad_x^T
            b <- b - lr * prev_y

        NOTE(review): the rule this class was originally documented with reads
        ``w_{ij} + lr * F(\dot{x'})_{i} * \dot{x}_{j}`` (plus sign) whereas the
        implementation subtracts. The implemented sign is kept unchanged here;
        confirm against reference [1] which one is intended.

        Args:
            x: current state, array-like with ``action_size`` elements.
            train_flag: enable the plasticity update (any truthy value).
            train_info: history dict ``{prev_x, prev_grad_x, prev_y}`` as
                produced by ``init_train_info`` or by a previous call.

        Returns:
            Tuple ``(y, train_info)``: ``y`` is a fresh 1-d array of motor
            commands; ``train_info`` is the new history dict after a training
            step, or the argument passed in, unchanged, otherwise.
        """
        # np.array copies: the history must not alias a buffer the caller may
        # later overwrite in place, or the next grad_x would silently be zero.
        x = np.array(x, dtype=float).reshape(-1, 1)
        y = np.tanh(np.dot(self.w, x) + self.b).reshape(-1, 1)

        # Truthiness (not ``is True``) so numpy booleans enable training too.
        if train_flag and train_info is not None:
            prev_x = train_info["prev_x"]
            prev_grad_x = train_info["prev_grad_x"]
            prev_y = train_info["prev_y"]

            grad_x = x - prev_x  # finite-difference state velocity
            f_x = np.dot(self.M, grad_x)
            self.w = self.w - self.lr * np.dot(f_x, prev_grad_x.T)
            self.b = self.b - self.lr * prev_y

            train_info = {
                "prev_x": x,
                "prev_grad_x": grad_x,
                "prev_y": y,
            }

        # Return a copy so in-place edits by the caller cannot corrupt prev_y.
        return y.reshape(-1).copy(), train_info
if __name__ == '__main__':
    # Closed-loop demo: the scaled motor command is fed back as the next state
    # while the network keeps adapting, and every command is printed.
    n_units = 10
    n_steps = 1000
    feedback_gain = 0.8

    network = DEPNetwork(action_size=n_units)
    start_state = np.random.randn(n_units, 1)
    history = network.init_train_info(start_state)

    state = start_state
    for step in range(n_steps):
        command, history = network(state, train_info=history)
        state = feedback_gain * command
        print("time", step, ":", command)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment