# Created January 16, 2018
# Reservoir computing character prediction text generator
# Perturb a dynamic reservoir with character inputs, predict next character with linear classifier
#          ____________
#         |            |
#         V            |  random connections with some sparsity
#input--->o--->o--->o  |
#         |    |    |  |
#         V    V    V  |
#         o--->o--->o--
#         |    |    |
#         V    V    V
#         o--->o--->o--->output --> linear classifier
# This implementation is inspired in no small part by Andrej Karpathy's char-rnn gist and blog post:
# BSD Licence
import numpy as np
def schmitt(z0,z1):
    """Apply a tanh nonlinearity with hysteresis, loosely based on a Schmitt trigger.

    z0 is squashed with tanh first; z1 is then added to that squashed value
    and the sum is squashed with tanh again.

    Works on scalars or NumPy arrays (normal broadcasting rules apply) and
    returns tanh(tanh(z0) + z1).
    """
    squashed = np.tanh(z0)
    return np.tanh(squashed + z1)
# Load data
# Replace with training text of your choice
# (this placeholder string is overwritten by the contents of myFile below)
data = "Why don't you try character prediction on something else?"
myFile = './wikiRC.txt'
# Read the whole training corpus; the context manager closes the file
# handle as soon as the text has been read.
with open(myFile, 'r') as corpusFile:
    data = corpusFile.read()
# Vocabulary: every distinct character of the corpus, in sorted order
chars = sorted(set(data))
dataSize, vocabSize = len(data), len(chars)
print('Data has %d characters, %d unique.' % (dataSize, vocabSize))
# Lookup tables between characters and their integer class indices
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}
# Training settings
# The training loop iterates k over range(myStart, myIt)
myStart, myIt = 0, 101
# A text sample and a progress line are printed every dispIt iterations
dispIt = 10
# Learning rate, multiplied by (1 - lRDecay) after every iteration
lR, lRDecay = 1e-3, 1e-6
# Parameters
## random seed :\
# NOTE(review): no seeding call using mySeed is visible in this file -- confirm
# whether it is meant to be passed to np.random.seed.
mySeed = 1337
# Number of units in the reservoir, and its integer square root
myDim = 1024
mySqrt = int(np.sqrt(myDim))
## Sparsity coefficient
# A recurrent weight is kept when a uniform draw is below spCh
spCh = 0.75
# Scale factor applied to the recurrent weights
wtWt = 2**11 / myDim**2
# Reservoir trains based on dynamics over a sequence of seqLength
seqLength = 1024
# Length to sample
sampleLength = 256
# Define the Reservoir
# Layers
# Reservoir state: a single column vector with myDim entries
a0 = np.zeros((myDim, 1))
# Weights
# input to a0: dense, uniform draws in [0, 1)
theta0 = np.random.random((myDim, vocabSize))
# a0 to a0: uniform draws centred on zero, scaled by wtWt; an entry is kept
# only where a second uniform draw falls below spCh, the rest are zeroed.
thetaJ0_0 = np.random.random((myDim, myDim)) - 0.5
thetaJ0_0 = wtWt * thetaJ0_0 * (np.random.random((myDim, myDim)) < spCh)
# weights from Reservoir output to predictions
Wry = 0.01 * np.random.random((vocabSize, myDim))
# biases (readout bias and reservoir bias), both starting at zero
by = np.zeros((vocabSize, 1))
ba0 = np.zeros((myDim, 1))
def sample(a0,seed_ix, n,myIter):
    """Sample a sequence of integers (character indices) from the model.

    a0 is the previous state of the reservoir, seed_ix is the index of the
    seed letter for the first time step, and n is the number of characters
    to draw. myIter is accepted for the caller's convenience but is not used.

    Returns a list of n sampled character indices. The caller's a0 array is
    not modified; the state is rebound locally on every step.
    """
    # One-hot encoding of the seed character
    x = np.zeros((vocabSize, 1))
    x[seed_ix] = 1
    ixes = []
    for _ in range(n):
        # Reservoir update: input drive plus recurrent drive, passed through
        # the hysteretic tanh, then offset by the reservoir bias.
        a0 = schmitt(a0, np.dot(theta0, x) + np.dot(thetaJ0_0, a0)) + ba0
        # Linear readout to unnormalised scores for the next character
        y = np.dot(Wry, a0) + by
        # Softmax; subtracting the max leaves the probabilities unchanged
        # but keeps np.exp from overflowing on large scores.
        p = np.exp(y - np.max(y))
        p /= np.sum(p)
        ix = np.random.choice(range(vocabSize), p=p.ravel())
        # Feed the sampled character back in as the next input
        x = np.zeros((vocabSize, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes
def funLoss(inputs,targets,a0):
    """Run the reservoir over a sequence and compute readout loss and gradients.

    inputs and targets are lists of character indices of equal length, and
    a0 is the reservoir state to start from (it is copied, not modified).

    Only the readout (Wry, by) receives gradients here; the reservoir
    weights are used in the forward pass but are not differentiated.

    Returns (myLoss, dy, dby, da0y, lastState):
      myLoss    -- summed cross-entropy loss over the sequence
      dy        -- score gradient of the last step processed by the backward
                   loop, i.e. time step 0 (zeros when inputs is empty)
      dby       -- gradient for by, clipped to [-5, 5]
      da0y      -- gradient for Wry, clipped to [-5, 5]
      lastState -- reservoir state after the final input
    """
    xs, a0s, ys, ps = {}, {}, {}, {}
    a0s[-1] = np.copy(a0)
    myLoss = 0
    # Forward Pass
    for t in range(len(inputs)):
        # One-hot encode the input character
        xs[t] = np.zeros((vocabSize,1))
        xs[t][inputs[t]] = 1
        # Reservoir update: input drive plus recurrent drive through the
        # hysteretic tanh, then offset by the reservoir bias.
        a0s[t] = schmitt(a0s[t-1], np.dot(theta0, xs[t]) + np.dot(thetaJ0_0, a0s[t-1])) + ba0
        ys[t] = np.dot(Wry, a0s[t]) + by
        # Softmax; subtracting the max leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        expScores = np.exp(ys[t] - np.max(ys[t]))
        ps[t] = expScores / np.sum(expScores)
        myLoss += -np.log(ps[t][targets[t],0]) #CE loss
    # Backward pass
    da0y = np.zeros_like(Wry)
    dby = np.zeros_like(by)
    # Defined up front so the return below is valid for an empty sequence
    dy = np.zeros_like(by)
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1 # backprop into y.
        da0y += np.dot(dy, a0s[t].T)
        dby += dy
    for dparam in [dby, da0y]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
    return myLoss, dy, dby, da0y, a0s[len(inputs)-1]
# n counts completed iterations; p is the read position in data
n, p = 0, 0
# memory variables for Adagrad
mWry = np.zeros_like(Wry)
mby = np.zeros_like(by)
# smooth loss at iteration 0
smooth_loss = -np.log(1.0/vocabSize)*seqLength
a0prev = np.zeros((myDim,1))
for k in range(myStart,myIt):
    # On the first pass, or when the next window would run past the end of
    # the data, reset the reservoir state and start again from the beginning.
    if (p+seqLength+1 >= len(data) or k == 0):
        a0prev = np.zeros((myDim,1))
        p = 0
    # Targets are the inputs shifted one character to the right
    inputs = [char_to_ix[ch] for ch in data[p:p+seqLength]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seqLength+1]]
    # sample occasionally
    if k % dispIt == 0:
        # Seed the sample with 'A'; if the corpus has no 'A', fall back to
        # the first character of the current window instead of raising KeyError.
        if 'A' in char_to_ix:
            mySampleInput = char_to_ix['A']
        else:
            mySampleInput = inputs[0]
        sample_ix = sample(a0prev,mySampleInput, sampleLength,k)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----' % (txt, ))
    myLoss, dy, dby, da0y, a0prev = funLoss(inputs,targets,a0prev)
    # Exponential moving average of the loss
    smooth_loss = smooth_loss * 0.999 + myLoss * 0.001
    if k % dispIt == 0: #give us a training update
        print('iter %d, lR: %.3e loss: %f, smooth loss: %f' % (k, lR, myLoss, smooth_loss)) # print progress
    # Adagrad update of the readout weights and bias: accumulate squared
    # gradients in mem, then scale each step by 1/sqrt(mem). The in-place
    # operators update the module-level arrays themselves.
    for param, dparam, mem in zip([Wry, by],
                                  [da0y, dby],
                                  [mWry, mby]):
        mem += dparam * dparam
        param += -lR * dparam / np.sqrt(mem + 1e-8)
    # Advance to the next window and decay the learning rate
    p += seqLength
    lR *= (1-lRDecay)
    n += 1
