# char-Res-Schmitt: Reservoir computing character prediction text generator
# Perturb a dynamic reservoir with character inputs, predict the next character with a linear classifier
#
#          ________
#         |        |
#         V        | random connections with some sparsity
#input--->o--->o--->o
#         |    |    |
#         V    V    V
#         o--->o--->o--
#         |    |    |
#         V    V    V
#         o--->o--->o--->output --> linear classifier
#
# This implementation is inspired in no small part by Andrej Karpathy's char-rnn gist and blog post: https://karpathy.github.io/2015/05/21/rnn-effectiveness/
#
# https://gist.github.com/karpathy/d4dee566867f8291f086
# BSD Licence
import numpy as np

def schmitt(z0, z1):
    # Based on a Schmitt trigger: tanh with hysteresis.
    # The output for a given drive z0 depends on the feedback term z1,
    # so the unit's response has memory of its recent state.
    myTemp = np.tanh(z0)
    mySchmitt = np.tanh(myTemp + z1)
    return mySchmitt
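# For example (values rounded), the same drive z0 = 0.5 produces different
# outputs depending on the sign of the feedback term:
#   schmitt(0.5,  0.1) = tanh(tanh(0.5) + 0.1) ≈ tanh(0.562) ≈ 0.509
#   schmitt(0.5, -0.1) ≈ tanh(0.362) ≈ 0.347
# That state dependence is the hysteresis referred to above.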
# Load data
# Replace with training text of your choice
useDemoString = True  # flip to False to train on a text file instead
if useDemoString:
    data = "Why don't you try character prediction on something else?"
else:
    myFile = './wikiRC.txt'
    data = open(myFile, 'r').read()  # use this text file as training input
chars = sorted(list(set(data)))
dataSize, vocabSize = len(data), len(chars)
print('Data has %d characters, %d unique.' % (dataSize, vocabSize))
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}
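# For instance, if data were "abca", then chars == ['a', 'b', 'c'],
# char_to_ix == {'a': 0, 'b': 1, 'c': 2}, and ix_to_char inverts that mapping;
# characters are one-hot encoded by these indices throughout.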
# Training settings
myIt = 101      # number of training iterations
myStart = 0     # first iteration index
dispIt = 10     # sample and print progress every dispIt iterations
lR = 1e-3       # learning rate
lRDecay = 1e-6  # multiplicative learning-rate decay per iteration
# Parameters
## random seed :\
mySeed = 1337
myDim = 1024    # number of reservoir units
mySqrt = int(np.sqrt(myDim))  # (unused below)
## Sparsity coefficient
spCh = 0.75     # fraction of recurrent connections kept
wtWt = (2**11)/myDim**2  # scale for recurrent weights
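# A note on scale (my reading, not stated in the original): wtWt = 2048 / 1024**2
# ≈ 0.002, so individual recurrent weights are tiny; together with the sparsity
# mask this presumably keeps the recurrent drive from saturating the tanh units,
# in the spirit of the echo state property.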
np.random.seed(mySeed)
# Reservoir trains based on dynamics over a sequence of seqLength
seqLength = 1024
# Length to sample
sampleLength = 256
# Define the Reservoir
# Layers
a0 = np.zeros((myDim, 1))
# Weights
# input to a0
theta0 = np.random.random((myDim, vocabSize))
# a0 to a0
thetaJ0_0 = wtWt * (np.random.random((myDim, myDim)) - 0.5) * (np.random.random((myDim, myDim)) < spCh)
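# Optional sanity check (not part of the original recipe): echo state networks
# usually keep the spectral radius of the recurrent matrix at or below 1 so the
# reservoir state does not blow up. Uncomment to inspect it:
# print('spectral radius:', np.max(np.abs(np.linalg.eigvals(thetaJ0_0))))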
# weights from Reservoir output to predictions
Wry = 1e-2 * np.random.random((vocabSize, myDim))
# biases
by = np.zeros((vocabSize, 1))
ba0 = np.zeros((myDim, 1))
def sample(a0, seed_ix, n, myIter):
    """
    Sample a sequence of n integers (character indices) from the model.
    a0 is the previous state of the reservoir, seed_ix is the seed letter for
    the first time step. myIter is unused; it is kept so callers can pass the
    current iteration for debugging.
    """
    x = np.zeros((vocabSize, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        # advance the reservoir one step, then read out a distribution over characters
        a0 = schmitt(a0, np.dot(theta0, x) + np.dot(thetaJ0_0, a0)) + ba0
        y = np.dot(Wry, a0) + by
        p = np.exp(y - np.max(y)) / np.sum(np.exp(y - np.max(y)))  # numerically stable softmax
        ix = np.random.choice(range(vocabSize), p=p.ravel())
        x = np.zeros((vocabSize, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes
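# Usage sketch: starting from a zeroed reservoir and an arbitrary seed character,
#   sample(np.zeros((myDim, 1)), char_to_ix[data[0]], 10, 0)
# returns a list of 10 character indices that ix_to_char can turn back into text.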
def funLoss(inputs, targets, a0):
    """
    Run the reservoir forward over one sequence and compute the cross-entropy
    loss of the linear readout, plus gradients. Only the readout weights Wry
    and bias by get gradients; the reservoir weights stay fixed, which is the
    defining trait of reservoir computing.
    """
    xs, a0s, ys, ps = {}, {}, {}, {}
    a0s[-1] = np.copy(a0)
    myLoss = 0
    # Forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocabSize, 1))
        xs[t][inputs[t]] = 1
        a0s[t] = schmitt(a0s[t-1], np.dot(theta0, xs[t]) + np.dot(thetaJ0_0, a0s[t-1])) + ba0
        ys[t] = np.dot(Wry, a0s[t]) + by
        ps[t] = np.exp(ys[t] - np.max(ys[t])) / np.sum(np.exp(ys[t] - np.max(ys[t])))  # stable softmax
        myLoss += -np.log(ps[t][targets[t], 0])  # cross-entropy loss
    # Backward pass (readout only; no backprop through time into the reservoir)
    da0y = np.zeros_like(Wry)
    dby = np.zeros_like(by)
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1  # gradient of softmax + cross-entropy w.r.t. y
        da0y += np.dot(dy, a0s[t].T)
        dby += dy
    for dparam in [dby, da0y]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return myLoss, dy, dby, da0y, a0s[len(inputs)-1]
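# Why dy = ps[t] with 1 subtracted at the target: for p = softmax(y) and loss
# L = -log(p[target]), the derivative dL/dy simplifies to p - onehot(target),
# the standard combined softmax + cross-entropy gradient used above.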
n, p = 0, 0
# memory variables for Adagrad
mWry = np.zeros_like(Wry)
mby = np.zeros_like(by)
# smooth loss at iteration 0
smooth_loss = -np.log(1.0/vocabSize) * seqLength
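# This starting value is the cross-entropy of guessing uniformly at random over
# the vocabulary, summed over one sequence; e.g. a 21-character vocabulary gives
# about 1024 * ln(21) ≈ 3.1e3. Training should pull smooth_loss below it.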
a0prev = np.zeros((myDim, 1))
for k in range(myStart, myIt):
    # reset the reservoir state and data pointer at the start and on wraparound
    if (p + seqLength + 1 >= len(data) or k == 0):
        a0prev = np.zeros((myDim, 1))
        p = 0
    inputs = [char_to_ix[ch] for ch in data[p:p+seqLength]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seqLength+1]]
    # sample occasionally
    if (k % dispIt == 0):
        # seed with the first character of the data, which is guaranteed to be
        # in the vocabulary (a fixed seed like 'A' may not be)
        mySampleInput = char_to_ix[data[0]]
        sample_ix = sample(a0prev, mySampleInput, sampleLength, k)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----' % (txt,))
    myLoss, dy, dby, da0y, a0prev = funLoss(inputs, targets, a0prev)
    smooth_loss = smooth_loss * 0.999 + myLoss * 0.001
    if (k % dispIt == 0):  # give us a training update
        print('iter %d, lR: %.3e loss: %f, smooth loss: %f' % (k, lR, myLoss, smooth_loss))
    # Adagrad update: accumulate squared gradients per parameter and scale each
    # step by 1/sqrt of that running sum, so frequently-updated weights take
    # smaller steps over time
    for param, dparam, mem in zip([Wry, by],
                                  [da0y, dby],
                                  [mWry, mby]):
        mem += dparam * dparam
        param += -lR * dparam / np.sqrt(mem + 1e-8)
    p += seqLength
    lR *= (1 - lRDecay)
    n += 1