char-Res-Schmitt
# Reservoir computing character prediction text generator
# Perturb a dynamic reservoir with character inputs, predict next character with linear classifier
#
#          _________
#         |         |
#         V         |  random connections with some sparsity
#input--->o--->o--->o
#         |    |    |
#         V    V    V
#         o--->o--->o--
#         |    |    |
#         V    V    V
#         o--->o--->o--->output --> linear classifier
#
# This implementation is inspired in no small part by Andrej Karpathy's char-rnn gist and blog post: https://karpathy.github.io/2015/05/21/rnn-effectiveness/
#
# https://gist.github.com/karpathy/d4dee566867f8291f086
# BSD Licence
import numpy as np

def schmitt(z0, z1):
    # Nonlinearity loosely based on a Schmitt trigger: squash the previous state
    # with tanh, add the new drive, then tanh again, so the previous state shifts
    # the operating point of the second tanh (a hysteresis-like effect)
    myTemp = np.tanh(z0)
    mySchmitt = np.tanh(myTemp + z1)
    return mySchmitt
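# Illustration of the nonlinearity (these values follow directly from the definition above,
# with z0 the previous state and z1 the combined input drive for a step):
#   schmitt(0.0, 1.0) = tanh(tanh(0.0) + 1.0) ~= 0.76
#   schmitt(2.0, 1.0) = tanh(tanh(2.0) + 1.0) ~= 0.96
# A large previous activation saturates the first tanh, damping how much new input can move the unit.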
# Load data
# Replace with training text of your choice
useDemoText = True  # set to False to train on a text file instead
if(useDemoText):
    data = "Why don't you try character prediction on something else?"
else:
    myFile = './wikiRC.txt'
    with open(myFile, 'r') as f:
        data = f.read()  # read the training corpus from a text file
chars = sorted(list(set(data)))
dataSize, vocabSize = len(data), len(chars)
print('Data has %d characters, %d unique.' % (dataSize, vocabSize))
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}
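# Characters are fed to the reservoir as one-hot column vectors of length vocabSize, e.g.
#   x = np.zeros((vocabSize, 1)); x[char_to_ix[data[0]]] = 1
# (see sample() and funLoss() below, which build one such vector per time step)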
# Training settings
myIt = 101      # number of training iterations
myStart = 0     # starting iteration index
dispIt = 10     # sample and report progress every dispIt iterations
lR = 1e-3       # learning rate for the readout
lRDecay = 1e-6  # multiplicative learning-rate decay per iteration
# Parameters
## random seed
mySeed = 1337
myDim = 1024    # number of reservoir units
mySqrt = int(np.sqrt(myDim))  # not used below
## Sparsity coefficient (fraction of recurrent weights kept non-zero)
spCh = 0.75
## scale of the recurrent weights
wtWt = (2**11)/myDim**2
np.random.seed(mySeed)
# The readout is trained on reservoir dynamics over sequences of seqLength characters
seqLength = 1024
# Length of text to sample when reporting progress
sampleLength = 256
# Define the Reservoir
# Layers
a0 = np.zeros((myDim, 1))  # reservoir state
# Weights
# input to a0
theta0 = np.random.random((myDim, vocabSize))
# a0 to a0: sparse, zero-mean random recurrent weights, scaled by wtWt
thetaJ0_0 = wtWt * (np.random.random((myDim, myDim)) - 0.5) * (np.random.random((myDim, myDim)) < spCh)
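# Optional aside (a common echo-state-network practice, sketched here as a commented-out
# suggestion, not something this script does): rescale the recurrent matrix to a target
# spectral radius instead of relying on a fixed scale like wtWt; 0.9 is an assumed target.
#   rho = np.max(np.abs(np.linalg.eigvals(thetaJ0_0)))
#   thetaJ0_0 *= 0.9 / rho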
# weights from Reservoir output to predictions
Wry = 1e-2 * np.random.random((vocabSize, myDim))
# biases
by = np.zeros((vocabSize, 1))
ba0 = np.zeros((myDim, 1))
def sample(a0, seed_ix, n, myIter):
    """
    Sample a sequence of n integer character indices from the model.
    a0 is the previous state of the reservoir, seed_ix is the seed character
    index for the first time step, myIter is currently unused.
    """
    x = np.zeros((vocabSize, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        # advance the reservoir, then read out a distribution over the next character
        a0 = schmitt(a0, np.dot(theta0, x) + np.dot(thetaJ0_0, a0)) + ba0
        y = np.dot(Wry, a0) + by
        p = np.exp(y) / np.sum(np.exp(y))  # softmax over the vocabulary
        ix = np.random.choice(range(vocabSize), p=p.ravel())
        x = np.zeros((vocabSize, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes
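# Example use (a sketch; assumes Wry and by have already been trained for a while):
#   ixes = sample(np.zeros((myDim, 1)), char_to_ix[data[0]], 50, 0)
#   print(''.join(ix_to_char[ix] for ix in ixes))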
def funLoss(inputs, targets, a0):
    """
    Run the reservoir forward over one sequence, compute the cross-entropy loss of
    the linear readout, and return gradients for the readout weights and bias.
    """
    xs, a0s, ys, ps = {}, {}, {}, {}
    a0s[-1] = np.copy(a0)
    myLoss = 0
    # Forward Pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocabSize, 1))
        xs[t][inputs[t]] = 1  # one-hot encoding of the input character
        a0s[t] = schmitt(a0s[t-1], np.dot(theta0, xs[t]) + np.dot(thetaJ0_0, a0s[t-1])) + ba0
        ys[t] = np.dot(Wry, a0s[t]) + by
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))  # softmax probabilities
        myLoss += -np.log(ps[t][targets[t], 0])        # cross-entropy loss
    # Backward pass (readout only; the reservoir weights stay fixed)
    da0y = np.zeros_like(Wry)  # gradient of the readout weights Wry
    dby = np.zeros_like(by)    # gradient of the readout bias by
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1    # backprop into y (softmax + cross-entropy gradient)
        da0y += np.dot(dy, a0s[t].T)
        dby += dy
    for dparam in [dby, da0y]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return myLoss, dy, dby, da0y, a0s[len(inputs)-1]
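# Note: only the readout (Wry, by) receives gradient updates; the input and recurrent
# weights (theta0, thetaJ0_0) are fixed after initialization, which is the defining
# trait of reservoir computing.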
n, p = 0, 0
# memory variables for Adagrad
mWry = np.zeros_like(Wry)
mby = np.zeros_like(by)
# smooth loss at iteration 0: cross-entropy of uniform predictions over a full sequence
smooth_loss = -np.log(1.0/vocabSize)*seqLength
a0prev = np.zeros((myDim, 1))
for k in range(myStart, myIt):
    # reset the data pointer and reservoir state at the start and whenever the corpus is exhausted
    if(p+seqLength+1 >= len(data) or k == 0):
        a0prev = np.zeros((myDim, 1))
        p = 0
    inputs = [char_to_ix[ch] for ch in data[p:p+seqLength]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seqLength+1]]
    inputs = inputs[:len(targets)]  # keep inputs/targets the same length when the corpus is shorter than seqLength
    # sample occasionally
    if(k % dispIt == 0):
        mySampleInput = char_to_ix[data[0]]  # seed with the first corpus character ('A' is not guaranteed to be in the vocabulary)
        sample_ix = sample(a0prev, mySampleInput, sampleLength, k)
        #sample_ix = sample(a0prev,inputs[int((vocabSize-1)*np.random.random(1))], sampleLength,k)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----' % (txt, ))
    myLoss, dy, dby, da0y, a0prev = funLoss(inputs, targets, a0prev)
    smooth_loss = smooth_loss * 0.999 + myLoss * 0.001  # exponential moving average of the loss
    if(k % dispIt == 0):  # give us a training update
        print('iter %d, lR: %.3e loss: %f, smooth loss: %f' % (k, lR, myLoss, smooth_loss))  # print progress
    # Adagrad update of the readout: accumulate squared gradients per parameter and
    # scale each step by 1/sqrt of that accumulator, so frequently-updated weights take smaller steps
    for param, dparam, mem in zip([Wry, by],
                                  [da0y, dby],
                                  [mWry, mby]):
        mem += dparam * dparam
        param += -lR * dparam / np.sqrt(mem + 1e-8)
    p += seqLength      # move the data pointer
    lR *= (1-lRDecay)   # decay the learning rate
    n += 1