Skip to content

Instantly share code, notes, and snippets.

@keppel
Last active May 14, 2017 02:12
Show Gist options
  • Save keppel/19e2a095cbc08d26ee418c0b7bc16c23 to your computer and use it in GitHub Desktop.
Save keppel/19e2a095cbc08d26ee418c0b7bc16c23 to your computer and use it in GitHub Desktop.
evolving a character-level GRU in vanilla ndarray
/*
evolving a character-level RNN (GRU)
in vanilla ndarray
*/
let ndarray = require('ndarray')
let ops = require('ndarray-ops')
let nj = require('numjs') // just discovered this. todo: rewrite gru to use nj methods
let gaussian = require('gaussian')
let distribution = gaussian(0, 1)
let { gemv } = require('ndarray-blas-level2')
function unpack(arr, num) {
if (num > arr.length) {
throw new Error('tried to get too many parameters')
}
return arr.splice(arr.length - num, num)
}
function jitter(p, sigma) {
let noise = Array(p.length).fill(0).map(() => distribution.ppf(Math.random()))
let params = p.map((v, k) => v + sigma * noise[k])
return { noise, params }
}
function buildModel(initialParams, hiddenSize, dictSize, gruParamSize) {
let params = initialParams.slice()
let rnn = gru(unpack(params, gruParamSize), hiddenSize, dictSize)
let w1 = ndarray(unpack(params, hiddenSize * dictSize), [
dictSize,
hiddenSize
])
return { rnn, w1 }
}
function forward(m, x, h0) {
let h1 = m.rnn(x, h0)
let y = softmax(ff(m.w1, h1))
return [y, h1]
}
function ff(w, x, b) {
let y = ndarray(Array(w.shape[0]).fill(0))
if (b) {
ops.assign(y, b)
}
gemv(1, w, x, 1, y)
return y
}
function gruInit(hiddenSize, xSize) {
return Array(6 * hiddenSize)
.fill(0)
.concat(
Array(3 * xSize * hiddenSize + 3 * hiddenSize * hiddenSize)
.fill(0)
.map(
() => distribution.ppf(Math.random()) / Math.sqrt(xSize * hiddenSize)
)
)
}
function gru(params, hiddenSize, xSize) {
// params required: 3(x * h) + 3(h * h) + 6(h)
let paramsRequired =
3 * xSize * hiddenSize + 3 * hiddenSize * hiddenSize + 6 * hiddenSize
let bir = ndarray(unpack(params, hiddenSize))
let bin = ndarray(unpack(params, hiddenSize))
let bii = ndarray(unpack(params, hiddenSize))
let bhn = ndarray(unpack(params, hiddenSize))
let bhr = ndarray(unpack(params, hiddenSize))
let bhi = ndarray(unpack(params, hiddenSize))
let Wir = ndarray(unpack(params, xSize * hiddenSize), [hiddenSize, xSize])
let Whr = ndarray(unpack(params, hiddenSize * hiddenSize), [
hiddenSize,
hiddenSize
])
let Wii = ndarray(unpack(params, xSize * hiddenSize), [hiddenSize, xSize])
let Whi = ndarray(unpack(params, hiddenSize * hiddenSize), [
hiddenSize,
hiddenSize
])
let Win = ndarray(unpack(params, xSize * hiddenSize), [hiddenSize, xSize])
let Whn = ndarray(unpack(params, hiddenSize * hiddenSize), [
hiddenSize,
hiddenSize
])
return (x, h0) => {
// rt=sigmoid(Wirxt+bir+Whrh(t−1)+bhr)
let rt = ndarray(Array(hiddenSize).fill(0))
ops.add(rt, ff(Wir, x), ff(Whr, h0), bir, bhr)
rt = sigmoid(rt)
// it=sigmoid(Wiixt+bii+Whih(t−1)+bhi)
let it = ndarray(Array(hiddenSize).fill(0))
ops.add(it, ff(Wii, x), ff(Whi, h0), bii, bhi)
it = sigmoid(it)
// nt=tanh(Winxt+bin+rt∗(Whnh(t−1)+bhn))
let nt = ndarray(Array(hiddenSize).fill(0))
ops.mul(rt, rt, ff(Whn, h0, bhn))
ops.add(nt, ff(Win, x), rt, bin)
nt = tanh(nt)
// ht=(1−it)∗nt+it∗h(t−1)
let h1 = ndarray(Array(hiddenSize).fill(0))
h1.data.forEach((v, k) => {
h1.data[k] += (1 - it.data[k]) * nt.data[k] + it.data[k] * h0.data[k]
})
return h1
}
}
function tanh(ndarr) {
return ndarray(ndarr.data.slice().map(v => Math.tanh(v)), ndarr.shape)
}
function sigmoid(ndarr) {
return ndarray(
ndarr.data.slice().map(v => 1 / (1 + Math.exp(-v))),
ndarr.shape
)
}
const EOS = '∆'
function buildDictionaries(corpus) {
let charactersSeen = new Set()
let charToIndex = { [EOS]: 0 }
let indexToChar = { 0: EOS }
charactersSeen.add(EOS)
for (let i = 0; i < corpus.length; i++) {
if (!charactersSeen.has(corpus[i])) {
charToIndex[corpus[i]] = charactersSeen.size
indexToChar[charactersSeen.size] = corpus[i]
}
charactersSeen.add(corpus[i])
}
return { charToIndex, indexToChar, size: charactersSeen.size }
}
function onehot(size, index) {
let embedded = ndarray(Array(size).fill(0))
embedded.set(index, 1)
return embedded
}
function softmax(nd) {
let expSum = 0
nd.data.forEach(n => {
expSum += Math.exp(n)
})
let softmaxedValues = nd.data.map(n => Math.exp(n) / expSum)
return ndarray(softmaxedValues)
}
// let data =
// 'how does a bastard, orphan, son of a whore and a scotsman, dropped in the middle of a forgotten spot'
let data = 'generate me'
// let data = 'yolo'
// let data = '1234'
let dicts = buildDictionaries(data)
let hiddenSize = 10
let gruInitialParams = gruInit(hiddenSize, dicts.size)
let initialParams = Array(hiddenSize * dicts.size)
.fill(0)
.map(() => distribution.ppf(Math.random()))
.concat(gruInitialParams)
function fitness(params) {
let model = buildModel(
params,
hiddenSize,
dicts.size,
gruInitialParams.length
)
let h = ndarray(Array(hiddenSize).fill(0))
let score = 0
let text = data + EOS
for (let i = 0; i < text.length - 1; i++) {
// off-by-one judd, fix this
let index = dicts.charToIndex[text[i]]
let indexNext = dicts.charToIndex[text[i + 1]]
let x = onehot(dicts.size, index)
let out = forward(model, x, h)
y = out[0]
h = out[1]
score += Math.log(y.get(indexNext))
}
return score / text.length
}
function adam(beta1 = 0.9, beta2 = 0.999, eps = 1e-8) {
let m = []
let v = []
return (gradient, key) => {
m[key] = beta1 * (m[key] || 0) + (1 - beta1) * gradient
v[key] = beta2 * (v[key] || 0) + (1 - beta2) * Math.pow(gradient, 2)
return m[key] / (Math.sqrt(v[key]) + eps)
}
}
let headParams = initialParams.slice()
let normalizationWindow = 50
let sigma = 0.01
let alpha = 0.001
let mean = 0
let sd = 1e-8
let optimizer = adam()
for (let i = 0; i < 50000; i++) {
let { params, noise } = jitter(headParams, sigma)
let score = fitness(params)
const n = 1 / normalizationWindow
mean = (1 - n) * mean + n * score
sd = (1 - n) * sd + n * Math.pow(score - mean, 2)
let advantage = (score - mean) / (sd + 1e-8)
if (i > normalizationWindow) {
headParams.forEach((v, k) => {
headParams[k] +=
alpha *
optimizer(advantage * noise[k], k) /
(sigma * normalizationWindow)
// weight decay
// headParams[k] *= 1 - 1e-5
})
}
if (!(i % 100)) {
console.log(mean)
}
}
function sample(params, seed) {
let model = buildModel(
params,
hiddenSize,
dicts.size,
gruInitialParams.length
)
let maxLength = 120
let h = ndarray(Array(hiddenSize).fill(0))
let generated = seed
while (
generated[generated.length - 1] !== EOS && generated.length < maxLength
) {
let index = dicts.charToIndex[generated[generated.length - 1]]
let x = onehot(dicts.size, index)
let out = forward(model, x, h)
y = out[0]
h = out[1]
generated += dicts.indexToChar[ops.argmax(y)]
}
return generated
}
console.log(sample(headParams, 'g'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment