Caffe LSTM trouble
@sunsided, created November 18, 2016
# http://christopher5106.github.io/deep/learning/2016/06/07/recurrent-neural-net-with-Caffe.html
# https://github.com/BVLC/caffe/pull/3948
# https://github.com/junhyukoh/caffe-lstm/blob/master/examples/lstm_sequence/lstm_sequence.cpp
# https://github.com/BVLC/caffe/issues/4547
import caffe
import numpy as np
import matplotlib.pyplot as plt
# noinspection PyUnresolvedReferences
import seaborn as sns
# generate data
a = np.arange(0, 32, 0.01)
d = 0.5 * np.sin(2 * a) - 0.05 * np.cos(17 * a + 0.8) + 0.05 * np.sin(25 * a + 10) - 0.02 * np.cos(45 * a + 0.3)
d = d / max(np.max(d), -np.min(d))
d = d - np.mean(d)
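# 3200 samples in total, i.e. ten non-overlapping windows of T = 320 samples,
# matching the input_shape declared in lstm.prototxt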
caffe.set_mode_gpu()
solver = caffe.SGDSolver('solver.prototxt')
# train the network
print('Training network ...')
#niter = 5000
niter = 500
train_loss = np.zeros(niter)
# Set the bias of the forget gate to 5.0 as explained in the clockwork RNN paper
solver.net.params['lstm1'][1].data[15:30] = 5
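# NOTE: this assumes the parameter layout of BVLC Caffe's LSTM layer: blob 0 is
# the input-to-hidden weight, blob 1 the bias, blob 2 the hidden-to-hidden
# weight, with the gates ordered [input, forget, output, g]. With num_output
# set to 15, the forget-gate biases are therefore elements 15:30 of blob 1.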
solver.net.blobs['clip'].data[...] = 1
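# clip = 0 marks the first timestep of a sequence (the carried-over hidden
# state is zeroed out); clip = 1 continues from the previous forward pass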
for i in range(niter):
    seq_idx = i % (len(d) // 320)
    solver.net.blobs['clip'].data[0] = seq_idx > 0
    solver.net.blobs['label'].data[:, 0] = d[seq_idx * 320: (seq_idx + 1) * 320]
    solver.step(1)
    train_loss[i] = solver.net.blobs['loss'].data
print('Done training network.')
# TODO: Losses are bad
# plot the training loss
plt.plot(np.arange(niter), train_loss)
plt.show()
# test the network, predicting one timestep per forward pass
print('Testing network ...')
# the 'data' blob is three-dimensional (T x N x 1), so the reshape must keep
# all three axes; with T = 1, each forward pass advances the state by one
# sample. The data blob is never filled in: the network runs on zero input
# and has to reproduce the sequence from its hidden state alone.
solver.test_nets[0].blobs['data'].reshape(1, 1, 1)
solver.test_nets[0].blobs['clip'].reshape(1, 1)
solver.test_nets[0].reshape()
solver.test_nets[0].blobs['clip'].data[...] = 1
preds = np.zeros(len(d))
for i in range(len(d)):
    solver.test_nets[0].blobs['clip'].data[0] = i > 0
    preds[i] = solver.test_nets[0].forward()['ip1'][0][0]
print('Done testing network.')
# plot the predictions against the ground truth
plt.plot(np.arange(len(d)), preds, label='prediction')
plt.plot(np.arange(len(d)), d, label='ground truth')
plt.legend()
plt.show()
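# a quick figure of merit for the fit: mean squared error over the full signal
mse = np.mean((preds - d) ** 2)
print('Test MSE: %f' % mse)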
name: "LSTM"
# T = 320 time_steps, N = 1 streams, 1-D data
input: "data"
input_shape { dim: 320 dim: 1 dim: 1 }
input: "clip"
input_shape { dim: 320 dim: 1 }
input: "label"
input_shape { dim: 320 dim: 1 }
layer {
name: "Silence"
type: "Silence"
bottom: "label"
include: { phase: TEST }
}
layer {
  name: "lstm1"
  type: "LSTM"
  bottom: "data"
  bottom: "clip"
  top: "lstm1"
  recurrent_param {
    num_output: 15
    weight_filler {
      type: "uniform"
      min: -0.01
      max: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
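# the LSTM top blob has shape T x N x num_output (here: 320 x 1 x 15); the
# InnerProduct layer below maps each 15-D hidden vector to a scalar output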
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "lstm1"
  top: "ip1"
  inner_product_param {
    num_output: 1
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "loss"
  type: "EuclideanLoss"
  bottom: "ip1"
  bottom: "label"
  top: "loss"
  include: { phase: TRAIN }
}
net: "lstm.prototxt"
test_iter: 10
test_interval: 2000000
base_lr: 0.0001
momentum: 0.95
clip_gradients: 0.1
lr_policy: "fixed"
display: 200
max_iter: 100000
# must agree with caffe.set_mode_gpu() in the script above
solver_mode: GPU
average_loss: 200
debug_info: false