@inferrna
Last active November 5, 2015 09:53
# Usage:   python3.5 -i convert2lmdb.py train_samples test_samples /tmp/data.npy /tmp/result.npy trainsin_lmdb testsin_lmdb
# Example: python3.5 -i convert2lmdb.py 1000 -1 /tmp/data.npy /tmp/result.npy trainsin_lmdb testsin_lmdb
import numpy as np
import lmdb
import caffe
import sys

def np2lmdb(fname, arr):
    # Prepare the database with a map size 10 times greater than what we
    # theoretically need. There is little drawback to setting this too big.
    # If you still run into problems after raising it, try saving fewer
    # entries in a single transaction.
    map_size = max(arr.nbytes * 10, 65536 * 1024)
    env = lmdb.open(fname, map_size=map_size)
    # txn is a Transaction object
    with env.begin(write=True) as txn:
        for i in range(len(arr)):
            str_id = '{:08}'.format(i)
            datum = caffe.io.array_to_datum(arr[i])
            # The encode is only essential in Python 3
            txn.put(str_id.encode('ascii'), datum.SerializeToString())
    env.close()

_N, _n, fnmdata, fnmres, fnmtrain, fnmtest = sys.argv[1:]
N = int(_N)
n = int(_n)
# Load the input data and the expected results
X = np.load(fnmdata)
y = np.load(fnmres)
if n == -1:
    n = X.shape[0] - N  # -1 means "use everything after the train split"
assert X.shape[0] == y.shape[0], "Input data and result lengths do not match"
assert X.shape[0] > N, "Input data length is less than the requested train length"
assert X.shape[0] >= N + n, "Input data length is less than train and test lengths together"

def to4d(a):
    # Caffe datums are 4-D (num, channels, height, width); pad any missing
    # axes with size-1 dimensions before writing
    newshape = list(a.shape)
    while len(newshape) < 4:
        newshape.insert(1, 1)
    return a.reshape(newshape)

# Train
np2lmdb(fnmtrain + 'd', to4d(X[:N]))
np2lmdb(fnmtrain + 'r', to4d(y[:N]))
# Test
np2lmdb(fnmtest + 'd', to4d(X[N:N+n]))
np2lmdb(fnmtest + 'r', to4d(y[N:N+n]))
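
# The command line above expects the two .npy files to exist. A minimal sketch
# of how they might be produced (an assumption; any matching data/result pair
# works), mirroring the sine-regression data of the neurolab script below:
import numpy as np
data = np.random.rand(1099, 1).astype(np.float32)  # 1000 train + 99 test samples
np.save('/tmp/data.npy', data)
np.save('/tmp/result.npy', np.sin(data).astype(np.float32))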

# lenet_sin_auto_solver.prototxt
# The train/test net protocol buffer definition
train_net: "lenet_sin_auto_train.prototxt"
test_net: "lenet_sin_auto_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# With the test batch size of 99 defined below, 100 iterations cycle through
# the whole 99-sample test set many times over.
test_iter: 100
# Carry out testing every 5000 training iterations.
test_interval: 5000
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.012
momentum: 0.0
weight_decay: 0.0005
# The learning rate policy: "step" multiplies the rate by gamma every
# stepsize iterations (power is only used by other policies such as "inv")
lr_policy: "step"
gamma: 0.999
stepsize: 100
power: 0.75
# Display every 5000 iterations
display: 5000
# The maximum number of iterations
max_iter: 600
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "snaps/solve_sin"
solver_mode: GPU
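
# A minimal sanity-check sketch, assuming the solver definition above is saved
# as "lenet_sin_auto_solver.prototxt": parse it into a SolverParameter message
# so typos surface before training starts.
from caffe.proto import caffe_pb2
from google.protobuf import text_format

solver_param = caffe_pb2.SolverParameter()
with open('lenet_sin_auto_solver.prototxt') as f:
    text_format.Merge(f.read(), solver_param)
print(solver_param.lr_policy, solver_param.base_lr, solver_param.max_iter)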

# Baseline: the same sin(x) regression trained with neurolab, for comparison
import neurolab as nl
import numpy as np

N = 1099
n = 99
data = np.random.rand(N, 1).astype(np.float32)
results = np.sin(data).astype(np.float32)
firstl = data[0].size
secondl = 32
thirdl = 16
lastl = results[0].size
layers = [secondl, thirdl, lastl]
print("Layers are {0}".format(layers))
# Feed-forward net; the [0, 1] input range matches np.random.rand
net = nl.net.newff(np.array([[0, 1]] * firstl), layers)
net.errorf = nl.error.MSE()
net.trainf = nl.train.train_rprop
inp = data[:-n]
tar = results[:-n]
error = net.train(inp, tar, epochs=120000, show=500, goal=0.000001)
netres = net.sim(data[-n:])
print("Mean error is {0}".format(np.abs(results[-n:] - netres).mean()))

# Define the Caffe nets, write out the prototxt files, and train
import caffe
from caffe import layers as L
from caffe import params as P
import numpy as np

def lenet(lmdb, batch_size):
    # our version of LeNet: a series of linear and simple nonlinear transformations
    n = caffe.NetSpec()
    n.data = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb + 'd', ntop=1)
    n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb + 'r', ntop=1)
    #n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=64, weight_filler=dict(type='xavier'))
    #n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.ip1 = L.InnerProduct(n.data, num_output=32, weight_filler=dict(type='xavier'))
    n.ip2 = L.InnerProduct(n.ip1, num_output=16, weight_filler=dict(type='xavier'))
    n.ip3 = L.InnerProduct(n.ip2, num_output=1)
    n.loss = L.EuclideanLoss(n.ip3, n.label)
    return n.to_proto()

with open('lenet_sin_auto_train.prototxt', 'w') as f:
    f.write(str(lenet('trainsin_lmdb', 1000)))
with open('lenet_sin_auto_test.prototxt', 'w') as f:
    f.write(str(lenet('testsin_lmdb', 99)))

caffe.set_device(0)
caffe.set_mode_cpu()
solver = caffe.RMSPropSolver("lenet_sin_auto_solver.prototxt")
solver.step(13000000)
solver.net.forward()          # train net
solver.test_nets[0].forward() # test net (there can be more than one)
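
# A minimal evaluation sketch, assuming the training above has finished: run
# the test net once more and report the same mean-error figure the neurolab
# script prints ('ip3' and 'label' are the blob names defined in lenet()).
test_net = solver.test_nets[0]
test_net.forward()
pred = test_net.blobs['ip3'].data.flatten()
truth = test_net.blobs['label'].data.flatten()
print("Mean error is {0}".format(np.abs(truth - pred).mean()))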