# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
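'''Train a character-level RNN language model over plain text.

The default data set is Karpathy's char-rnn Linux-kernel text (see the URL
in the __main__ block); this script targets Python 2 and SINGA's layer API.
'''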
import cPickle as pickle
import numpy as np
import argparse
import urllib2
import traceback
import time
import sys

from singa import layer
from singa import loss
from singa import device
from singa import tensor
from singa import optimizer
from singa import initializer
from singa.proto import model_pb2
from singa import utils
from rafiki.agent import Agent, MsgType


class Data(object):

    def __init__(self, file_url, batch_size=32, seq_length=100, train_ratio=0.8):
        '''Data object for loading a plain text file.

        Args:
            file_url, URL of the text file to download.
            batch_size, number of sequences per mini-batch.
            seq_length, number of characters per sequence.
            train_ratio, split the text file into train and test sets, where
                train_ratio of the characters are in the train set.
        '''
        self.raw_data = urllib2.urlopen(file_url).read()
        chars = list(set(self.raw_data))
        self.vocab_size = len(chars)
        self.char_to_idx = {ch: i for i, ch in enumerate(chars)}
        self.idx_to_char = {i: ch for i, ch in enumerate(chars)}
        data = [self.char_to_idx[c] for c in self.raw_data]
        # seq_length + 1 chars per sample: the inputs plus the shifted labels
        nsamples = len(data) / (1 + seq_length)
        data = data[0:nsamples * (1 + seq_length)]
        data = np.asarray(data, dtype=np.int32)
        data = np.reshape(data, (-1, seq_length + 1))
        # shuffle all sequences
        np.random.shuffle(data)
        self.train_dat = data[0:int(data.shape[0] * train_ratio)]
        self.num_train_batch = self.train_dat.shape[0] / batch_size
        self.val_dat = data[self.train_dat.shape[0]:]
        self.num_test_batch = self.val_dat.shape[0] / batch_size
        print 'train dat', self.train_dat.shape
        print 'val dat', self.val_dat.shape
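

# SINGA's LSTM layer takes a Python list with one tensor per time step,
# each of shape (batch, dim); numpy2tensors swaps the batch-major numpy
# arrays to time-major order and slices them along the time axis.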
def numpy2tensors(npx, npy, dev):
    '''batch, seq, dim --> seq, batch, dim'''
    tmpx = np.swapaxes(npx, 0, 1)
    tmpy = np.swapaxes(npy, 0, 1)
    inputs = []
    labels = []
    for t in range(tmpx.shape[0]):
        x = tensor.from_numpy(tmpx[t])
        y = tensor.from_numpy(tmpy[t])
        x.to_device(dev)
        y.to_device(dev)
        inputs.append(x)
        labels.append(y)
    return inputs, labels
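

# The label at step t is the input character of step t + 1; the inputs are
# one-hot encoded to match the (vocab_size,) input_sample_shape that the
# LSTM layer is created with in train().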
def convert(batch, batch_size, seq_length, vocab_size, dev):
    '''convert a batch of data into a sequence of input tensors'''
    y = batch[:, 1:]
    x1 = batch[:, :seq_length]
    x = np.zeros((batch_size, seq_length, vocab_size), dtype=np.float32)
    for b in range(batch_size):
        for t in range(seq_length):
            c = x1[b, t]
            x[b, t, c] = 1
    return numpy2tensors(x, y, dev)
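

# Step decay: start from 1e-3 and halve the learning rate every 50 epochs.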
def get_lr(epoch):
    return 0.001 / float(1 << (epoch / 50))
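

# Poll the rafiki agent for pause/resume/stop commands; while paused, keep
# polling every 0.1s. Returns True iff training should stop.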
def handle_cmd(agent):
    pause = False
    stop = False
    while not stop:
        key, val = agent.pull()
        if key is not None:
            msg_type = MsgType.parse(key)
            if msg_type.is_command():
                if MsgType.kCommandPause.equal(msg_type):
                    agent.push(MsgType.kStatus, "Success")
                    pause = True
                elif MsgType.kCommandResume.equal(msg_type):
                    agent.push(MsgType.kStatus, "Success")
                    pause = False
                elif MsgType.kCommandStop.equal(msg_type):
                    agent.push(MsgType.kStatus, "Success")
                    stop = True
                else:
                    agent.push(MsgType.kStatus, "Warning, unknown message type")
                    print "Unsupported command %s" % str(key)
        if pause and not stop:
            time.sleep(0.1)
        else:
            break
    return stop
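

# train() wires everything together: a (possibly multi-stack) LSTM over
# one-hot inputs, a Dense layer shared across time steps, softmax
# cross-entropy per step, and a manual forward/backward pass over the
# unrolled sequence.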
def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
          num_stacks=1, dropout=0.5, model_path='model', port=9999):
    # RMSProp, with each gradient's L2 norm constrained to at most 5
    opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
    cuda = device.create_cuda_gpu()
    rnn = layer.LSTM(
        name='lstm',
        hidden_size=hidden_size,
        num_stacks=num_stacks,
        dropout=dropout,
        input_sample_shape=(data.vocab_size,))
    rnn.to_device(cuda)
    rnn_w = rnn.param_values()[0]
    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
    dense = layer.Dense(
        'dense',
        data.vocab_size,
        input_sample_shape=(hidden_size,))
    dense.to_device(cuda)
    dense_w = dense.param_values()[0]
    dense_b = dense.param_values()[1]
    print 'dense w ', dense_w.shape
    print 'dense b ', dense_b.shape
    initializer.uniform(dense_w, dense_w.shape[0], 0)
    dense_b.set_value(0)
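
    # the Dense gradients are accumulated over all unrolled time steps,
    # so keep explicit accumulator tensors on the device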
    g_dense_w = tensor.Tensor(dense_w.shape, cuda)
    g_dense_b = tensor.Tensor(dense_b.shape, cuda)

    agent = Agent(port)
    lossfun = loss.SoftmaxCrossEntropy()
    for epoch in range(max_epoch):
        train_loss = 0
        for b in range(data.num_train_batch):
            batch = data.train_dat[b * batch_size: (b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            # two empty tensors stand in for the initial hidden and cell
            # states, which therefore default to zero
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())

            # the last two outputs are the final hidden and cell states;
            # drop them, keeping one output per time step
            outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
            grads = []
            batch_loss = 0
            g_dense_w.set_value(0.0)
            g_dense_b.set_value(0.0)
            for output, label in zip(outputs, labels):
                act = dense.forward(model_pb2.kTrain, output)
                lvalue = lossfun.forward(model_pb2.kTrain, act, label)
                batch_loss += lvalue.l1()
                grad = lossfun.backward()
                grad /= batch_size
                grad, gwb = dense.backward(model_pb2.kTrain, grad)
                grads.append(grad)
                g_dense_w += gwb[0]
                g_dense_b += gwb[1]
                # print output.l1(), act.l1()
            utils.update_progress(
                b * 1.0 / data.num_train_batch, 'training loss = %f' %
                (batch_loss / seq_length))
            train_loss += batch_loss / seq_length

            # empty placeholder tensors for the gradients of the final states
            grads.append(tensor.Tensor())
            grads.append(tensor.Tensor())
            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
            dense_w, dense_b = dense.param_values()
            opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
            opt.apply_with_lr(
                epoch, get_lr(epoch),
                g_dense_w, dense_w, 'dense_w')
            opt.apply_with_lr(
                epoch, get_lr(epoch),
                g_dense_b, dense_b, 'dense_b')
            if (b + 1) % 20 == 0:
                # average over the b + 1 batches seen so far
                info = dict(phase='train', step=epoch,
                            loss=train_loss / (b + 1),
                            timestamp=time.time())
                agent.push(MsgType.kInfoMetric, info)
                if handle_cmd(agent):
                    break
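
        # end-of-epoch bookkeeping: report the average training loss, then
        # run the same unrolled forward pass on the held-out split (kEval)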
        avg_loss = train_loss / data.num_train_batch
        print '\nEpoch %d, train loss is %f' % (epoch, avg_loss)
        eval_loss = 0
        for b in range(data.num_test_batch):
            batch = data.val_dat[b * batch_size: (b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())
            outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
            for output, label in zip(outputs, labels):
                output = dense.forward(model_pb2.kEval, output)
                eval_loss += lossfun.forward(model_pb2.kEval,
                                             output, label).l1()
        avg_loss = eval_loss / data.num_test_batch / seq_length
        print 'Epoch %d, evaluation loss is %f' % (epoch, avg_loss)
        info = dict(phase='test', step=epoch, loss=avg_loss,
                    timestamp=time.time())
        agent.push(MsgType.kInfoMetric, info)
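
        # checkpoint every 30 epochs: copy the parameters back to the host
        # and pickle them together with the vocabulary and the
        # hyper-parameters needed to rebuild the net later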
        if (epoch + 1) % 30 == 0:
            # checkpoint the model
            model_file = '%s_%d.bin' % (model_path, epoch)
            with open(model_file, 'wb') as fd:
                print 'saving model to %s' % model_file
                d = {}
                for name, w in zip(
                        ['rnn_w', 'dense_w', 'dense_b'],
                        [rnn_w, dense_w, dense_b]):
                    w.to_host()
                    d[name] = tensor.to_numpy(w)
                    w.to_device(cuda)
                d['idx_to_char'] = data.idx_to_char
                d['char_to_idx'] = data.char_to_idx
                d['hidden_size'] = hidden_size
                d['num_stacks'] = num_stacks
                d['dropout'] = dropout

                pickle.dump(d, fd)

    agent.stop()
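

# A typical invocation, assuming this script is saved as train.py (the
# hyper-parameter values are illustrative):
#     python train.py -b 32 -l 100 -d 128 -s 2 -m 80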
if __name__ == '__main__':
    try:
        parser = argparse.ArgumentParser(description='Train multi-stack LSTM')
        # parser.add_argument('data', type=str, help='training file')
        parser.add_argument('-b', type=int, default=32, help='batch_size')
        parser.add_argument('-l', type=int, default=64, help='sequence length')
        parser.add_argument('-d', type=int, default=128, help='hidden size')
        parser.add_argument('-s', type=int, default=2, help='num of stacks')
        parser.add_argument('-m', type=int, default=80, help='max num of epochs')
        args = parser.parse_args()
        file_url = 'http://cs.stanford.edu/people/karpathy/char-rnn/linux_input.txt'
        data = Data(file_url, batch_size=args.b, seq_length=args.l)
        train(data, args.m, hidden_size=args.d, num_stacks=args.s,
              seq_length=args.l, batch_size=args.b)
    except SystemExit:
        pass
    except:
        traceback.print_exc()
        sys.stderr.write("for help use --help\n\n")