Last active
December 29, 2015 11:37
-
-
Save ssampang/6364265fe9cfed8d749b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cPickle, time, random, operator, os, math, numpy as np | |
import lasagne, theano, theano.tensor as T | |
#learning_rate = 0.01 | |
#momentum = 0.9 | |
class HBNet:
    """Cross-validated trainer for a Lasagne/Theano binary classifier.

    Wraps a pre-built network and trains it fold-by-fold with Nesterov
    momentum and a linearly decaying learning rate, tracking and optionally
    saving the best-validation-accuracy weights per fold.
    """

    # Mini-batch size used for both the training and validation passes.
    batch_size = 256

    def __init__(self, name, network, output_layer, input_var, reg_layers,
                 learning_rate, lr_decay, momentum, reg_lambda, training_sets):
        """Store hyper-parameters and compile the train/validation functions.

        name          -- label used in directory and checkpoint file names
        network       -- dict mapping layer name -> lasagne layer
        output_layer  -- final (sigmoid) layer of the network
        input_var     -- Theano tensor fed to the input layer
        reg_layers    -- layers whose weights receive the L2 penalty
        learning_rate -- initial learning rate (decayed by lr_decay per epoch)
        lr_decay      -- per-epoch learning-rate decrement
        momentum      -- Nesterov momentum coefficient
        reg_lambda    -- L2 regularization strength
        training_sets -- list of (X, y) folds for cross validation
        """
        self.name = name
        self.network = network
        self.output_layer = output_layer
        # Snapshot of the untrained weights so every fold starts identically.
        self.original_weights = lasagne.layers.get_all_param_values(self.output_layer)
        self.reg_layers = reg_layers
        self.original_learning_rate = np.array(learning_rate, dtype=theano.config.floatX)
        # Shared variable so the rate can be decayed without recompiling.
        self.learning_rate = theano.shared(self.original_learning_rate)
        self.lr_decay = np.array(lr_decay, dtype=theano.config.floatX)
        self.momentum = momentum
        self.reg_lambda = reg_lambda
        self.training_sets = training_sets
        self.input_var = input_var
        self.target_var = T.imatrix('targets')
        # e.g. "CNN-0_005": '.' replaced so the name is filesystem-friendly.
        self.dir_name = self.name + '-' + str(self.reg_lambda).replace('.', '_')
        self.train_fn, self.val_fn = self.setup_functions()

    def iterate_minibatches(self, inputs, targets, batchsize, shuffle=False):
        """Yield (inputs, targets) mini-batches of exactly *batchsize* rows.

        A trailing partial batch is dropped.  With shuffle=True a single
        shared permutation keeps inputs and targets paired.
        """
        assert len(inputs) == len(targets)
        if shuffle:
            indices = np.arange(len(inputs))
            np.random.shuffle(indices)
        for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
            if shuffle:
                excerpt = indices[start_idx:start_idx + batchsize]
            else:
                excerpt = slice(start_idx, start_idx + batchsize)
            yield inputs[excerpt], targets[excerpt]

    def save_network(self, filename, param_values):
        """Pickle *param_values* to <dir_name>/<filename>.

        Uses a context manager so the handle is closed even on error
        (the original used the deprecated file() builtin).
        """
        with open(self.dir_name + '/' + filename, 'wb') as f:
            cPickle.dump(param_values, f, protocol=cPickle.HIGHEST_PROTOCOL)

    def setup_functions(self):
        """Compile and return the (train_fn, val_fn) Theano functions.

        train_fn(inputs, targets) -> regularized training loss (and applies
                                     the Nesterov-momentum weight updates)
        val_fn(inputs, targets)   -> [unregularized loss, accuracy], computed
                                     with a deterministic (dropout-off) pass
        """
        prediction = lasagne.layers.get_output(self.output_layer)
        loss = lasagne.objectives.binary_crossentropy(prediction, self.target_var)
        loss = loss.mean()
        # L2 penalty on the selected layers' weights, scaled by reg_lambda.
        regularization = lasagne.regularization.regularize_layer_params(
            self.reg_layers, lasagne.regularization.l2)
        loss = loss + self.reg_lambda * regularization
        params = lasagne.layers.get_all_params(self.output_layer, trainable=True)
        updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=self.learning_rate, momentum=self.momentum)
        # Validation metrics: deterministic forward pass, no regularization.
        test_prediction = lasagne.layers.get_output(self.output_layer, deterministic=True)
        test_loss = lasagne.objectives.binary_crossentropy(test_prediction, self.target_var)
        test_loss = test_loss.mean()
        # Threshold the sigmoid output at 0.5 to score binary accuracy.
        test_acc = T.mean(T.eq(T.gt(test_prediction, 0.5), self.target_var),
                          dtype=theano.config.floatX)
        train_fn = theano.function([self.input_var, self.target_var], loss, updates=updates)
        val_fn = theano.function([self.input_var, self.target_var], [test_loss, test_acc])
        return train_fn, val_fn

    def train(self, num_epochs, save_best_network):
        """Train one network per cross-validation fold.

        num_epochs        -- epochs per fold; -1 trains until interrupted
                             (the finally block still reports/saves the best)
        save_best_network -- when true, pickle the best-accuracy weights of
                             each fold to <dir_name>/

        Side effects: creates <dir_name>/, prints progress each epoch, and
        writes per-fold stats to <dir_name>/stats.pkl.
        """
        # Portable replacement for os.system('mkdir ...'); like the shell
        # command, an already-existing directory is not fatal.
        if not os.path.isdir(self.dir_name):
            os.makedirs(self.dir_name)
        print('Training ' + self.name + ' with regularization lambda ' + str(self.reg_lambda))
        stats = [[] for _ in range(len(self.training_sets))]
        for i in range(len(self.training_sets)):
            print('Cross validation fold ' + str(i + 1))
            # Fold i validates; every other fold is concatenated for training.
            X_val, y_val = self.training_sets[i]
            X_train = np.concatenate(
                [self.training_sets[j][0] for j in range(len(self.training_sets)) if j != i])
            y_train = np.concatenate(
                [self.training_sets[j][1] for j in range(len(self.training_sets)) if j != i])
            # Reset learning rate and weights so folds are independent.
            self.learning_rate.set_value(self.original_learning_rate)
            lasagne.layers.set_all_param_values(self.output_layer, self.original_weights)
            try:
                best_params = []
                best_val_acc = 0.0
                # Ceiling division.  The original math.ceil(a/b) floored under
                # Python 2 integer division, so the reported accuracy was wrong
                # whenever no epoch ever improved on best_val_acc.
                total_val_batches = -(-len(X_val) // self.batch_size)
                best_epoch = 0
                epoch = 0
                training_start_time = time.time()
                run_forever = (num_epochs == -1)
                while epoch < num_epochs or run_forever:
                    train_err = 0
                    train_batches = 0
                    start_time = time.time()
                    for inputs, targets in self.iterate_minibatches(X_train, y_train, self.batch_size):
                        train_err += self.train_fn(inputs, targets)
                        train_batches += 1
                    val_err = 0
                    val_acc = 0
                    val_batches = 0
                    # Shrink the batch when the validation fold is smaller than
                    # batch_size, otherwise iterate_minibatches yields nothing.
                    for inputs, targets in self.iterate_minibatches(
                            X_val, y_val, min(self.batch_size, len(X_val))):
                        err, acc = self.val_fn(inputs, targets)
                        val_err += err
                        val_acc += acc
                        val_batches += 1
                    # (The original gated this on "epoch % 1 == 0" — always true.)
                    print("Epoch {} of {} took {:.3f}s training loss: {:.6f}\t validation loss: {:.6f}\t validation accuracy:{:.2f} %".format(
                        epoch + 1, num_epochs, time.time() - start_time,
                        train_err / train_batches, val_err / val_batches,
                        val_acc / val_batches * 100))
                    stats[i].append((time.time() - training_start_time,
                                     train_err / train_batches,
                                     val_err / val_batches,
                                     val_acc / val_batches))
                    # val_acc is a sum over batches; comparable across epochs
                    # because the batch count is constant within a fold.
                    if val_acc > best_val_acc:
                        best_val_acc = val_acc
                        best_params = lasagne.layers.get_all_param_values(self.output_layer)
                        total_val_batches = val_batches
                        best_epoch = epoch
                    epoch += 1
                    # Linear decay, floored at 1e-3.
                    self.learning_rate.set_value(
                        max(np.array(0.001, dtype=theano.config.floatX),
                            self.learning_rate.get_value() - self.lr_decay))
            finally:
                # Runs on normal completion AND on KeyboardInterrupt, so an
                # interrupted run-forever session still reports/saves its best.
                print('Best network from epoch ' + str(best_epoch) +
                      ' with validation accuracy ' + str(best_val_acc / total_val_batches * 100))
                print('Total elapsed time for this fold is ' + str(time.time() - training_start_time))
                if save_best_network:
                    print('Saving best network...')
                    self.save_network('network-' + self.name + '-' + 'cross_val-' +
                                      str(i) + '-' + str(best_epoch) + '.pkl', best_params)
        with open(self.dir_name + '/stats.pkl', 'wb') as saveStats:
            cPickle.dump(stats, saveStats)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cPickle, lasagne, theano, theano.tensor as T | |
from HBNet import HBNet | |
import numpy as np | |
# Data files produced by the preprocessing step.
trainingFileName = 'BinaryTrain80.pkl'
testFileName = 'BinaryTest20.pkl'

# Cross-validation and optimizer hyper-parameters.
cross_val_fold_size = 4
num_epochs = 500
learning_rate = 0.01
momentum = 0.9
reg_lambda = 0.005

# Decay the learning rate linearly from its initial value down to 0.001
# over the full run of num_epochs.
lr_decay = (learning_rate - 0.001) / num_epochs
def loadData(filename):
    """Unpickle and return the object stored in *filename*.

    Opens in binary mode to match the 'wb' pickle writers elsewhere in the
    project (the original's default text mode can corrupt pickles on
    Windows) and closes the handle even if loading raises.
    """
    with open(filename, 'rb') as f:
        return cPickle.load(f)
def addLayer(network, layer, name):
    """Register *layer* in the network dict under *name* and echo its output shape."""
    network[name] = layer
    print(lasagne.layers.get_output_shape(layer))
def build_cnn(input_var=None):
    """Build the baseline CNN: conv -> maxpool -> dropout + dense -> sigmoid head.

    The input layer expects tensors of shape (batch, 1, 361, 2).
    Returns a tuple of (layer dict, output layer, layers to L2-regularize).
    """
    net = {}
    inp = lasagne.layers.InputLayer(shape=(None, 1, 361, 2), input_var=input_var)
    addLayer(net, inp, 'Input')
    conv = lasagne.layers.Conv2DLayer(inp, num_filters=50, filter_size=(30, 1))
    addLayer(net, conv, 'Conv1')
    pool = lasagne.layers.MaxPool2DLayer(conv, pool_size=(5, 1))
    addLayer(net, pool, 'MaxPool1')
    fc1 = lasagne.layers.DenseLayer(lasagne.layers.dropout(pool, p=0.5), num_units=100)
    addLayer(net, fc1, 'FC1')
    head = lasagne.layers.DenseLayer(fc1, num_units=1,
                                     nonlinearity=lasagne.nonlinearities.sigmoid)
    addLayer(net, head, 'Sigmoid')
    return (net, net['Sigmoid'], [net['Conv1'], net['FC1']])
def modify_cnn(network):
    """Deepen an existing build_cnn network and re-head it.

    Inserts a 300-unit dense layer (with light dropout) on top of 'FC1'
    and attaches a fresh sigmoid output, replacing the dict's previous
    'Sigmoid' entry.  Returns (layer dict, output layer, layers to
    L2-regularize).
    """
    fc2 = lasagne.layers.DenseLayer(lasagne.layers.dropout(network['FC1'], p=0.2),
                                    num_units=300)
    addLayer(network, fc2, 'FC2')
    head = lasagne.layers.DenseLayer(fc2, num_units=1,
                                     nonlinearity=lasagne.nonlinearities.sigmoid)
    addLayer(network, head, 'Sigmoid')
    return (network, network['Sigmoid'], [network['Conv1'], network['FC1'], network['FC2']])
print('Loading data...')
X_train, y_train = loadData(trainingFileName)
X_test, y_test = loadData(testFileName)
print('Done loading data')

# The network's input layer is (batch, 1, 361, 2); the pickled data
# presumably arrives as (batch, 1, 2, 361) — TODO confirm against the
# preprocessing step.
X_train = np.swapaxes(X_train, 2, 3)
X_test = np.swapaxes(X_test, 2, 3)

# Slice the training data into cross_val_fold_size contiguous folds.
# Floor division keeps the boundaries integral; the original hard-coded
# the divisor 4 instead of using cross_val_fold_size.
training_sets = []
num_examples = len(X_train)
indices = [num_examples * i // cross_val_fold_size for i in range(cross_val_fold_size)]
for i in range(len(indices)):
    if i < len(indices) - 1:
        training_sets.append((X_train[indices[i]:indices[i + 1]],
                              y_train[indices[i]:indices[i + 1]]))
    else:
        # Last fold takes everything up to the end of the data.
        training_sets.append((X_train[indices[i]:], y_train[indices[i]:]))

# Sweep the L2 penalty over 0.001..0.010, training a freshly initialized
# network for each setting.
for i in range(10):
    reg_lambda = 0.001 * (1 + i)
    input_var = T.tensor4('inputs')
    network, output_layer, reg_layers = build_cnn(input_var)
    net = HBNet('CNN', network, output_layer, input_var, reg_layers,
                learning_rate, lr_decay, momentum, reg_lambda, training_sets)
    net.train(num_epochs, True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment