"""
An MxN layer convolutional network with the following architecture:

    [conv-relu-pool] x N - [affine] x M - [softmax]
"""

import numpy as np
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.layer_utils import *
# a Convolutional Network by armhold
class GeorgeNet(object):
"""
An MxN layer convolutional network with the following architecture:
[conv-relu-pool] x N - [affine] x M - [softmax]
The network operates on minibatches of data that have shape (N, C, H, W)
consisting of N images, each with height H and width W and with C input
channels.
"""
    def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=3,
                 num_convnets=2, num_affine=2, hidden_dim=100, num_classes=10,
                 use_batchnorm=False, weight_scale=1e-3, reg=0.0,
                 dtype=np.float32):
"""
Initialize a new network.
Inputs:
- input_dim: Tuple (C, H, W) giving size of input data
- num_filters: Number of filters to use in the convolutional layer
- filter_size: Size of filters to use in the convolutional layer
- hidden_dim: Number of units to use in the fully-connected hidden layer
- num_classes: Number of scores to produce from the final affine layer.
of weights.
- reg: Scalar giving L2 regularization strength
- dtype: numpy datatype to use for computation.
"""
        self.params = {}
        self.use_batchnorm = use_batchnorm
        self.reg = reg
        self.dtype = dtype

        C, H, W = input_dim

        # these values *were* hard-coded in loss()
        conv_stride = 1
        conv_pad = (filter_size - 1) / 2
        self.conv_param = {'stride': conv_stride, 'pad': conv_pad}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_height = 2
        pool_width = 2
        pool_stride = 2
        self.pool_param = {'pool_height': pool_height, 'pool_width': pool_width,
                           'stride': pool_stride}

        self.num_convnets = num_convnets
        self.num_affine = num_affine
        # track the spatial size flowing through the [conv-relu-pool] blocks;
        # each block applies a conv (size from conv_outputs) followed by a
        # max pool (size from pool_outputs)
        incoming_h, incoming_w = H, W

        # create weights & biases for conv layers
        #
        layer = 0
        for i in range(self.num_convnets):
            layer += 1
            weightK = 'W' + str(layer)
            biasK = 'b' + str(layer)

            depth = C if i == 0 else num_filters
            self.params[weightK] = weight_scale * np.random.randn(num_filters, depth, filter_size, filter_size)
            self.params[biasK] = np.zeros(num_filters)

            incoming_h, incoming_w = conv_outputs(incoming_h, incoming_w, filter_size, conv_pad, conv_stride)
            incoming_h, incoming_w = pool_outputs(incoming_h, incoming_w, pool_height, pool_width, pool_stride)

            if self.use_batchnorm:
                self.params['gamma' + str(layer)] = np.ones(num_filters)
                self.params['beta' + str(layer)] = np.zeros(num_filters)
                # print "layer: %d, weights shape: %s, gamma shape: %s" % (layer, self.params[weightK].shape, self.params['gamma' + str(layer)].shape)
        # create weights & biases for affine layers
        #
        for i in range(self.num_affine):
            layer += 1
            weightK = 'W' + str(layer)
            biasK = 'b' + str(layer)

            prev_layer_dim = num_filters * incoming_h * incoming_w if i == 0 else hidden_dim
            assert incoming_h > 0, 'incoming_h must be > 0... network too deep?'
            assert incoming_w > 0, 'incoming_w must be > 0... network too deep?'

            # if this is the last layer, get ready to feed softmax, else just another hidden layer
            output_dim = num_classes if i == self.num_affine - 1 else hidden_dim

            self.params[weightK] = weight_scale * np.random.randn(prev_layer_dim, output_dim)
            self.params[biasK] = np.zeros(output_dim)

            if self.use_batchnorm and i < self.num_affine - 1:
                self.params['gamma' + str(layer)] = np.ones(output_dim)
                self.params['beta' + str(layer)] = np.zeros(output_dim)
                # print "layer: %d, weights shape: %s, gamma shape: %s" % (layer, self.params[weightK].shape, self.params['gamma' + str(layer)].shape)
        # With batch normalization we need to keep track of running means and
        # variances, so we need to pass a special bn_param object to each batch
        # normalization layer. You should pass self.bn_params[0] to the forward pass
        # of the first batch normalization layer, self.bn_params[1] to the forward
        # pass of the second batch normalization layer, etc.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for i in xrange(self.num_convnets + self.num_affine - 1)]

        for k, v in self.params.iteritems():
            self.params[k] = v.astype(dtype)
    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the MxN layer convolutional network.

        If y is None, run a test-time forward pass and return the class scores;
        otherwise return a (loss, grads) tuple for training.
        """
        # Set train/test mode for the batchnorm params since batch normalization
        # behaves differently during training and testing.
        mode = 'test' if y is None else 'train'
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode  # fixed by armhold

        scores = None
        out = None
        caches = []

        # forward pass: conv-relu-pool x N
        #
        layer = 0
        input = X
        # print "forward pass: conv layers..."
        for i in range(self.num_convnets):
            layer += 1
            weights = self.params['W' + str(layer)]
            bias = self.params['b' + str(layer)]
            cache = None

            if self.use_batchnorm:
                bnp = self.bn_params[i]
                gamma = self.params['gamma' + str(layer)]
                beta = self.params['beta' + str(layer)]
                out, cache = conv_batchnorm_relu_pool_forward(input, weights, bias, gamma, beta, self.conv_param, self.pool_param, bnp)
            else:
                out, cache = conv_relu_pool_forward(input, weights, bias, self.conv_param, self.pool_param)

            caches.append(cache)
            input = out
        # forward pass: affine x M
        #
        # print "forward pass: affine layers..."
        for i in range(self.num_affine):
            layer += 1
            weights = self.params['W' + str(layer)]
            bias = self.params['b' + str(layer)]
            cache = None

            if self.use_batchnorm and i < self.num_affine - 1:
                bnp = self.bn_params[i + self.num_convnets]  # add offset from num_convnets
                gamma = self.params['gamma' + str(layer)]
                beta = self.params['beta' + str(layer)]
                out, cache = affine_batchnorm_forward(input, weights, bias, gamma, beta, bnp)
            else:
                out, cache = affine_forward(input, weights, bias)

            caches.append(cache)
            input = out

        # print "forward pass: DONE with affine layers."
        scores = out
        if y is None:
            return scores

        data_loss, dscores = softmax_loss(scores, y)

        # compute regularization loss
        reg_loss = 0
        layer = 0
        for i in range(self.num_convnets + self.num_affine):
            layer += 1
            weights = self.params['W' + str(layer)]
            reg_loss += 0.5 * self.reg * np.sum(weights * weights)

        loss = data_loss + reg_loss
        grads = {}
        dout = dscores

        # backward pass: affine layers, in reverse order
        for i in reversed(xrange(self.num_affine)):
            if self.use_batchnorm and i < self.num_affine - 1:
                dx, dw, db, dgamma, dbeta = affine_batchnorm_backward(dout, caches.pop())
                grads['gamma' + str(layer)] = dgamma
                grads['beta' + str(layer)] = dbeta
            else:
                dx, dw, db = affine_backward(dout, caches.pop())

            weights = self.params['W' + str(layer)]
            grads['W' + str(layer)] = dw + self.reg * weights
            grads['b' + str(layer)] = db

            dout = dx
            layer -= 1
        # backward pass: conv-relu-pool blocks, in reverse order
        for i in reversed(xrange(self.num_convnets)):
            if self.use_batchnorm:
                dx, dw, db, dgamma, dbeta = conv_batchnorm_relu_pool_backward(dout, caches.pop())
                grads['gamma' + str(layer)] = dgamma
                grads['beta' + str(layer)] = dbeta
            else:
                dx, dw, db = conv_relu_pool_backward(dout, caches.pop())

            weights = self.params['W' + str(layer)]
            grads['W' + str(layer)] = dw + self.reg * weights
            grads['b' + str(layer)] = db

            dout = dx
            layer -= 1

        return loss, grads


# conv volume size formula: W2 = (W1 - F + 2P) / S + 1
# from http://cs231n.github.io/convolutional-networks/
def conv_outputs(H, W, filter_size, pad, stride):
    out_h = (H - filter_size + 2 * pad) / stride + 1
    out_w = (W - filter_size + 2 * pad) / stride + 1
    return (out_h, out_w)


# pooling volume formula: W2 = (W1 - F) / S + 1
def pool_outputs(H, W, pool_height, pool_width, stride):
    out_h = (H - pool_height) / stride + 1
    out_w = (W - pool_width) / stride + 1
    return (out_h, out_w)


def conv_batchnorm_relu_pool_forward(x, w, b, gamma, beta, conv_param, pool_param, bn_param):
    """
    Convenience layer that performs a convolution, spatial batchnorm, a ReLU, and a pool.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - gamma, beta, bn_param: Scale/shift parameters and running statistics for
      the spatial batchnorm layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: Object to give to the backward pass
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    # print "conv_out.shape: %s, gamma.shape: %s, beta.shape: %s" % (conv_out.shape, gamma.shape, beta.shape)
    batch_out, batch_cache = spatial_batchnorm_forward(conv_out, gamma, beta, bn_param)
    relu_out, relu_cache = relu_forward(batch_out)
    pool_out, pool_cache = max_pool_forward_fast(relu_out, pool_param)

    out = pool_out
    cache = (conv_cache, batch_cache, relu_cache, pool_cache)
    return out, cache


def conv_batchnorm_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-batchnorm-relu-pool convenience layer.
    """
    conv_cache, batch_cache, relu_cache, pool_cache = cache

    dpool = max_pool_backward_fast(dout, pool_cache)
    drelu = relu_backward(dpool, relu_cache)
    dbatch, dgamma, dbeta = spatial_batchnorm_backward(drelu, batch_cache)
    dx, dw, db = conv_backward_fast(dbatch, conv_cache)

    return dx, dw, db, dgamma, dbeta
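

# A minimal gradient-check sketch for the conv-batchnorm-relu-pool sandwich
# above (added for illustration, not part of the original gist). It assumes the
# usual cs231n.gradient_check helper eval_numerical_gradient_array(f, x, df) is
# available; call it by hand when debugging, it is not executed on import.
def _check_conv_batchnorm_relu_pool_gradients():
    from cs231n.gradient_check import eval_numerical_gradient_array

    np.random.seed(0)
    x = np.random.randn(2, 3, 8, 8)
    w = np.random.randn(4, 3, 3, 3)
    b = np.random.randn(4)
    gamma = np.ones(4)
    beta = np.zeros(4)
    conv_param = {'stride': 1, 'pad': 1}
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
    bn_param = {'mode': 'train'}

    out, cache = conv_batchnorm_relu_pool_forward(x, w, b, gamma, beta,
                                                  conv_param, pool_param, bn_param)
    dout = np.random.randn(*out.shape)
    dx, dw, db, dgamma, dbeta = conv_batchnorm_relu_pool_backward(dout, cache)

    # compare the analytic dx against a numerical estimate
    f = lambda x: conv_batchnorm_relu_pool_forward(x, w, b, gamma, beta,
                                                   conv_param, pool_param, bn_param)[0]
    dx_num = eval_numerical_gradient_array(f, x, dout)
    print 'max |dx - dx_num|: %e' % np.max(np.abs(dx - dx_num))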


def affine_batchnorm_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs an affine transform followed by batch normalization.
    """
    af_out, af_cache = affine_forward(x, w, b)
    # print "af_out.shape: %s, gamma.shape: %s, beta.shape: %s" % (af_out.shape, gamma.shape, beta.shape)
    bn_out, bn_cache = batchnorm_forward(af_out, gamma, beta, bn_param)

    cache = (af_cache, bn_cache)
    return bn_out, cache


def affine_batchnorm_backward(dout, cache):
    """
    Backward pass for the affine-batchnorm convenience layer.
    """
    af_cache, bn_cache = cache

    dbatch, dgamma, dbeta = batchnorm_backward(dout, bn_cache)
    dx, dw, db = affine_backward(dbatch, af_cache)

    return dx, dw, db, dgamma, dbeta
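

# A minimal smoke test (a sketch added for illustration, not part of the
# original gist). It builds a small GeorgeNet on random data standing in for
# CIFAR-10 and exercises both the training-time and test-time paths of loss();
# it assumes the cs231n package (including the compiled fast layers) is
# importable.
if __name__ == '__main__':
    np.random.seed(0)
    N, C, H, W, num_classes = 4, 3, 32, 32, 10
    X = np.random.randn(N, C, H, W)
    y = np.random.randint(num_classes, size=N)

    model = GeorgeNet(input_dim=(C, H, W), num_convnets=2, num_affine=2,
                      hidden_dim=50, use_batchnorm=True, reg=1e-3,
                      dtype=np.float64)

    # training-time call: returns the loss and one gradient per parameter
    loss, grads = model.loss(X, y)
    print 'loss: %f' % loss
    assert set(grads.keys()) == set(model.params.keys())

    # test-time call (y omitted): returns class scores of shape (N, num_classes)
    scores = model.loss(X)
    assert scores.shape == (N, num_classes)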