"""Implementation of sparse filtering using TensorFlow.
Original MATLAB code: https://github.com/jngiam/sparseFiltering
Paper: https://papers.nips.cc/paper/4334-sparse-filtering.pdf
"""
# For Python 3 compatibility.
from __future__ import print_function
# For building the algorithm.
import tensorflow as tf
import numpy as np
# Loads the MNIST dataset.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# These params define the model architecture.
# [100, 100] corresponds to 2 layers with 100 dimensions each.
nb_hidden = [100, 100]
nb_train = 1000 # Number of training steps for each layer.
batch_size = 100 # Number of samples in a batch.
learning_rate = 0.01 # Gradient descent learning rate.
print_every = 100 # Print loss every `print_every` steps.
# Returns Numpy arrays with the initial weights.
W_init = lambda shape, dtype, partition_info: np.random.uniform(-0.05, 0.05, size=shape)
b_init = lambda shape, dtype, partition_info: np.zeros(shape)


def build_layer(input_tensor, output_dim, activation=tf.abs):
    """Builds a single feed-forward layer.

    Args:
        input_tensor: 2D Tensor with shape (batch_size, input_dim).
        output_dim: int, the number of output dimensions.
        activation: the activation function to apply to the output.

    Returns:
        wx_b: output of the layer, with shape (batch_size, output_dim).
        weights: list, [W, b], the weights of this layer.
    """
    input_dim = input_tensor.get_shape()[-1].value
    W = tf.get_variable('W',
                        shape=(input_dim, output_dim),
                        dtype='float32',
                        initializer=W_init)
    b = tf.get_variable('b',
                        shape=(output_dim,),
                        dtype='float32',
                        initializer=b_init)
    wx_b = activation(tf.matmul(input_tensor, W) + b)
    return wx_b, [W, b]

# This is a placeholder for the input data.
data_pl = tf.placeholder('float32', shape=(None, 28 * 28))
# These will store the layer weights and outputs.
y_hats, layer_weights = [], []
# Builds the network.
output = data_pl
for i, nb_hid in enumerate(nb_hidden):
    with tf.variable_scope('layer_%d' % i):
        output, weights = build_layer(output, nb_hid)
    y_hats.append(output)  # Stores each layer's output in the y_hats list.
    layer_weights.append(weights)
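# The layers are trained greedily, one at a time: each layer's sparse
# filtering loss is minimized with respect to that layer's weights only,
# with the layers beneath it held fixed.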
# This is the optimizer that the training part will use.
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)


def train_layer(sess, y_hat, weights):
    """Trains a single layer's weights.

    Args:
        sess: the TensorFlow session.
        y_hat: 2D Tensor (batch_size, output_dim), the output of the layer.
        weights: list of weights to minimize.
    """
    # This is the core of the unsupervised algorithm.
    l2_batch = tf.nn.l2_normalize(y_hat, dim=0)  # Normalize each feature along the batch dim.
    l2_normed = tf.nn.l2_normalize(l2_batch, dim=1)  # Normalize each example along the feature dim.
    l1_loss = tf.reduce_sum(tf.abs(l2_normed), axis=1)  # L1 norm of each normalized example.
    loss = tf.reduce_mean(l1_loss)  # Total loss is the mean across the batch.
    min_op = opt.minimize(loss, var_list=weights)

    # Initialize only the variables that have not been initialized yet (this
    # layer's weights and the optimizer's new slot variables), so that the
    # weights of previously trained layers are left untouched.
    uninitialized = set(sess.run(tf.report_uninitialized_variables()))
    new_vars = [v for v in tf.global_variables()
                if v.name.split(':')[0].encode() in uninitialized]
    sess.run(tf.variables_initializer(new_vars))

    for i in range(nb_train):
        X_data, _ = mnist.train.next_batch(batch_size)
        X_data = X_data * 2 - 1  # Rescale pixels from [0, 1] to [-1, 1].
        sess.run(min_op, feed_dict={data_pl: X_data})
        if not i % print_every:
            loss_v = sess.run(loss, feed_dict={data_pl: X_data})
            print('loss on step %d: %f' % (i, loss_v))


with tf.Session() as sess:
    for i, (y_hat, weights) in enumerate(zip(y_hats, layer_weights)):
        train_layer(sess, y_hat, weights)
        W_val = sess.run(weights[0]).T  # Trained filters, shape (output_dim, input_dim).
        np.save('W_%d' % i, W_val)
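
# A minimal sketch of how the saved filters might be inspected afterwards
# (assumes matplotlib is installed; the 28x28 reshape only applies to the
# first layer, whose inputs are raw MNIST pixels):
#
#     import numpy as np
#     import matplotlib.pyplot as plt
#     W0 = np.load('W_0.npy')  # Shape (100, 784).
#     plt.imshow(W0[0].reshape(28, 28), cmap='gray')
#     plt.show()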