@alextp
Forked from siemanko/initialization_trouble.py
Created March 30, 2017
"""Code for data dependent initialization in Weight Normalization paper:
https://arxiv.org/abs/1602.07868
"""
import argparse

import numpy as np
import tensorflow as tf
from tensorflow.python.framework.errors import FailedPreconditionError
parser = argparse.ArgumentParser()
parser.add_argument('mode', choices=['standard', 'onebyone'], type=str)
args = parser.parse_args()
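# Example invocation (file name assumed from the gist this was forked from):
#   python initialization_trouble.py standard   # reproduces the FailedPreconditionError
#   python initialization_trouble.py onebyone   # initializes variables one at a time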
def initialize_interdependent_variables(session, vars_list, feed_dict):
    """Initialize a list of variables one at a time, which is useful if
    initialization of some variables depends on initialization of the others.
    """
    vars_left = vars_list
    while len(vars_left) > 0:
        new_vars_left = []
        for v in vars_left:
            try:
                session.run(tf.variables_initializer([v]), feed_dict)
            except FailedPreconditionError:
                new_vars_left.append(v)
        if len(new_vars_left) >= len(vars_left):
            # This can happen if the variables all depend on each other, or more
            # likely if there is another variable outside of the list that still
            # needs to be initialized.
            raise Exception("Cycle in variable dependencies, or external precondition unsatisfied.")
        else:
            vars_left = new_vars_left
def fully_connected(x,
                    num_outputs,
                    activation_fn=None,
                    init_scale=1.,
                    scope=None,
                    reuse=None):
    with tf.variable_scope(scope, default_name="fully_connected", reuse=reuse):
        # data based initialization of parameters
        V = tf.get_variable(
            'V',
            [int(x.get_shape()[1]), num_outputs],
            tf.float32,
            tf.random_normal_initializer(0, 0.05), trainable=True
        ).initialized_value()
        V_norm = tf.nn.l2_normalize(V, [0])
        x_init = tf.matmul(x, V_norm)
        m_init, v_init = tf.nn.moments(x_init, [0])
        scale_init = init_scale / tf.sqrt(v_init + 1e-10)
        g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
        b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init * scale_init, trainable=True)
        # use weight normalization (Salimans & Kingma, 2016): the effective
        # weight matrix is g * V / ||V||_2, with norms taken column-wise
        x = tf.matmul(x, V)
        scaler = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
        x = tf.reshape(scaler, [1, num_outputs]) * x + tf.reshape(b, [1, num_outputs])
        # apply activation_fn
        if activation_fn is not None:
            x = activation_fn(x)
        return x
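# A minimal NumPy reference for the weight-norm reparameterization above,
# assuming the same column-wise convention as tf.nn.l2_normalize(V, [0]).
# This helper is an illustrative sketch and is not part of the original gist.
def _weight_norm_reference(x_np, V_np, g_np, b_np):
    # Column-wise L2 norms of V; the effective weight is g * V / ||V||_2.
    norms = np.sqrt(np.square(V_np).sum(axis=0))
    return x_np.dot(V_np) * (g_np / norms) + b_np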
x = tf.placeholder(tf.float32, [None, 28 * 28])
h1 = fully_connected(x, num_outputs=200)
y = fully_connected(h1, num_outputs=10)
session = tf.Session()
if args.mode == 'standard':
    # OPTION 1: Use tensorflow init (throws FailedPreconditionError)
    session.run(tf.global_variables_initializer(), {
        x: np.ones((128, 28 * 28))
    })
elif args.mode == 'onebyone':
    # OPTION 2: Initialize variables one by one
    initialize_interdependent_variables(session, tf.global_variables(), {
        x: np.ones((128, 28 * 28))
    })
# Ideal solution: tf.variables_initializer would topologically sort the
# variables with respect to their dependency graph before initializing them.
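# A hedged sketch of that ideal solution: compute, for each variable, which
# other variables its initializer reads, then initialize in topological order
# in a single pass (no retry loop). The helper names below are hypothetical,
# and the graph walk ignores control inputs; this is an illustrative sketch
# for TF1-style graphs, not existing TensorFlow behavior.
def _initializer_dependencies(var, var_by_op_name):
    # Walk the ops feeding var's initial_value and collect any variables
    # from the list that the initializer reads.
    deps, seen, stack = set(), set(), [var.initial_value.op]
    while stack:
        op = stack.pop()
        if op.name in seen:
            continue
        seen.add(op.name)
        for inp in op.inputs:
            if inp.op.name in var_by_op_name:
                deps.add(var_by_op_name[inp.op.name])
            else:
                stack.append(inp.op)
    return deps

def initialize_in_topological_order(session, vars_list, feed_dict):
    var_by_op_name = {v.op.name: v for v in vars_list}
    deps = {v: _initializer_dependencies(v, var_by_op_name) for v in vars_list}
    done = set()
    while len(done) < len(vars_list):
        # A variable is ready once everything its initializer reads is done.
        ready = [v for v in vars_list if v not in done and deps[v] <= done]
        if not ready:
            raise Exception("Cycle in variable dependencies.")
        session.run(tf.variables_initializer(ready), feed_dict)
        done.update(ready)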