"""Data-dependent initialization for Weight Normalization (Salimans & Kingma,
2016): https://arxiv.org/abs/1602.07868

Builds a small two-layer weight-normalized network and compares two ways of
running the data-dependent initializers: the stock one-shot initializer
(which fails) and a one-by-one retry loop (which works).
"""
import argparse

import numpy as np
import tensorflow as tf
from tensorflow.python.framework.errors import FailedPreconditionError
parser = argparse.ArgumentParser()
parser.add_argument('mode', choices=['standard', 'onebyone'], type=str)
args = parser.parse_args()
def initialize_interdependent_variables(session, vars_list, feed_dict):
    """Initialize a list of variables one at a time, which is useful when
    the initialization of some variables depends on other variables
    already being initialized.
    """
    vars_left = vars_list
    while len(vars_left) > 0:
        new_vars_left = []
        for v in vars_left:
            try:
                session.run(tf.variables_initializer([v]), feed_dict)
            except FailedPreconditionError:
                # This variable's initializer reads a variable that has not
                # been initialized yet; retry it on the next pass.
                new_vars_left.append(v)
        if len(new_vars_left) >= len(vars_left):
            # No progress was made on this pass. This can happen if the
            # variables all depend on each other, or more likely if some
            # variable outside of vars_list still needs to be initialized.
            raise Exception("Cycle in variable dependencies, "
                            "or external precondition unsatisfied.")
        else:
            vars_left = new_vars_left
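# A minimal self-contained illustration (added for clarity; not part of the
# original gist): `demo_b` below is initialized from `demo_a`'s value, so
# running both initializers in one shot can raise FailedPreconditionError,
# while the retry loop above succeeds no matter how the list is ordered.
# Call _demo_interdependent_init() to see it in action.
def _demo_interdependent_init():
    with tf.Graph().as_default():
        a = tf.get_variable('demo_a', initializer=tf.constant(2.0))
        b = tf.get_variable('demo_b', initializer=a * 3.0)  # reads a's value
        with tf.Session() as sess:
            # Deliberately pass b first; the helper retries it after a.
            initialize_interdependent_variables(sess, [b, a], feed_dict=None)
            print(sess.run([a, b]))  # -> [2.0, 6.0]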
def fully_connected(x,
                    num_outputs,
                    activation_fn=None,
                    init_scale=1.,
                    scope=None,
                    reuse=None):
    with tf.variable_scope(scope, default_name="fully_connected", reuse=reuse):
        # data-based initialization of parameters
        V = tf.get_variable(
            'V',
            [int(x.get_shape()[1]), num_outputs],
            tf.float32,
            tf.random_normal_initializer(0, 0.05),
            trainable=True
        )
        V_norm = tf.nn.l2_normalize(V, [0])
        x_init = tf.matmul(x, V_norm)
        m_init, v_init = tf.nn.moments(x_init, [0])
        scale_init = init_scale / tf.sqrt(v_init + 1e-10)
        g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
        b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init * scale_init, trainable=True)

        # use weight normalization (Salimans & Kingma, 2016)
        x = tf.matmul(x, V)
        scaler = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
        x = tf.reshape(scaler, [1, num_outputs]) * x + tf.reshape(b, [1, num_outputs])

        # apply activation_fn
        if activation_fn is not None:
            x = activation_fn(x)
        return x
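# For reference (added annotation, not in the original gist): the layer above
# computes  y = (g / ||V||_2) * (x @ V) + b  per output unit, i.e. the
# weight-normalized weights are  w = g * V / ||V||_2.  The data-dependent
# init picks  g = init_scale / sigma  and  b = -mu * g,  where mu and
# sigma**2 are the per-unit mean and variance of  x @ (V / ||V||_2)  on the
# initial batch, so that the initial pre-activations have zero mean and
# variance init_scale**2.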
x = tf.placeholder(tf.float32, [None, 28 * 28])
h1 = fully_connected(x, num_outputs=200)
# Feeding h1 (not x) into the second layer makes its data-dependent
# initializers depend on the first layer's variables, which is exactly
# what breaks one-shot initialization below.
y = fully_connected(h1, num_outputs=10)
session = tf.Session()
if args.mode == 'standard':
    # OPTION 1: Use tensorflow init (throws FailedPreconditionError)
    session.run(tf.global_variables_initializer(), {
        x: np.ones((128, 28 * 28))
    })
elif args.mode == 'onebyone':
    # OPTION 2: Initialize variables one by one
    initialize_interdependent_variables(session, tf.global_variables(), {
        x: np.ones((128, 28 * 28))
    })
# Ideal solution: tf.variables_initializer would topologically sort the
# variables with respect to their dependency graph before initializing them.
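# A sketch of that ideal solution (added here; not part of the original gist,
# and specific to TF1 graph mode): walk each variable's initial_value
# subgraph to find which other variables it reads, then run the initializers
# in dependency order instead of retrying on FailedPreconditionError.
def topologically_initialize_variables(session, vars_list, feed_dict=None):
    var_by_op_name = {v.op.name: v for v in vars_list}

    def variable_deps(v):
        # Collect variables (from vars_list) reachable from v's initializer.
        found, seen = set(), set()
        stack = [v.initial_value.op]
        while stack:
            op = stack.pop()
            if op.name in seen:
                continue
            seen.add(op.name)
            dep = var_by_op_name.get(op.name)
            if dep is not None and dep is not v:
                found.add(dep)
                continue
            stack.extend(inp.op for inp in op.inputs)
        return found

    initialized, remaining = set(), list(vars_list)
    while remaining:
        # Variables whose initializer dependencies are all satisfied.
        ready = [v for v in remaining if variable_deps(v) <= initialized]
        if not ready:
            raise Exception("Cycle in variable dependencies.")
        session.run(tf.variables_initializer(ready), feed_dict)
        initialized.update(ready)
        remaining = [v for v in remaining if v not in ready]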