@alextp
Forked from siemanko/initialization_trouble.py
Created March 30, 2017
"""Code for data dependent initialization in Weight Normalization paper:
https://arxiv.org/abs/1602.07868
"""
import argparse

import numpy as np
import tensorflow as tf
from tensorflow.python.framework.errors import FailedPreconditionError
parser = argparse.ArgumentParser()
parser.add_argument('mode', choices=['standard', 'onebyone'], type=str)
args = parser.parse_args()
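# Example invocation (file name assumed from the gist this was forked from):
#   python initialization_trouble.py standard   # reproduces the FailedPreconditionError
#   python initialization_trouble.py onebyone   # initializes variables one at a time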
def initialize_interdependent_variables(session, vars_list, feed_dict):
    """Initialize a list of variables one at a time, which is useful if
    initialization of some variables depends on initialization of the others.
    """
    vars_left = vars_list
    while len(vars_left) > 0:
        new_vars_left = []
        for v in vars_left:
            try:
                session.run(tf.variables_initializer([v]), feed_dict)
            except FailedPreconditionError:
                new_vars_left.append(v)
        if len(new_vars_left) >= len(vars_left):
            # This can happen if the variables all depend on each other, or more
            # likely if there is another variable outside of the list that still
            # needs to be initialized.
            raise Exception("Cycle in variable dependencies, or external precondition unsatisfied.")
        else:
            vars_left = new_vars_left
def fully_connected(x,
                    num_outputs,
                    activation_fn=None,
                    init_scale=1.,
                    scope=None,
                    reuse=None):
    with tf.variable_scope(scope, default_name="fully_connected", reuse=reuse):
        # data based initialization of parameters
        V = tf.get_variable(
            'V',
            [int(x.get_shape()[1]), num_outputs],
            tf.float32,
            tf.random_normal_initializer(0, 0.05), trainable=True
        ).initialized_value()
        V_norm = tf.nn.l2_normalize(V, [0])
        x_init = tf.matmul(x, V_norm)
        m_init, v_init = tf.nn.moments(x_init, [0])
        scale_init = init_scale / tf.sqrt(v_init + 1e-10)
        g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
        b = tf.get_variable('b', dtype=tf.float32, initializer=-m_init * scale_init, trainable=True)
        # use weight normalization (Salimans & Kingma, 2016): the effective
        # weight matrix is g * V / ||V||_2, with norms taken column-wise
        x = tf.matmul(x, V)
        scaler = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
        x = tf.reshape(scaler, [1, num_outputs]) * x + tf.reshape(b, [1, num_outputs])
        # apply activation_fn
        if activation_fn is not None:
            x = activation_fn(x)
        return x
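# A minimal NumPy reference for the weight-norm reparameterization above,
# assuming the same column-wise convention as tf.nn.l2_normalize(V, [0]).
# This helper is an illustrative sketch and is not part of the original gist.
def _weight_norm_reference(x_np, V_np, g_np, b_np):
    # Column-wise L2 norms of V; the effective weight is g * V / ||V||_2.
    norms = np.sqrt(np.square(V_np).sum(axis=0))
    return x_np.dot(V_np) * (g_np / norms) + b_np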
x = tf.placeholder(tf.float32, [None, 28 * 28])
h1 = fully_connected(x, num_outputs=200)
y = fully_connected(h1, num_outputs=10)
session = tf.Session()
if args.mode == 'standard':
    # OPTION 1: Use tensorflow init (throws FailedPreconditionError)
    session.run(tf.global_variables_initializer(), {
        x: np.ones((128, 28 * 28))
    })
elif args.mode == 'onebyone':
    # OPTION 2: Initialize variables one by one
    initialize_interdependent_variables(session, tf.global_variables(), {
        x: np.ones((128, 28 * 28))
    })
# Ideal solution: tf.variables_initializer would topologically sort the
# variables with respect to their dependency graph before initializing them.
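# A hedged sketch of that ideal solution: compute, for each variable, which
# other variables its initializer reads, then initialize in topological order
# in a single pass (no retry loop). The helper names below are hypothetical,
# and the graph walk ignores control inputs; this is an illustrative sketch
# for TF1-style graphs, not existing TensorFlow behavior.
def _initializer_dependencies(var, var_by_op_name):
    # Walk the ops feeding var's initial_value and collect any variables
    # from the list that the initializer reads.
    deps, seen, stack = set(), set(), [var.initial_value.op]
    while stack:
        op = stack.pop()
        if op.name in seen:
            continue
        seen.add(op.name)
        for inp in op.inputs:
            if inp.op.name in var_by_op_name:
                deps.add(var_by_op_name[inp.op.name])
            else:
                stack.append(inp.op)
    return deps

def initialize_in_topological_order(session, vars_list, feed_dict):
    var_by_op_name = {v.op.name: v for v in vars_list}
    deps = {v: _initializer_dependencies(v, var_by_op_name) for v in vars_list}
    done = set()
    while len(done) < len(vars_list):
        # A variable is ready once everything its initializer reads is done.
        ready = [v for v in vars_list if v not in done and deps[v] <= done]
        if not ready:
            raise Exception("Cycle in variable dependencies.")
        session.run(tf.variables_initializer(ready), feed_dict)
        done.update(ready)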