## Unrolled GAN
# A Brock, 2016
# This code implements the 2D mixture-of-Gaussians toy experiment from
# Unrolled GANs (Metz et al., 2016, https://arxiv.org/abs/1611.02163).
# TODO: Make shared variables and reduce the memory transfer overhead
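# Overview: G maps 256-d Gaussian noise to 2D points; D is a small MLP
# classifier. Instead of training G against a fixed D, D's Adam update is
# symbolically unrolled for 10 steps (theano.scan + theano.clone below), so
# G's gradient takes D's response into account, which is the core
# unrolled-GAN idea.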
# Imports
import numpy as np
from numpy import pi
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as ll
from pypr.clustering.gmm import sample_gaussian_mixture as GMM
import matplotlib.pyplot as plt
batch_size = 150
batch_index = T.iscalar('batch_index')
batch_slice = slice(batch_index*batch_size, (batch_index+1)*batch_size)
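# batch_index/batch_slice let the compiled functions pull minibatches
# straight out of a GPU-resident shared array via `givens`, avoiding a
# host-to-device transfer per call.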
# Set up generator and fixed random seeds
# rng_data = np.random.RandomState(args.seed_data)
rng = np.random.RandomState(42)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
# Specify the generative model
noise_dim = (batch_size, 256)
noise = theano_rng.normal(size=noise_dim)
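# theano_rng.normal draws fresh noise on every call of a compiled function
# that uses G's output, so each G step sees a new noise batch.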
G = [ll.InputLayer(shape=noise_dim, input_var=noise)]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G1')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G2')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=2,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=None,
                    name='G_out')]
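# G: 256 -> 128 -> 128 -> 2, ReLU hidden units, linear output, orthogonal
# init scaled by 0.8 throughout.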
D = [ll.InputLayer(shape=(None, 2))]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='D1')]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=1,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.sigmoid,
                    name='DO')]
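# D: 2 -> 128 -> 1 with a sigmoid output, so D(x) is the estimated
# probability that x is real.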
# Variables
X = T.TensorType('float32', [False]*2)('X')
X_shared = lasagne.utils.shared_empty(2, dtype='float32')
# Outputs
Xh = ll.get_output(G[-1]) # G output
p_X = ll.get_output(D[-1],X/4) # D(X)
p_Xh = ll.get_output(D[-1],Xh/4) # D(G(Z))
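# Inputs to D are scaled by 1/4, presumably to map the mixture (a radius-2
# ring plotted on [-4.5, 4.5]) roughly into [-1, 1].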
# Params
learning_rate = 1e-4
beta1 = 0.5
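# Standard GAN binary cross-entropy losses (predictions clipped away from
# 0/1 for numerical stability):
#   L_Dg: D on generated samples, target 0
#   L_Dd: D on real samples, target 1
#   L_G1: non-saturating generator loss (G wants D(G(z)) pushed toward 1)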
L_Dg = T.nnet.binary_crossentropy(T.clip( p_Xh , 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
L_Dd = T.nnet.binary_crossentropy(T.clip( p_X , 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
L_G1 = T.nnet.binary_crossentropy(T.clip( p_Xh, 1e-7, 1.0 - 1e-7), T.ones(p_Xh.shape))
# Get D Updates for use in unrolling
D_updates = lasagne.updates.adam(T.mean(L_Dg+L_Dd),ll.get_all_params(D[-1],trainable=True),learning_rate,beta1=beta1)
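# D_updates is a symbolic update dict: one Adam step on D's parameters. It
# is replayed inside theano.scan below to build the unrolled graph.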
# Get L_G for second step
# L_G = theano.clone(L_G1,replace = D_updates)
def fprop(X, Xh):
    # p_X = ll.get_output(D[-1], X)   # D(X)
    # p_Xh = ll.get_output(D[-1], Xh) # D(G(Z))
    # L_Dg = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
    # L_Dd = T.nnet.binary_crossentropy(T.clip(p_X, 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
    # lasagne.updates.adam(T.mean(L_Dg + L_Dd), ll.get_all_params(D[-1], trainable=True), learning_rate, beta1=beta1)
    # return replace = D_updates
    return [L_G1, D_updates]
# Maybe...
print('Building G graph...')
values,updates = theano.scan(fprop,n_steps=10,non_sequences=[X,Xh])
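# Each scan iteration applies D_updates once, so the returned `updates`
# dict maps every D parameter (and its Adam state) to its value after 10
# unrolled steps.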
# Scan and go through all 10 D updates
# losses = scan...
# l_G = values[-1]
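# theano.clone substitutes the unrolled (post-10-step) D parameters into
# the generator loss, so the G gradient below flows through D's reaction.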
L_G = theano.clone(L_G1,replace = updates)
G_updates = lasagne.updates.adam(T.mean(L_G),ll.get_all_params(G[-1],trainable=True),learning_rate,beta1=beta1)
print('Compiling Discriminator Function...')
Dfn = theano.function([batch_index],T.grad(T.mean(L_G1),Xh),updates=D_updates,givens={X: X_shared[batch_slice]})
Dgd = theano.function([X],L_Dd)
print('Compiling Generator Function...')
Gfn = theano.function([batch_index],Xh,updates=G_updates,givens={X: X_shared[batch_slice]})
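# Dfn takes one Adam step on D (via updates=D_updates) and returns
# dL_G1/dXh as a diagnostic; Dgd evaluates D's real-data loss at arbitrary
# points (used for the contour plot); Gfn takes one unrolled Adam step on G
# and returns the generated samples.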
# Define the gaussian mixture
r = 2
thetas = np.linspace(0,2*pi-pi/4,8)
means = np.asarray([r*np.cos(thetas),r*np.sin(thetas)]).transpose()
variances = [np.diag(0.02*np.ones(2))]*8
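# Target density: 8 isotropic Gaussians (variance 0.02) evenly spaced on a
# ring of radius 2.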
# Contour Plot Limits and Resolution
delt = 0.1
axlim = 4.5
Xx, Yy = np.meshgrid(np.arange(-axlim, axlim, delt), np.arange(-axlim, axlim, delt))
print('running...')
batch_index = 0
num_batches = 50
X_shared.set_value(np.float32(GMM(centroids=means,ccov = variances,samples = batch_size*num_batches)))
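# Pre-sample num_batches real minibatches into the shared array; note that
# `batch_index` now shadows the earlier T.iscalar, which is fine because
# the compiled functions already captured the symbolic input.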
for i in range(25000):
    # Refresh the pool of real batches once they have all been used
    if not batch_index % num_batches:
        batch_index = 0
        X_shared.set_value(np.float32(GMM(centroids=means, ccov=variances, samples=batch_size*num_batches)))
    # Alternate one D step and one G step
    if i % 2:
        grads = Dfn(batch_index)
        batch_index += 1
    else:
        Xs = Gfn(batch_index)
    # Every 20 steps, plot G's samples over D's real-data loss contours
    if not i % 20:
        plt.scatter(Xs[:, 0], Xs[:, 1])
        gds = Dgd(np.float32(np.asarray([Xx.flatten(), Yy.flatten()])).transpose())
        plt.contour(Xx, Yy, np.reshape(gds, np.shape(Xx)))
        plt.axis([-axlim, axlim, -axlim, axlim])
        plt.title('Step # ' + str(i))
        plt.pause(0.000001)
        plt.clf()
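# Requirements: Theano, Lasagne, matplotlib (with an interactive backend
# for plt.pause), and pypr for sample_gaussian_mixture.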