## Unrolled GAN
# A Brock, 2016
# This code implements the 2D mixture-of-Gaussians toy experiment from
# Unrolled GANs (Metz et al., 2016, https://arxiv.org/abs/1611.02163).
# TODO: Make shared variables and reduce the memory transfer overhead
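# Overview: G maps 256-d Gaussian noise to 2D points; D is a small MLP
# classifier. Instead of training G against a fixed D, D's Adam update is
# symbolically unrolled for 10 steps (theano.scan + theano.clone below), so
# G's gradient takes D's response into account, which is the core
# unrolled-GAN idea.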
# Imports
import numpy as np
from numpy import pi
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as ll
from pypr.clustering.gmm import sample_gaussian_mixture as GMM
import matplotlib.pyplot as plt
batch_size = 150
batch_index = T.iscalar('batch_index')
batch_slice = slice(batch_index*batch_size, (batch_index+1)*batch_size)
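# batch_index/batch_slice let the compiled functions pull minibatches
# straight out of a GPU-resident shared array via `givens`, avoiding a
# host-to-device transfer per call.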
# Set up generator and fixed random seeds
# rng_data = np.random.RandomState(args.seed_data)
rng = np.random.RandomState(42)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
# Specify the generative model
noise_dim = (batch_size, 256)
noise = theano_rng.normal(size=noise_dim)
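# theano_rng.normal draws fresh noise on every call of a compiled function
# that uses G's output, so each G step sees a new noise batch.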
G = [ll.InputLayer(shape=noise_dim, input_var=noise)]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G1')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='G2')]
G += [ll.DenseLayer(incoming=G[-1],
                    num_units=2,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=None,
                    name='G_out')]
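# G: 256 -> 128 -> 128 -> 2, ReLU hidden units, linear output, orthogonal
# init scaled by 0.8 throughout.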
D = [ll.InputLayer(shape=(None, 2))]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=128,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.rectify,
                    name='D1')]
D += [ll.DenseLayer(incoming=D[-1],
                    num_units=1,
                    W=lasagne.init.Orthogonal(0.8),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lasagne.nonlinearities.sigmoid,
                    name='DO')]
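# D: 2 -> 128 -> 1 with a sigmoid output, so D(x) is the estimated
# probability that x is real.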
# Variables
X = T.TensorType('float32', [False]*2)('X')
X_shared = lasagne.utils.shared_empty(2, dtype='float32')
# Outputs
Xh = ll.get_output(G[-1]) # G output
p_X = ll.get_output(D[-1],X/4) # D(X)
p_Xh = ll.get_output(D[-1],Xh/4) # D(G(Z))
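# Inputs to D are scaled by 1/4, presumably to map the mixture (a radius-2
# ring plotted on [-4.5, 4.5]) roughly into [-1, 1].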
# Params
learning_rate = 1e-4
beta1 = 0.5
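# Standard GAN binary cross-entropy losses (predictions clipped away from
# 0/1 for numerical stability):
#   L_Dg: D on generated samples, target 0
#   L_Dd: D on real samples, target 1
#   L_G1: non-saturating generator loss (G wants D(G(z)) pushed toward 1)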
L_Dg = T.nnet.binary_crossentropy(T.clip( p_Xh , 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
L_Dd = T.nnet.binary_crossentropy(T.clip( p_X , 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
L_G1 = T.nnet.binary_crossentropy(T.clip( p_Xh, 1e-7, 1.0 - 1e-7), T.ones(p_Xh.shape))
# Get D Updates for use in unrolling
D_updates = lasagne.updates.adam(T.mean(L_Dg+L_Dd),ll.get_all_params(D[-1],trainable=True),learning_rate,beta1=beta1)
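# D_updates is a symbolic update dict: one Adam step on D's parameters. It
# is replayed inside theano.scan below to build the unrolled graph.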
# Get L_G for second step
# L_G = theano.clone(L_G1,replace = D_updates)
def fprop(X, Xh):
    # p_X = ll.get_output(D[-1], X)   # D(X)
    # p_Xh = ll.get_output(D[-1], Xh) # D(G(Z))
    # L_Dg = T.nnet.binary_crossentropy(T.clip(p_Xh, 1e-7, 1.0 - 1e-7), T.zeros(p_Xh.shape))
    # L_Dd = T.nnet.binary_crossentropy(T.clip(p_X, 1e-7, 1.0 - 1e-7), T.ones(p_X.shape))
    # lasagne.updates.adam(T.mean(L_Dg + L_Dd), ll.get_all_params(D[-1], trainable=True), learning_rate, beta1=beta1)
    # return replace = D_updates
    return [L_G1, D_updates]
# Maybe...
print('Building G graph...')
values,updates = theano.scan(fprop,n_steps=10,non_sequences=[X,Xh])
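# Each scan iteration applies D_updates once, so the returned `updates`
# dict maps every D parameter (and its Adam state) to its value after 10
# unrolled steps.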
# Scan and go through all 10 D updates
# losses = scan...
# l_G = values[-1]
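# theano.clone substitutes the unrolled (post-10-step) D parameters into
# the generator loss, so the G gradient below flows through D's reaction.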
L_G = theano.clone(L_G1,replace = updates)
G_updates = lasagne.updates.adam(T.mean(L_G),ll.get_all_params(G[-1],trainable=True),learning_rate,beta1=beta1)
print('Compiling Discriminator Function...')
Dfn = theano.function([batch_index],T.grad(T.mean(L_G1),Xh),updates=D_updates,givens={X: X_shared[batch_slice]})
Dgd = theano.function([X],L_Dd)
print('Compiling Generator Function...')
Gfn = theano.function([batch_index],Xh,updates=G_updates,givens={X: X_shared[batch_slice]})
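# Dfn takes one Adam step on D (via updates=D_updates) and returns
# dL_G1/dXh as a diagnostic; Dgd evaluates D's real-data loss at arbitrary
# points (used for the contour plot); Gfn takes one unrolled Adam step on G
# and returns the generated samples.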
# Define the gaussian mixture
r = 2
thetas = np.linspace(0,2*pi-pi/4,8)
means = np.asarray([r*np.cos(thetas),r*np.sin(thetas)]).transpose()
variances = [np.diag(0.02*np.ones(2))]*8
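# Target density: 8 isotropic Gaussians (variance 0.02) evenly spaced on a
# ring of radius 2.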
# Contour Plot Limits and Resolution
delt = 0.1
axlim = 4.5
Xx, Yy = np.meshgrid(np.arange(-axlim, axlim, delt), np.arange(-axlim, axlim, delt))
print('running...')
batch_index = 0
num_batches = 50
X_shared.set_value(np.float32(GMM(centroids=means,ccov = variances,samples = batch_size*num_batches)))
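# Pre-sample num_batches real minibatches into the shared array; note that
# `batch_index` now shadows the earlier T.iscalar, which is fine because
# the compiled functions already captured the symbolic input.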
for i in range(25000):
    # Refresh the pool of real batches once they have all been used
    if not batch_index % num_batches:
        batch_index = 0
        X_shared.set_value(np.float32(GMM(centroids=means, ccov=variances, samples=batch_size*num_batches)))
    # Alternate one D step and one G step
    if i % 2:
        grads = Dfn(batch_index)
        batch_index += 1
    else:
        Xs = Gfn(batch_index)
    # Every 20 steps, plot G's samples over D's real-data loss contours
    if not i % 20:
        plt.scatter(Xs[:, 0], Xs[:, 1])
        gds = Dgd(np.float32(np.asarray([Xx.flatten(), Yy.flatten()])).transpose())
        plt.contour(Xx, Yy, np.reshape(gds, np.shape(Xx)))
        plt.axis([-axlim, axlim, -axlim, axlim])
        plt.title('Step # ' + str(i))
        plt.pause(0.000001)
        plt.clf()
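# Requirements: Theano, Lasagne, matplotlib (with an interactive backend
# for plt.pause), and pypr for sample_gaussian_mixture.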