Created January 7, 2016 20:19.
Save stokasto/30198e047bfad1811cc1 to your computer and use it in GitHub Desktop.
Illustration of the behavior of the CatGAN objective function for the example from the rebuttal.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
import numpy as np
def class_entropy(X):
    """Entropy of the marginal (mean-over-examples) class distribution.

    X is an (n_examples, n_classes) array of per-example class probabilities.
    A small epsilon keeps log() finite when a marginal probability is zero.
    """
    marginal = X.mean(axis=0)
    return -(marginal * np.log(marginal + 1e-4)).sum()
def cond_entropy(X):
    """Mean per-example (conditional) entropy of the class predictions X.

    X is an (n_examples, n_classes) array; epsilon inside log() avoids -inf
    for zero probabilities.
    """
    per_example = (X * np.log(X + 1e-4)).sum(axis=1)
    return -per_example.mean()
def cross_entropy(X, Y):
    """Mean cross-entropy between predictions X and targets Y.

    Both are (n_examples, n_classes) arrays; Y is typically one-hot.
    Epsilon inside log() avoids -inf for zero predicted probabilities.
    """
    per_example = (Y * np.log(X + 1e-4)).sum(axis=1)
    return -per_example.mean()
def catgan_cost(xp, xn, weight_gen=1.):
    """Unsupervised CatGAN objectives.

    xp / xn are (n_examples, n_classes) class-probability predictions for
    real and generated examples respectively.  Returns (Dcost, Gcost): the
    discriminator maximizes Dcost, the generator minimizes Gcost.
    weight_gen scales the generated-sample entropy term in Dcost.
    """
    d_objective = (class_entropy(xp)
                   - cond_entropy(xp)
                   + weight_gen * cond_entropy(xn))
    g_objective = cond_entropy(xn) - class_entropy(xn)
    return d_objective, g_objective
def semi_catgan_cost(xp, xn, xl, yl):
    """Semi-supervised CatGAN objectives.

    xp / xn are class predictions for real / generated examples, and
    xl / yl are predictions and one-hot targets for the labeled subset.
    Returns (Dcost, Gcost).

    NOTE(review): Gcost here is the negation of the Gcost returned by the
    unsupervised catgan_cost (class_ent - cond_ent vs. -class_ent +
    cond_ent) — confirm the intended sign convention.
    """
    supervised = cross_entropy(xl, yl)
    unsupervised = class_entropy(xp) - cond_entropy(xp) + cond_entropy(xn)
    d_objective = supervised + unsupervised
    g_objective = class_entropy(xn) - cond_entropy(xn)
    return d_objective, g_objective
def gan_cost(xp, xn):
    """Standard GAN objective: E[log D(x)] + E[log(1 - D(G(z)))].

    xp / xn are the discriminator's "is real" probabilities for real and
    generated examples; the discriminator maximizes this quantity and the
    generator minimizes it.
    """
    real_term = np.log(xp).mean()
    fake_term = np.log(1.0 - xn).mean()
    return real_term + fake_term
# In the following we assume the following situation:
# 1. We have 10 unlabeled examples belonging to two classes, i.e. 5 belong to class 1 and 5 belong to class 2
#    (but obviously we assume no knowledge about the true assignment of examples to classes)
# 2. We evaluate the objective function on all of these 10 examples and the
#    generator gets to generate an additional 10 examples which are used for calculating the
#    cost terms involving the generator (Note that as mentioned in the rebuttal we could also let
#    the generator generate less examples, which would lead to an even clearer result in this script)
# We further assume that the generator has learned to perfectly reconstruct the 10
# training examples.
# It should be noted that this is obviously not a proof in any sense and only considers the situation
# mentioned by the reviewer.

# Assume the discriminator assigns equal probability to both classes
# for the real examples and (as the generated examples are assumed to be indistinguishable from
# the real examples) also for the generated examples.
pred_real = np.array([0.5, 0.5] * 10).reshape(10, 2)
pred_gen = np.copy(pred_real)
print("CatGAN")
print("------")
print("> CatGAN cost for 10 examples and 10 generated examples that are perfect reconstructions")
print("> assuming that the discriminator assigns probability 0.5 to both classes")
Dcost, Gcost = catgan_cost(pred_real, pred_gen)
print("\t Discriminator objective (maximize this) = {}, generator objective (minimize this) = {}".format(Dcost, Gcost))
# Next assume the discriminator assigns high probability for class 1 to
# 5 examples and for class 2 to the other 5.
pred_real = np.array([0.1, 0.9] * 5 + [0.9, 0.1] * 5).reshape(10, 2)
pred_gen = np.copy(pred_real)
print("> CatGAN cost for 10 examples and 10 generated examples that are perfect reconstructions:")
print("> assuming that the discriminator assigns probability 0.9 to the 'correct' classes")
Dcost, Gcost = catgan_cost(pred_real, pred_gen)
print("\t Discriminator objective (maximize this) = {}, generator objective (minimize this) = {}".format(Dcost, Gcost))
print("!! As expected there is no difference between the objective function values for the discriminator")
print("!! but the generator prefers the second configuration.")
print("")
print("")
print("NOTE: If we re-weight the real and generated parts of the objective function we can obviously get different behaviour,")
print("      making sure that the discriminator also prefers the second case.")
pred_real = np.array([0.5, 0.5] * 10).reshape(10, 2)
pred_gen = np.copy(pred_real)
# BUGFIX: the original messages said "16 examples" here although both arrays
# contain 10 examples, matching every other case in this script.
print("> CatGAN cost for 10 examples and 10 generated examples that are perfect reconstructions")
print("> assuming that the discriminator assigns probability 0.5 to both classes")
Dcost, Gcost = catgan_cost(pred_real, pred_gen, weight_gen=0.75)
print("\t Discriminator objective (maximize this) = {}, generator objective (minimize this) = {}".format(Dcost, Gcost))
pred_real = np.array([0.1, 0.9] * 5 + [0.9, 0.1] * 5).reshape(10, 2)
pred_gen = np.copy(pred_real)
print("> CatGAN cost for 10 examples and 10 generated examples that are perfect reconstructions:")
print("> assuming that the discriminator assigns probability 0.9 to the 'correct' classes")
Dcost, Gcost = catgan_cost(pred_real, pred_gen, weight_gen=0.75)
print("\t Discriminator objective (maximize this) = {}, generator objective (minimize this) = {}".format(Dcost, Gcost))
print("")
print("Semi-supervised CatGAN")
print("----------------------")
print("Using the same setup as above but adding labels to 4 of the examples we get")
pred_real = np.array([0.5, 0.5] * 10).reshape(10, 2)
pred_gen = np.copy(pred_real)
# Assume labels for examples 0, 1, 5, 6 are given and they are [2, 2, 1, 1].
pred_labeled = np.vstack([pred_real[:2, :], pred_real[5:7, :]])
true_labels_onehot = np.array([[0, 1.], [0, 1.], [1., 0], [1., 0]])
print("> Semi-supervised CatGAN cost for 10 examples and 10 generated examples that are perfect reconstructions")
print("> assuming that the discriminator assigns probability 0.5 to both classes")
# BUGFIX: the original called the unsupervised catgan_cost() here, ignoring
# the labeled data it just prepared, although the message (and the matching
# second case below) clearly refer to the semi-supervised objective.
Dcost, Gcost = semi_catgan_cost(pred_real, pred_gen, pred_labeled, true_labels_onehot)
print("\t Discriminator objective (maximize this) = {}, generator objective (minimize this) = {}".format(Dcost, Gcost))
pred_real = np.array([0.1, 0.9] * 5 + [0.9, 0.1] * 5).reshape(10, 2)
pred_gen = np.copy(pred_real)
# Same labeled subset (examples 0, 1, 5, 6 with labels [2, 2, 1, 1]).
pred_labeled = np.vstack([pred_real[:2, :], pred_real[5:7, :]])
true_labels_onehot = np.array([[0, 1.], [0, 1.], [1., 0], [1., 0]])
print("> Semi-supervised CatGAN cost for 10 examples and 10 generated examples that are perfect reconstructions:")
print("> assuming that the discriminator assigns probability 0.9 to the 'correct' classes")
Dcost, Gcost = semi_catgan_cost(pred_real, pred_gen, pred_labeled, true_labels_onehot)
print("\t Discriminator objective (maximize this) = {}, generator objective (minimize this) = {}".format(Dcost, Gcost))
print("!! In this case again, naturaly the second case leads to better values of the objective functions.")
print("")
print("GAN")
print("---")
print("For a simple GAN using a similar setup we have (assuming 10 generated and 10 real examples, but no notion of classes)")
# The plain GAN discriminator outputs a single "is real" probability per example.
dist = [0.5] * 10
pred_pos = np.array(dist)
pred_neg = np.array(dist)
print("> GAN cost for 10 examples and 10 generated examples that are perfect reconstructions:")
print("> assuming that the discriminator accepts all examples as real with probability 0.5")
print("\t GAN objective (maximize this for the discriminator / minimize for generator) = {}".format(gan_cost(pred_pos, pred_neg)))
dist = [0.9] * 10
pred_pos = np.array(dist)
pred_neg = np.array(dist)
print("> GAN cost for 10 examples and 10 generated examples that are perfect reconstructions:")
print("> assuming that the discriminator accepts all examples as real with probability 0.9")
print("\t GAN objective (maximize this for the discriminator / minimize for generator) = {}".format(gan_cost(pred_pos, pred_neg)))
print("!! Here the discriminator clearly prefers the first setting, as was also proven in the GAN paper.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.