A cross-entropy function for tanh
'''Tests the performance of a cross-entropy-like cost function, designed for use with tanh activations.

This simple test attempts to learn the simple 2D function:

    f(x, y) = tanh(10 * (y - gaussian(x)))

My test results show a nearly 2x improvement (judged by mae) after 1000 iterations, versus using MSE as the cost function.

See this post for more info: https://stats.stackexchange.com/questions/221901/can-the-cross-entropy-cost-function-be-used-with-tanh/329921#329921
'''
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Activation, Dropout
from keras.optimizers import Adam, SGD
import numpy as np
from scipy.stats import norm
from tqdm import tqdm
import cv2
import itertools
import os
import time
X_MIN = -1.0
X_MAX = 1.0
BATCH_SIZE = 400
TRAIN_TIME = 5.0
IMAGE_RES = 100
NUM_MODELS_TO_AVERAGE = 20
WEIGHTS = os.path.expanduser('~/Desktop/test_gauss_weights.h5')

gaussian = norm.pdf
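# Derivation note: up to the constant log(2), the loss below is ordinary binary
# cross-entropy applied after rescaling targets and activations from [-1, 1] to
# [0, 1].  With t = (1 + y) / 2 and p = (1 + a) / 2:
#
#     -(t * log(p) + (1 - t) * log(1 - p))
#         = -0.5 * ((1 + y) * log(1 + a) + (1 - y) * log(1 - a)) + log(2)
#
# The +log(2) term shifts the minimum so the loss is exactly 0 when a == y == +/-1.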
def tanh_cross_entropy(y_true, y_pred):
    epsilon = 1.0e-9
    y = y_true
    # clip the activations away from +/-1 so the logs below can't hit log(0)
    a = K.clip(y_pred, -1.0 + epsilon, 1.0 - epsilon)
    return K.mean(-.5 * ((1.0 - y) * K.log(1.0 - a) + (1.0 + y) * K.log(1.0 + a)) + np.log(2),
                  axis=-1)
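# Optional sanity check (a sketch added here, not part of the original benchmark):
# it verifies numerically that tanh_cross_entropy matches plain binary cross-entropy
# computed with numpy on values rescaled to [0, 1].  The helper name
# _check_tanh_cross_entropy is made up, and it assumes K.constant / K.eval are
# available in the backend (true for the TensorFlow backend).
def _check_tanh_cross_entropy():
    y = np.array([[-1.0, 1.0, 0.5, -0.3]])   # targets in [-1, 1]
    a = np.array([[-0.8, 0.9, 0.1, -0.5]])   # tanh activations in (-1, 1)
    t, p = (y + 1.0) / 2.0, (a + 1.0) / 2.0  # rescale to [0, 1]
    bce = np.mean(-(t * np.log(p) + (1.0 - t) * np.log(1.0 - p)), axis=-1)
    tce = K.eval(tanh_cross_entropy(K.constant(y), K.constant(a)))
    assert np.allclose(bce, tce, atol=1e-5), (bce, tce)

# _check_tanh_cross_entropy()  # uncomment to run the check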
def createModel(loadWeights=False, loss=tanh_cross_entropy):
    inp = Input(shape=(2,))
    x = inp
    x = Dense(100, activation='tanh')(x)
    x = Dense(100, activation='tanh')(x)
    gaussdist_out = Dense(1, activation='tanh')(x)
    gauss_model = Model(inp, gaussdist_out)
    optimizer = Adam()
    gauss_model.compile(loss=loss, optimizer=optimizer, metrics=['mae'])
    if loadWeights and os.path.isfile(WEIGHTS):
        gauss_model.load_weights(WEIGHTS)
        print("Loaded weights!!!!!!")
    return gauss_model
def batch_iterator(batch_size):
    for i in itertools.count():
        # get a bunch of random (x, y) values, x in the [X_MIN, X_MAX) range,
        # y in [0, 1)
        X = np.random.rand(batch_size, 2)
        X[..., 0] *= X_MAX - X_MIN
        X[..., 0] += X_MIN
        yield i, X
def true_values(XYs):
    # the target function: f(x, y) = tanh(10 * (y - gaussian(x)))
    Xs = XYs[..., 0]
    Ys = XYs[..., 1]
    result = 10 * (Ys - gaussian(Xs))
    return np.tanh(result)
def trainModel(model, saveWeights=False):
    batches = batch_iterator(BATCH_SIZE)
    with tqdm(total=TRAIN_TIME, unit='s',
              bar_format='{l_bar}{bar}| {n:.1f}/{total_fmt}s [{postfix}]') \
            as updater:
        i = 0
        start = time.time()
        while time.time() - start < TRAIN_TIME:
            i += 1
            batch = next(batches)[1]
            truth = true_values(batch)
            loss = model.train_on_batch(batch, truth)
            updater.set_postfix(loss=loss[0], mae=loss[1], it=i)
            elapsed = min(time.time() - start, TRAIN_TIME)
            updater.update(elapsed - updater.n)
    if saveWeights:
        model.save_weights(WEIGHTS)
    return model
def getTestGrid():
    XYs = np.dstack(np.meshgrid(np.linspace(X_MIN, X_MAX, IMAGE_RES),
                                np.linspace(1, 0, IMAGE_RES))).reshape(-1, 2)
    truth = true_values(XYs)
    return XYs, truth
def viewResult(model):
    # view result
    XYs, truth = getTestGrid()
    predicted = model.predict(XYs, BATCH_SIZE)

    # get these in 0 to 1 range, instead of -1 to 1
    def to0To1(vals):
        return (vals + 1.0) / 2.0

    truth = to0To1(truth)
    predicted = to0To1(predicted)
    predicted = predicted.reshape(truth.shape)
    mae = np.mean(np.abs(truth - predicted))
    print(f"image test sample mae: {mae}")

    # stack the ground-truth image above the predicted image for comparison
    image = np.ndarray((2 * IMAGE_RES, IMAGE_RES), dtype='float32')
    truth_image = image[:IMAGE_RES]
    predicted_image = image[IMAGE_RES:]
    truth_image.flat = truth.flat
    predicted_image.flat = predicted.flat
    print(np.amin(truth_image.flat), np.amax(truth_image.flat))
    print(np.amin(predicted_image.flat), np.amax(predicted_image.flat))
    cv2.imshow("my window", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return image
def doTrialRuns(numRuns, loss):
    XYs, truth = getTestGrid()
    maes = []
    for i in range(numRuns):
        model = trainModel(createModel(loss=loss))
        predicted = model.predict(XYs, BATCH_SIZE)
        predicted = predicted.reshape((-1,))
        mae = np.mean(np.abs(truth - predicted))
        print(f"image test sample mae: {mae}")
        maes.append(mae)
    print("{} maes:".format(loss))
    for mae in maes:
        print(mae)
    print("average: {}".format(np.mean(maes)))
    return maes
doTrialRuns(NUM_MODELS_TO_AVERAGE, 'mse')
doTrialRuns(NUM_MODELS_TO_AVERAGE, tanh_cross_entropy)
# viewResult(trainModel(createModel()))