A cross-entropy function for tanh
'''Tests the performance of a cross-entropy-like cost function designed for use with tanh activations.

This simple test attempts to emulate a simple 2D function:

    f(x, y) = tanh(10 * (y - gaussian(x)))

My test results show a nearly 2x performance increase after 1000 iterations (judged by mae) vs. using MSE as the cost function.

See this post for more info: https://stats.stackexchange.com/questions/221901/can-the-cross-entropy-cost-function-be-used-with-tanh/329921#329921
'''
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Activation, Dropout
from keras.optimizers import Adam, SGD
import numpy as np
from scipy.stats import norm
from tqdm import tqdm
import cv2
import itertools
import os
import time
X_MIN = -1.0
X_MAX = 1.0
BATCH_SIZE = 400
TRAIN_TIME = 5.0
IMAGE_RES = 100
NUM_MODELS_TO_AVERAGE = 20
WEIGHTS = os.path.expanduser('~/Desktop/test_gauss_weights.h5')
gaussian = norm.pdf
def tanh_cross_entropy(y_true, y_pred):
    # Clip predictions away from +/-1 so the logs below stay finite.
    epsilon = 1.0e-9
    y = y_true
    a = K.clip(y_pred, -1.0 + epsilon, 1.0 - epsilon)
    # Binary cross-entropy rescaled to the tanh range [-1, 1]; the + log(2)
    # term shifts the minimum so a perfect prediction of a hard +/-1 target
    # gives zero loss.
    return K.mean(-.5 * ((1.0 - y) * K.log(1.0 - a) + (1.0 + y) * K.log(1.0 + a)) + np.log(2),
                  axis=-1)
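
# Sanity checks of the cost above (a sketch added for illustration; not part
# of the original gist). Two properties worth verifying with plain numpy:
#   1. After rescaling targets and outputs from [-1, 1] to [0, 1] via
#      p = (v + 1) / 2, the cost is algebraically identical to standard
#      binary cross-entropy.
#   2. With a = tanh(z), dL/da = (a - y) / (1 - a**2) while da/dz = 1 - a**2,
#      so dL/dz = a - y: the saturation factor cancels, which is the point of
#      pairing this cost with tanh (MSE keeps a (1 - a**2) factor, so its
#      gradient vanishes at saturated outputs).
def _check_tanh_cross_entropy(y=0.3, a=-0.7, z=1.2, h=1e-6):
    def tce(y, a):
        return (-.5 * ((1.0 - y) * np.log(1.0 - a)
                       + (1.0 + y) * np.log(1.0 + a)) + np.log(2))
    # property 1: equivalence with binary cross-entropy on rescaled values
    p, q = (y + 1.0) / 2.0, (a + 1.0) / 2.0
    bce = -(p * np.log(q) + (1.0 - p) * np.log(1.0 - q))
    assert np.isclose(tce(y, a), bce)
    # property 2: finite-difference gradient w.r.t. the pre-activation z
    numeric = (tce(y, np.tanh(z + h)) - tce(y, np.tanh(z - h))) / (2 * h)
    assert np.isclose(numeric, np.tanh(z) - y, atol=1e-5)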
def createModel(loadWeights=False, loss=tanh_cross_entropy):
    inp = Input(shape=(2,))
    x = inp
    x = Dense(100, activation='tanh')(x)
    x = Dense(100, activation='tanh')(x)
    gaussdist_out = Dense(1, activation='tanh')(x)
    gauss_model = Model(inp, gaussdist_out)
    optimizer = Adam()
    gauss_model.compile(loss=loss, optimizer=optimizer, metrics=['mae'])
    if loadWeights and os.path.isfile(WEIGHTS):
        gauss_model.load_weights(WEIGHTS)
        print("Loaded weights!!!!!!")
    return gauss_model
def batch_iterator(batch_size):
    for i in itertools.count():
        # get a batch of random (x, y) values, x in the [X_MIN, X_MAX) range,
        # y in [0, 1)
        X = np.random.rand(batch_size, 2)
        X[..., 0] *= X_MAX - X_MIN
        X[..., 0] += X_MIN
        yield i, X
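
# Example usage (illustrative, not part of the original gist): each item the
# iterator yields is a (batch_index, samples) pair, where samples is a
# (batch_size, 2) array with column 0 in [X_MIN, X_MAX) and column 1 in [0, 1):
#     _, sample = next(batch_iterator(4))
#     sample.shape        # (4, 2)
#     sample[:, 0].min()  # >= X_MIN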
def true_values(XYs):
    Xs = XYs[..., 0]
    Ys = XYs[..., 1]
    result = 10 * (Ys - gaussian(Xs))
    return np.tanh(result)
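
# Illustration (added, not in the original gist): the target surface is ~0
# along the curve y == gaussian(x) and saturates toward +/-1 away from it.
# E.g. norm.pdf(0) is about 0.3989, so true_values(np.array([[0.0, norm.pdf(0)]]))
# returns ~0.0, while true_values(np.array([[0.0, 1.0]])) is ~tanh(6.0), i.e. ~1.0.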
def trainModel(model, saveWeights=False):
    batches = batch_iterator(BATCH_SIZE)
    with tqdm(total=TRAIN_TIME, unit='s',
              bar_format='{l_bar}{bar}| {n:.1f}/{total_fmt}s [{postfix}]') \
            as updater:
        i = 0
        start = time.time()
        while time.time() - start < TRAIN_TIME:
            i += 1
            batch = next(batches)[1]
            truth = true_values(batch)
            loss = model.train_on_batch(batch, truth)
            updater.set_postfix(loss=loss[0], mae=loss[1], it=i)
            elapsed = min(time.time() - start, TRAIN_TIME)
            updater.update(elapsed - updater.n)
    if saveWeights:
        model.save_weights(WEIGHTS)
    return model
def getTestGrid():
    XYs = np.dstack(np.meshgrid(np.linspace(X_MIN, X_MAX, IMAGE_RES),
                                np.linspace(1, 0, IMAGE_RES))).reshape(-1, 2)
    truth = true_values(XYs)
    return XYs, truth
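
# Note (added for clarity): the grid covers IMAGE_RES x IMAGE_RES points in
# row-major order, with y running from 1 at the first row down to 0, so
# reshaping the flat results to (IMAGE_RES, IMAGE_RES) yields an image with
# the conventional top-down orientation used by viewResult below.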
def viewResult(model):
    # view result
    XYs, truth = getTestGrid()
    predicted = model.predict(XYs, BATCH_SIZE)

    # get these in 0 to 1 range, instead of -1 to 1
    def to0To1(vals):
        return (vals + 1.0) / 2.0

    truth = to0To1(truth)
    predicted = to0To1(predicted)
    predicted = predicted.reshape(truth.shape)
    mae = np.mean(np.abs(truth - predicted))
    print(f"image test sample mae: {mae}")
    # stack the ground truth above the prediction in a single image
    image = np.ndarray((2 * IMAGE_RES, IMAGE_RES), dtype='float32')
    truth_image = image[:IMAGE_RES]
    predicted_image = image[IMAGE_RES:]
    truth_image.flat = truth.flat
    predicted_image.flat = predicted.flat
    print(np.amin(truth_image.flat), np.amax(truth_image.flat))
    print(np.amin(predicted_image.flat), np.amax(predicted_image.flat))
    cv2.imshow("my window", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return image
def doTrialRuns(numRuns, loss):
    XYs, truth = getTestGrid()
    maes = []
    for i in range(numRuns):
        model = trainModel(createModel(loss=loss))
        predicted = model.predict(XYs, BATCH_SIZE)
        predicted = predicted.reshape((-1,))
        mae = np.mean(np.abs(truth - predicted))
        print(f"image test sample mae: {mae}")
        maes.append(mae)
    print(f"{loss} maes:")
    for mae in maes:
        print(mae)
    print(f"average: {np.mean(maes)}")
    return maes
if __name__ == '__main__':
    doTrialRuns(NUM_MODELS_TO_AVERAGE, 'mse')
    doTrialRuns(NUM_MODELS_TO_AVERAGE, tanh_cross_entropy)
    # viewResult(trainModel(createModel()))