A cross-entropy function for tanh
'''Tests the performance of a cross-entropy-like cost function, designed for use with tanh activations.

This simple test attempts to learn the simple 2D function:

    f(x, y) = tanh(10 * (y - gaussian(x)))

My test results show a nearly 2x improvement (judged by mae) after 1000 iterations, versus using MSE as the cost function.

See this post for more info: https://stats.stackexchange.com/questions/221901/can-the-cross-entropy-cost-function-be-used-with-tanh/329921#329921
'''
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Activation, Dropout
from keras.optimizers import Adam, SGD
import numpy as np
from scipy.stats import norm
from tqdm import tqdm
import cv2
import itertools
import os
import time
X_MIN = -1.0
X_MAX = 1.0
BATCH_SIZE = 400
TRAIN_TIME = 5.0
IMAGE_RES = 100
NUM_MODELS_TO_AVERAGE = 20
WEIGHTS = os.path.expanduser('~/Desktop/test_gauss_weights.h5')

gaussian = norm.pdf
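# Derivation note: up to the constant log(2), the loss below is ordinary binary
# cross-entropy applied after rescaling targets and activations from [-1, 1] to
# [0, 1].  With t = (1 + y) / 2 and p = (1 + a) / 2:
#
#     -(t * log(p) + (1 - t) * log(1 - p))
#         = -0.5 * ((1 + y) * log(1 + a) + (1 - y) * log(1 - a)) + log(2)
#
# The +log(2) term shifts the minimum so the loss is exactly 0 when a == y == +/-1.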
def tanh_cross_entropy(y_true, y_pred):
    epsilon = 1.0e-9
    y = y_true
    # clip the activations away from +/-1 so the logs below can't hit log(0)
    a = K.clip(y_pred, -1.0 + epsilon, 1.0 - epsilon)
    return K.mean(-.5 * ((1.0 - y) * K.log(1.0 - a) + (1.0 + y) * K.log(1.0 + a)) + np.log(2),
                  axis=-1)
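# Optional sanity check (a sketch added here, not part of the original benchmark):
# it verifies numerically that tanh_cross_entropy matches plain binary cross-entropy
# computed with numpy on values rescaled to [0, 1].  The helper name
# _check_tanh_cross_entropy is made up, and it assumes K.constant / K.eval are
# available in the backend (true for the TensorFlow backend).
def _check_tanh_cross_entropy():
    y = np.array([[-1.0, 1.0, 0.5, -0.3]])   # targets in [-1, 1]
    a = np.array([[-0.8, 0.9, 0.1, -0.5]])   # tanh activations in (-1, 1)
    t, p = (y + 1.0) / 2.0, (a + 1.0) / 2.0  # rescale to [0, 1]
    bce = np.mean(-(t * np.log(p) + (1.0 - t) * np.log(1.0 - p)), axis=-1)
    tce = K.eval(tanh_cross_entropy(K.constant(y), K.constant(a)))
    assert np.allclose(bce, tce, atol=1e-5), (bce, tce)

# _check_tanh_cross_entropy()  # uncomment to run the check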
def createModel(loadWeights=False, loss=tanh_cross_entropy):
    inp = Input(shape=(2,))
    x = inp
    x = Dense(100, activation='tanh')(x)
    x = Dense(100, activation='tanh')(x)
    gaussdist_out = Dense(1, activation='tanh')(x)
    gauss_model = Model(inp, gaussdist_out)
    optimizer = Adam()
    gauss_model.compile(loss=loss, optimizer=optimizer, metrics=['mae'])
    if loadWeights and os.path.isfile(WEIGHTS):
        gauss_model.load_weights(WEIGHTS)
        print("Loaded weights!!!!!!")
    return gauss_model
def batch_iterator(batch_size):
    for i in itertools.count():
        # get a bunch of random (x, y) values, x in the [X_MIN, X_MAX) range,
        # y in [0, 1)
        X = np.random.rand(batch_size, 2)
        X[..., 0] *= X_MAX - X_MIN
        X[..., 0] += X_MIN
        yield i, X
def true_values(XYs):
    # the target function: f(x, y) = tanh(10 * (y - gaussian(x)))
    Xs = XYs[..., 0]
    Ys = XYs[..., 1]
    result = 10 * (Ys - gaussian(Xs))
    return np.tanh(result)
def trainModel(model, saveWeights=False):
    batches = batch_iterator(BATCH_SIZE)
    with tqdm(total=TRAIN_TIME, unit='s',
              bar_format='{l_bar}{bar}| {n:.1f}/{total_fmt}s [{postfix}]') \
            as updater:
        i = 0
        start = time.time()
        while time.time() - start < TRAIN_TIME:
            i += 1
            batch = next(batches)[1]
            truth = true_values(batch)
            loss = model.train_on_batch(batch, truth)
            updater.set_postfix(loss=loss[0], mae=loss[1], it=i)
            elapsed = min(time.time() - start, TRAIN_TIME)
            updater.update(elapsed - updater.n)
    if saveWeights:
        model.save_weights(WEIGHTS)
    return model
def getTestGrid():
    XYs = np.dstack(np.meshgrid(np.linspace(X_MIN, X_MAX, IMAGE_RES),
                                np.linspace(1, 0, IMAGE_RES))).reshape(-1, 2)
    truth = true_values(XYs)
    return XYs, truth
def viewResult(model):
    # view result
    XYs, truth = getTestGrid()
    predicted = model.predict(XYs, BATCH_SIZE)

    # get these in 0 to 1 range, instead of -1 to 1
    def to0To1(vals):
        return (vals + 1.0) / 2.0

    truth = to0To1(truth)
    predicted = to0To1(predicted)
    predicted = predicted.reshape(truth.shape)
    mae = np.mean(np.abs(truth - predicted))
    print(f"image test sample mae: {mae}")

    # stack the ground-truth image above the predicted image for comparison
    image = np.ndarray((2 * IMAGE_RES, IMAGE_RES), dtype='float32')
    truth_image = image[:IMAGE_RES]
    predicted_image = image[IMAGE_RES:]
    truth_image.flat = truth.flat
    predicted_image.flat = predicted.flat
    print(np.amin(truth_image.flat), np.amax(truth_image.flat))
    print(np.amin(predicted_image.flat), np.amax(predicted_image.flat))
    cv2.imshow("my window", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return image
def doTrialRuns(numRuns, loss):
    XYs, truth = getTestGrid()
    maes = []
    for i in range(numRuns):
        model = trainModel(createModel(loss=loss))
        predicted = model.predict(XYs, BATCH_SIZE)
        predicted = predicted.reshape((-1,))
        mae = np.mean(np.abs(truth - predicted))
        print(f"image test sample mae: {mae}")
        maes.append(mae)
    print("{} maes:".format(loss))
    for mae in maes:
        print(mae)
    print("average: {}".format(np.mean(maes)))
    return maes
doTrialRuns(NUM_MODELS_TO_AVERAGE, 'mse')
doTrialRuns(NUM_MODELS_TO_AVERAGE, tanh_cross_entropy)
# viewResult(trainModel(createModel()))