# Voyz/Keras batch norm with roc_auc_score bug.py

## Keras batch norm with roc_auc_score bug.py
from keras import Input, Model
from keras.callbacks import Callback
from keras.layers import Dense, BatchNormalization, Lambda, Flatten, Dropout
import numpy as np
from sklearn.metrics import roc_auc_score
import tensorflow as tf
from keras import backend as K

# Synthetic data: standard-normal features paired with random 0/1 labels.
m = 10000  # number of rows in each of the train and test sets
f = 600  # number of feature columns

# Draw order is unchanged: train features, test features, train labels, test labels.
X_train, X_test = (np.random.randn(m, f) for _ in range(2))
y_train, y_test = (np.random.randint(0, 2, (m, 1)) for _ in range(2))

# Width of the feature matrix; used below as the model's input size.
input_dim = X_train.shape[-1]


# from https://stackoverflow.com/a/46844409/3508719
# roc_auc_score used here in a callback doesn't produce the bug - it only happens when used in a custom metric
class roc_callback(Callback):
    """Print ROC AUC on the training and validation data after every epoch.

    The scores are computed with ``sklearn.metrics.roc_auc_score`` on the
    output of ``self.model.predict`` over the complete arrays handed to the
    constructor.
    """

    def __init__(self, training_data, validation_data):
        """Store the arrays to score against.

        Args:
            training_data: indexable whose item 0 is the feature array and
                item 1 the label array used for the training score.
            validation_data: same layout, used for the validation score.
        """
        # Let the Keras base class set up its own state before adding ours.
        super().__init__()
        self.x, self.y = training_data[0], training_data[1]
        self.x_val, self.y_val = validation_data[0], validation_data[1]

    def on_epoch_end(self, epoch, logs=None):
        """Compute and print both ROC AUC scores.

        Args:
            epoch: epoch index supplied by Keras (not used here).
            logs: metrics dict supplied by Keras (not used here). Defaults
                to ``None`` rather than a shared mutable ``{}``.
        """
        train_auc = roc_auc_score(self.y, self.model.predict(self.x))
        val_auc = roc_auc_score(self.y_val, self.model.predict(self.x_val))
        # '\r' rewinds to the start of the line and the 100-space `end`
        # pads over whatever was previously printed there.
        print(
            '\rIn a callback: %s - roc-auc_val: %s'
            % (str(round(train_auc, 4)), str(round(val_auc, 4))),
            end=100 * ' ' + '\n',
        )


def auc(y_true, y_pred):
    """Custom metric: scikit-learn's ``roc_auc_score`` wrapped in ``tf.py_function``.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of model outputs.

    Returns:
        The result of ``tf.py_function`` with declared dtype ``tf.double``.
    """
    ## Using the sklearn.metrics.roc_auc_score produces the bug
    metric_inputs = (y_true, y_pred)
    return tf.py_function(func=roc_auc_score, inp=metric_inputs, Tout=tf.double)


    ## native Tensorflow auc metric - it doesn't produce the bug, yet apparently is less accurate.
    ## see: https://github.com/tensorflow/tensorflow/issues/14834#issuecomment-346618889
    # auc = tf.metrics.auc(y_true, y_pred)[1]
    # K.get_session().run(tf.local_variables_initializer())
    # return auc


# Build the network: Dense(10, relu) -> BatchNormalization -> Dense(1, sigmoid).
# Note: If shape is changed to (None, f, 1) instead of (None, f) the bug seems to disappear
# Uncomment the Lambda and Flatten layers to introduce this change
inputs = Input(shape=[input_dim])
x = Dense(10, activation='relu')(inputs)
# x = Lambda(lambda x: K.expand_dims(x, -1))(x) #changing shape to (None, input_dim, 1)
x = BatchNormalization(axis=1)(x)
# x = Flatten()(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[inputs], outputs=[x], name='model')

# Note: the bug persists with SGD optimizer, as well as MSE loss. It disappears if 'auc' is removed from metrics.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', auc],
)
model.summary()

# A single callback that prints ROC AUC computed outside the metric machinery,
# for comparison against the `auc` metric reported by Keras.
callbacks = [
    roc_callback(
        training_data=(X_train, y_train),
        validation_data=(X_test, y_test),
    ),
]

model.fit(
    x=X_train,
    y=y_train,
    batch_size=1024,
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=callbacks,
)
	from keras import Input, Model
	from keras.callbacks import Callback
	from keras.layers import Dense, BatchNormalization, Lambda, Flatten, Dropout
	import numpy as np
	from sklearn.metrics import roc_auc_score
	import tensorflow as tf
	from keras import backend as K

	# Synthetic data: standard-normal features paired with random 0/1 labels.
	m = 10000  # number of rows in each of the train and test sets
	f = 600  # number of feature columns

	# Draw order is unchanged: train features, test features, train labels, test labels.
	X_train, X_test = (np.random.randn(m, f) for _ in range(2))
	y_train, y_test = (np.random.randint(0, 2, (m, 1)) for _ in range(2))

	# Width of the feature matrix; used below as the model's input size.
	input_dim = X_train.shape[-1]


	# from https://stackoverflow.com/a/46844409/3508719
	# roc_auc_score used here in a callback doesn't produce the bug - it only happens when used in a custom metric
	class roc_callback(Callback):
	    """Print ROC AUC on the training and validation data after every epoch.

	    The scores are computed with ``sklearn.metrics.roc_auc_score`` on the
	    output of ``self.model.predict`` over the complete arrays handed to the
	    constructor.
	    """

	    def __init__(self, training_data, validation_data):
	        """Store the arrays to score against.

	        Args:
	            training_data: indexable whose item 0 is the feature array and
	                item 1 the label array used for the training score.
	            validation_data: same layout, used for the validation score.
	        """
	        # Let the Keras base class set up its own state before adding ours.
	        super().__init__()
	        self.x, self.y = training_data[0], training_data[1]
	        self.x_val, self.y_val = validation_data[0], validation_data[1]

	    def on_epoch_end(self, epoch, logs=None):
	        """Compute and print both ROC AUC scores.

	        Args:
	            epoch: epoch index supplied by Keras (not used here).
	            logs: metrics dict supplied by Keras (not used here). Defaults
	                to ``None`` rather than a shared mutable ``{}``.
	        """
	        train_auc = roc_auc_score(self.y, self.model.predict(self.x))
	        val_auc = roc_auc_score(self.y_val, self.model.predict(self.x_val))
	        # '\r' rewinds to the start of the line and the 100-space `end`
	        # pads over whatever was previously printed there.
	        print(
	            '\rIn a callback: %s - roc-auc_val: %s'
	            % (str(round(train_auc, 4)), str(round(val_auc, 4))),
	            end=100 * ' ' + '\n',
	        )


	def auc(y_true, y_pred):
	    """Custom metric: scikit-learn's ``roc_auc_score`` wrapped in ``tf.py_function``.

	    Args:
	        y_true: tensor of ground-truth labels.
	        y_pred: tensor of model outputs.

	    Returns:
	        The result of ``tf.py_function`` with declared dtype ``tf.double``.
	    """
	    ## Using the sklearn.metrics.roc_auc_score produces the bug
	    metric_inputs = (y_true, y_pred)
	    return tf.py_function(func=roc_auc_score, inp=metric_inputs, Tout=tf.double)


	## native Tensorflow auc metric - it doesn't produce the bug, yet apparently is less accurate.
	## see: https://github.com/tensorflow/tensorflow/issues/14834#issuecomment-346618889
	# auc = tf.metrics.auc(y_true, y_pred)[1]
	# K.get_session().run(tf.local_variables_initializer())
	# return auc


	# Build the network: Dense(10, relu) -> BatchNormalization -> Dense(1, sigmoid).
	# Note: If shape is changed to (None, f, 1) instead of (None, f) the bug seems to disappear
	# Uncomment the Lambda and Flatten layers to introduce this change
	inputs = Input(shape=[input_dim])
	x = Dense(10, activation='relu')(inputs)
	# x = Lambda(lambda x: K.expand_dims(x, -1))(x) #changing shape to (None, input_dim, 1)
	x = BatchNormalization(axis=1)(x)
	# x = Flatten()(x)
	x = Dense(1, activation='sigmoid')(x)

	model = Model(inputs=[inputs], outputs=[x], name='model')

	# Note: the bug persists with SGD optimizer, as well as MSE loss. It disappears if 'auc' is removed from metrics.
	model.compile(
	    loss='binary_crossentropy',
	    optimizer='adam',
	    metrics=['accuracy', auc],
	)
	model.summary()

	# A single callback that prints ROC AUC computed outside the metric machinery,
	# for comparison against the `auc` metric reported by Keras.
	callbacks = [
	    roc_callback(
	        training_data=(X_train, y_train),
	        validation_data=(X_test, y_test),
	    ),
	]

	model.fit(
	    x=X_train,
	    y=y_train,
	    batch_size=1024,
	    epochs=50,
	    validation_data=(X_test, y_test),
	    callbacks=callbacks,
	)