Created November 15, 2021 17:04
Gradient centralization in TensorFlow, an easy way to boost the training of deep neural networks. It operates directly on the gradients, centralizing the gradient vectors to have zero mean, which helps achieve better accuracy and lower loss.
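To make the idea concrete, here is a minimal sketch (not part of the gist) of the centralization step on a toy 2-D gradient: the mean over every axis except the last is subtracted, so each output column of the gradient ends up with zero mean.

```python
import tensorflow as tf

# Toy 2-D "gradient", e.g. for a Dense kernel of shape [inputs, outputs]
grad = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

# Subtract the mean over all axes except the last (here just axis 0)
centralized = grad - tf.reduce_mean(grad, axis=[0], keepdims=True)

print(tf.reduce_mean(centralized, axis=0))  # ~[0., 0.]
```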
# Imports
import tensorflow as tf
from tensorflow.keras import backend as K


# Define a function for centralizing the gradients
def centralize_gradients(optimizer, loss, params):
    grads = K.gradients(loss, params)  # Compute gradients via the Keras backend

    if None in grads:
        raise ValueError(
            "An operation has `None` for gradient. Please make sure that all of your "
            "ops have a gradient defined (that is, are differentiable). Common "
            "operations without gradients are: `K.argmax`, `K.round` and `K.eval`."
        )

    # Centralize: subtract the mean over all axes except the last (the output axis),
    # skipping rank-0/1 tensors such as biases
    centralized = []
    for grad in grads:
        rank = len(grad.shape)  # Rank of the gradient tensor
        if rank > 1:
            axis = list(range(rank - 1))
            grad -= tf.reduce_mean(grad, axis=axis, keepdims=True)  # Remove the mean
        centralized.append(grad)
    grads = centralized

    # Clip by global norm, mirroring the optimizer's `clipnorm` behaviour
    if hasattr(optimizer, "clipnorm") and optimizer.clipnorm > 0:
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
        grads = [
            g * optimizer.clipnorm / K.maximum(norm, optimizer.clipnorm) for g in grads
        ]

    # Clip by value, mirroring the optimizer's `clipvalue` behaviour
    if hasattr(optimizer, "clipvalue") and optimizer.clipvalue > 0:
        grads = [K.clip(g, -optimizer.clipvalue, optimizer.clipvalue) for g in grads]

    return grads


# Function to apply gradient centralization to optimizers.
def apply_gradient_centralization(optimizer):
    """
    Usage:
        adam = tf.keras.optimizers.Adam(...)
        adam.get_gradients = apply_gradient_centralization(adam)
    """
    def get_centralized_gradients(loss, params):
        return centralize_gradients(optimizer, loss, params)

    return get_centralized_gradients
# Imports
from tensorflow.keras import layers, optimizers, Sequential

from gradient_centralization import apply_gradient_centralization

# Build a basic model. This one is for MNIST.
model = Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(256, activation="relu"),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.2),
    layers.Dense(10, activation="softmax"),
])

# Create the optimizer and patch in the centralized gradients
optimizer = optimizers.Adam()
optimizer.get_gradients = apply_gradient_centralization(optimizer)

# Finally, compile and carry on with the usual training steps.
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
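From here, training proceeds as usual. As a minimal sketch of the remaining steps (assuming the standard MNIST loader from `tf.keras.datasets`, which is not part of the gist):

```python
import tensorflow as tf

# Load and normalize MNIST to match the (28, 28) input shape above
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Train and evaluate; the patched optimizer centralizes gradients when its
# `get_gradients` hook is used
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))
```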