AddSign optimizer for TensorFlow, by @benoitdescamps (gist benoitdescamps/a22b687edd1c45e6a37fd5bf08251034, last active September 25, 2018).
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.training import optimizer


class AddSign(optimizer.Optimizer):
    """Implementation of AddSign.

    See [Bello et al., 2017](https://arxiv.org/abs/1709.07417).
    """

    def __init__(self, learning_rate=1.001, alpha=0.01, beta=0.5, use_locking=False, name="AddSign"):
        super(AddSign, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._alpha = alpha
        self._beta = beta

        # Tensor versions of the constructor arguments, created in _prepare().
        self._lr_t = None
        self._alpha_t = None
        self._beta_t = None

    def _prepare(self):
        self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
        self._alpha_t = ops.convert_to_tensor(self._alpha, name="alpha_t")
        self._beta_t = ops.convert_to_tensor(self._beta, name="beta_t")

    def _create_slots(self, var_list):
        # Create a slot "m" per variable to accumulate gradient magnitudes.
        for v in var_list:
            self._zeros_slot(v, "m", self._name)

    def _apply_dense(self, grad, var):
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
        alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
        eps = 1e-7  # cap for moving average

        # Accumulator update: keep the larger of the decayed previous value
        # (plus eps) and the current gradient magnitude.
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta_t * m + eps, tf.abs(grad)))

        # AddSign step: scale the gradient up when its sign agrees with the
        # sign of the accumulator, and down when it disagrees.
        var_update = state_ops.assign_sub(
            var, lr_t * grad * (1.0 + alpha_t * tf.sign(grad) * tf.sign(m_t)))

        # Create an op that groups multiple operations.
        # When this op finishes, all ops in its input have finished.
        return control_flow_ops.group(*[var_update, m_t])

    def _apply_sparse(self, grad, var):
        raise NotImplementedError("Sparse gradient updates are not supported.")
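
A minimal usage sketch, assuming TensorFlow 1.x graph mode (the API this optimizer subclasses); the toy quadratic loss, variable name, and hyperparameter values below are illustrative only and not part of the original gist:

# Toy objective: find w minimizing (w - 3)^2.
w = tf.Variable(5.0, name="w")
loss = tf.square(w - 3.0)

# AddSign plugs in like any other tf.train optimizer in graph mode.
opt = AddSign(learning_rate=0.1, alpha=0.01, beta=0.5)
train_op = opt.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):
        _, loss_val = sess.run([train_op, loss])
    print("final w:", sess.run(w))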