Skip to content

Instantly share code, notes, and snippets.

View bushaev's full-sized avatar

Vitaly Bushaev bushaev

  • ITMO University
  • Saint Petersburg
View GitHub Profile
# Rebind the same greeting once for every element of `iterations`.
for _ in iterations:
    hello_world = "Hello World"
# Rebind the same greeting once for every element of `iterations`.
for _ in iterations:
    hello_world = "Hello World"
# Rebind the same greeting once for every element of `iterations`.
for _ in iterations:
    hello_world = "Hello World"
@bushaev
bushaev / rprop.py
Created September 1, 2018 17:50
Simplest form of the rprop algorithm
# Rprop update rule: only the sign of the gradient drives the weight update,
# while the step size adapts to whether that sign agrees with the previous one.
# NOTE(review): assumes `w` and `dw` hold num_iterations + 1 entries, with
# w[0] the initial weight and dw[0] an initial gradient (e.g. 0) — confirm
# against the setup code.
for t in range(1, num_iterations + 1):
    # Starting at t = 1 keeps t - 1 from becoming -1, which Python would
    # silently resolve to the *last* element rather than a previous step.
    dw[t] = compute_gradient(x, y)
    sign_agreement = dw[t] * dw[t - 1]
    if sign_agreement > 0:
        # Same sign as last step: scale by incFactor, capped at step_size_max.
        step_size = min(step_size * incFactor, step_size_max)
    elif sign_agreement < 0:
        # Sign flipped: scale by decFactor, bounded below by step_size_min.
        step_size = max(step_size * decFactor, step_size_min)
    # A zero product leaves step_size untouched.
    w[t] = w[t - 1] - sign(dw[t]) * step_size
# Rprop update rule: only the sign of the gradient drives the weight update,
# while the step size adapts to whether that sign agrees with the previous one.
# NOTE(review): assumes `w` and `dw` hold num_iterations + 1 entries, with
# w[0] the initial weight and dw[0] an initial gradient (e.g. 0) — confirm
# against the setup code.
for t in range(1, num_iterations + 1):
    # Starting at t = 1 keeps t - 1 from becoming -1, which Python would
    # silently resolve to the *last* element rather than a previous step.
    dw[t] = compute_gradient(x, y)
    sign_agreement = dw[t] * dw[t - 1]
    if sign_agreement > 0:
        # Same sign as last step: scale by incFactor, capped at step_size_max.
        step_size = min(step_size * incFactor, step_size_max)
    elif sign_agreement < 0:
        # Sign flipped: scale by decFactor, bounded below by step_size_min.
        step_size = max(step_size * decFactor, step_size_min)
    # A zero product leaves step_size untouched.
    w[t] = w[t - 1] - sign(dw[t]) * step_size
@bushaev
bushaev / rprop.py
Created September 1, 2018 17:50
simplest form of rprop update rule
# Rprop update rule: only the sign of the gradient drives the weight update,
# while the step size adapts to whether that sign agrees with the previous one.
# NOTE(review): assumes `w` and `dw` hold num_iterations + 1 entries, with
# w[0] the initial weight and dw[0] an initial gradient (e.g. 0) — confirm
# against the setup code.
for t in range(1, num_iterations + 1):
    # Starting at t = 1 keeps t - 1 from becoming -1, which Python would
    # silently resolve to the *last* element rather than a previous step.
    dw[t] = compute_gradient(x, y)
    sign_agreement = dw[t] * dw[t - 1]
    if sign_agreement > 0:
        # Same sign as last step: scale by incFactor, capped at step_size_max.
        step_size = min(step_size * incFactor, step_size_max)
    elif sign_agreement < 0:
        # Sign flipped: scale by decFactor, bounded below by step_size_min.
        step_size = max(step_size * decFactor, step_size_min)
    # A zero product leaves step_size untouched.
    w[t] = w[t - 1] - sign(dw[t]) * step_size
# RMSprop: divide each update by the root of an exponentially weighted moving
# average of the squared gradient (decay 0.9, so each new sample weighs 0.1).
grads_squared = 0
eps = 1e-8  # keeps the division finite while the running average is zero
for _ in range(num_iterations):
    dw = compute_gradients(x, y)
    # The same accumulator must be read and written, otherwise the average
    # never carries over from one iteration to the next.
    grads_squared = 0.9 * grads_squared + 0.1 * dw * dw
    w = w - (lr / (np.sqrt(grads_squared) + eps)) * dw
# Adagrad: divide each update by the root of the running *sum* of squared
# gradients, so the effective learning rate can only shrink over time.
grads_squared = 0
eps = 1e-8  # keeps the division finite while the accumulated sum is zero
for _ in range(num_iterations):
    dw = compute_gradient(x, y)
    # Accumulate into the variable initialised above the loop.
    grads_squared += dw * dw
    w = w - (lr / (np.sqrt(grads_squared) + eps)) * dw
# Adagrad: divide each update by the root of the running *sum* of squared
# gradients, so the effective learning rate can only shrink over time.
grads_squared = 0
eps = 1e-8  # keeps the division finite while the accumulated sum is zero
for _ in range(num_iterations):
    dw = compute_gradient(x, y)
    # Accumulate into the variable initialised above the loop.
    grads_squared += dw * dw
    w = w - (lr / (np.sqrt(grads_squared) + eps)) * dw
@bushaev
bushaev / Adam.py
Last active October 22, 2018 12:49
# Adam: keep exponential moving averages of the gradient (m) and of its
# square (v), correct both for their start-up bias, then take a step scaled
# by m_hat / sqrt(v_hat).
# NOTE(review): assumes m and v are initialised (typically to 0) before this
# loop — confirm against the setup code.
for t in range(1, num_iterations + 1):
    # The step counter must start at 1: with t = 0 both bias-correction
    # denominators below are 1 - beta ** 0 == 0, i.e. a division by zero.
    g = compute_gradient(x, y)
    m = beta_1 * m + (1 - beta_1) * g
    v = beta_2 * v + (1 - beta_2) * np.power(g, 2)
    m_hat = m / (1 - np.power(beta_1, t))
    v_hat = v / (1 - np.power(beta_2, t))
    # epsilon keeps the division finite when v_hat is zero.
    w = w - step_size * m_hat / (np.sqrt(v_hat) + epsilon)