# xylcbd/dl_optimizer.py

## dl_optimizer.py
#coding:utf-8
import numpy as np
import matplotlib.pyplot as plt
import math

def f(x):
    """Evaluate the objective ``x[0]**2 + 50 * x[1]**2`` at ``x``.

    Only the first two entries of ``x`` are read.
    """
    first, second = x[0], x[1]
    return first * first + 50 * second * second

def g(x):
    """Return ``[2 * x[0], 100 * x[1]]`` as a NumPy array.

    This is the gradient of ``x[0]**2 + 50 * x[1]**2``.
    """
    d_first = 2 * x[0]
    d_second = 100 * x[1]
    return np.array([d_first, d_second])

def contour(X, Y, Z, arr, name):
    """Draw contour lines of a surface and overlay a path of points.

    Args:
        X: Grid of x coordinates, passed to ``plt.contour``.
        Y: Grid of y coordinates, passed to ``plt.contour``.
        Z: Surface values on the grid, passed to ``plt.contour``.
        arr: Sequence of 2-D points to draw as a path, or ``None`` to
            draw the contour lines only.
        name: Title of the figure.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(15, 7))
    plt.contour(X, Y, Z, colors='black')
    # Mark the origin with a star.
    plt.plot(0, 0, marker='*')
    if arr is not None:
        arr = np.array(arr)
        # Each consecutive pair of points is drawn with its own plot call.
        for i in range(len(arr) - 1):
            plt.plot(arr[i:i + 2, 0], arr[i:i + 2, 1])
    plt.title(name)
    plt.show()

def sgd(x_start, step, g, epochs=50, tol=1e-6):
    """Minimise a function with plain gradient descent.

    Args:
        x_start: Initial point; copied into a float64 array.
        step: Learning rate multiplied with the gradient on each update.
        g: Callable returning the gradient at a given point.
        epochs: Maximum number of updates (default 50).
        tol: Stop once the sum of absolute gradient components falls
            below this value (default 1e-6).

    Returns:
        A tuple ``(x, passing_dot)``: the final point and the list of all
        visited points, beginning with the starting point.
    """
    x = np.array(x_start, dtype='float64')
    passing_dot = [x.copy()]
    for i in range(epochs):
        grad = g(x)
        x -= grad * step

        passing_dot.append(x.copy())
        # Single-argument call form prints the same text on Python 2 and 3.
        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
        # Sum magnitudes, not signed values: components of opposite sign
        # would otherwise cancel and end the run far from a stationary point.
        if np.sum(np.abs(grad)) < tol:
            break
    return x, passing_dot

def momentum(x_start, u, step, g, epochs=50, tol=1e-6):
    """Minimise a function with gradient descent plus momentum.

    Args:
        x_start: Initial point; copied into a float64 array.
        u: Coefficient applied to the previous velocity.
        step: Learning rate multiplied with the velocity on each update.
        g: Callable returning the gradient at a given point.
        epochs: Maximum number of updates (default 50).
        tol: Stop once the sum of absolute gradient components falls
            below this value (default 1e-6).

    Returns:
        A tuple ``(x, passing_dot)``: the final point and the list of all
        visited points, beginning with the starting point.
    """
    x = np.array(x_start, dtype='float64')
    passing_dot = [x.copy()]
    velocity = np.zeros_like(x)
    for i in range(epochs):
        grad = g(x)
        # Decayed previous velocity plus the fresh gradient.
        velocity = u * velocity + grad
        x -= step * velocity

        passing_dot.append(x.copy())
        # Single-argument call form prints the same text on Python 2 and 3.
        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
        # Sum magnitudes, not signed values: components of opposite sign
        # would otherwise cancel and end the run far from a stationary point.
        if np.sum(np.abs(grad)) < tol:
            break
    return x, passing_dot

def nesterov(x_start, u, step, g, epochs=50, tol=1e-6):
    """Minimise a function with Nesterov-style look-ahead momentum.

    The gradient is evaluated at ``x - step * u * velocity`` rather than
    at ``x`` itself; the velocity and position updates are otherwise the
    same as in ``momentum``.

    Args:
        x_start: Initial point; copied into a float64 array.
        u: Coefficient applied to the previous velocity.
        step: Learning rate multiplied with the velocity on each update.
        g: Callable returning the gradient at a given point.
        epochs: Maximum number of updates (default 50).
        tol: Stop once the sum of absolute gradient components falls
            below this value (default 1e-6).

    Returns:
        A tuple ``(x, passing_dot)``: the final point and the list of all
        visited points, beginning with the starting point.
    """
    x = np.array(x_start, dtype='float64')
    passing_dot = [x.copy()]
    velocity = np.zeros_like(x)
    for i in range(epochs):
        # Look ahead along the previous velocity before taking the gradient.
        lookahead = x - step * u * velocity
        grad = g(lookahead)
        velocity = u * velocity + grad
        x -= step * velocity

        passing_dot.append(x.copy())
        # Single-argument call form prints the same text on Python 2 and 3.
        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
        # Sum magnitudes, not signed values: components of opposite sign
        # would otherwise cancel and end the run far from a stationary point.
        if np.sum(np.abs(grad)) < tol:
            break
    return x, passing_dot

def adagrad(x_start, u, step, g, epochs=50, tol=1e-6):
    """Minimise a function with AdaGrad-style per-coordinate scaling.

    Each update divides the gradient by the square root of the running
    sum of squared gradients plus ``u``.

    Args:
        x_start: Initial point; copied into a float64 array.
        u: Constant added to the accumulated squared gradients before
            the square root is taken.
        step: Learning rate multiplied with the scaled gradient.
        g: Callable returning the gradient at a given point.
        epochs: Maximum number of updates (default 50).
        tol: Stop once the sum of absolute gradient components falls
            below this value (default 1e-6).

    Returns:
        A tuple ``(x, passing_dot)``: the final point and the list of all
        visited points, beginning with the starting point.
    """
    x = np.array(x_start, dtype='float64')
    passing_dot = [x.copy()]
    sq_grad_sum = np.zeros_like(x)
    for i in range(epochs):
        grad = g(x)
        sq_grad_sum = sq_grad_sum + np.square(grad)
        x -= step * grad / np.sqrt(sq_grad_sum + u)

        passing_dot.append(x.copy())
        # Single-argument call form prints the same text on Python 2 and 3.
        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
        # Sum magnitudes, not signed values: components of opposite sign
        # would otherwise cancel and end the run far from a stationary point.
        if np.sum(np.abs(grad)) < tol:
            break
    return x, passing_dot

def adadelta(x_start, u, step, g, epochs=50, tol=1e-6):
    """Minimise a function by scaling gradients with accumulated squares.

    NOTE(review): despite the name, the update rule here is the same as
    in ``adagrad`` (a running *sum* of squared gradients, no decay and no
    accumulated update magnitudes). It is kept as-is so existing callers
    see the same trajectory; confirm whether a true Adadelta was intended.

    Args:
        x_start: Initial point; copied into a float64 array.
        u: Constant added to the accumulated squared gradients before
            the square root is taken.
        step: Learning rate multiplied with the scaled gradient.
        g: Callable returning the gradient at a given point.
        epochs: Maximum number of updates (default 50).
        tol: Stop once the sum of absolute gradient components falls
            below this value (default 1e-6).

    Returns:
        A tuple ``(x, passing_dot)``: the final point and the list of all
        visited points, beginning with the starting point.
    """
    x = np.array(x_start, dtype='float64')
    passing_dot = [x.copy()]
    sq_grad_sum = np.zeros_like(x)
    for i in range(epochs):
        grad = g(x)
        sq_grad_sum = sq_grad_sum + np.square(grad)
        x -= step * grad / np.sqrt(sq_grad_sum + u)

        passing_dot.append(x.copy())
        # Single-argument call form prints the same text on Python 2 and 3.
        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
        # Sum magnitudes, not signed values: components of opposite sign
        # would otherwise cancel and end the run far from a stationary point.
        if np.sum(np.abs(grad)) < tol:
            break
    return x, passing_dot

# Sample the plotting window: 1000 points per axis,
# x in [-200, 200] and y in [-100, 100].
xi = np.linspace(-200, 200, num=1000)
yi = np.linspace(-100, 100, num=1000)
X, Y = np.meshgrid(xi, yi)
# Objective values on the grid, computed by f as X*X + 50*Y*Y.
Z = f((X, Y))

# Run adagrad from (150, 75) with u=0.7 and step=60, then plot its path.
res, x_arr = adagrad(x_start=[150, 75], u=0.7, step=60, g=g)
contour(X, Y, Z, arr=x_arr, name='adagrad')
	#coding:utf-8
	import numpy as np
	import matplotlib.pyplot as plt
	import math

	def f(x):
	    """Evaluate the objective ``x[0]**2 + 50 * x[1]**2`` at ``x``.

	    Only the first two entries of ``x`` are read.
	    """
	    return x[0] * x[0] + 50 * x[1] * x[1]

	def g(x):
	    """Return ``[2 * x[0], 100 * x[1]]`` as a NumPy array.

	    This is the gradient of ``x[0]**2 + 50 * x[1]**2``.
	    """
	    return np.array([2 * x[0], 100 * x[1]])

	def contour(X, Y, Z, arr, name):
	    """Draw contour lines of a surface and overlay a path of points.

	    Args:
	        X: Grid of x coordinates, passed to ``plt.contour``.
	        Y: Grid of y coordinates, passed to ``plt.contour``.
	        Z: Surface values on the grid, passed to ``plt.contour``.
	        arr: Sequence of 2-D points to draw as a path, or ``None`` to
	            draw the contour lines only.
	        name: Title of the figure.

	    The figure is displayed with ``plt.show()``; nothing is returned.
	    """
	    plt.figure(figsize=(15, 7))
	    plt.contour(X, Y, Z, colors='black')
	    # Mark the origin with a star.
	    plt.plot(0, 0, marker='*')
	    if arr is not None:
	        arr = np.array(arr)
	        # Each consecutive pair of points is drawn with its own plot call.
	        for i in range(len(arr) - 1):
	            plt.plot(arr[i:i + 2, 0], arr[i:i + 2, 1])
	    plt.title(name)
	    plt.show()

	def sgd(x_start, step, g, epochs=50, tol=1e-6):
	    """Minimise a function with plain gradient descent.

	    Args:
	        x_start: Initial point; copied into a float64 array.
	        step: Learning rate multiplied with the gradient on each update.
	        g: Callable returning the gradient at a given point.
	        epochs: Maximum number of updates (default 50).
	        tol: Stop once the sum of absolute gradient components falls
	            below this value (default 1e-6).

	    Returns:
	        A tuple ``(x, passing_dot)``: the final point and the list of all
	        visited points, beginning with the starting point.
	    """
	    x = np.array(x_start, dtype='float64')
	    passing_dot = [x.copy()]
	    for i in range(epochs):
	        grad = g(x)
	        x -= grad * step

	        passing_dot.append(x.copy())
	        # Single-argument call form prints the same text on Python 2 and 3.
	        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
	        # Sum magnitudes, not signed values: components of opposite sign
	        # would otherwise cancel and end the run far from a stationary point.
	        if np.sum(np.abs(grad)) < tol:
	            break
	    return x, passing_dot

	def momentum(x_start, u, step, g, epochs=50, tol=1e-6):
	    """Minimise a function with gradient descent plus momentum.

	    Args:
	        x_start: Initial point; copied into a float64 array.
	        u: Coefficient applied to the previous velocity.
	        step: Learning rate multiplied with the velocity on each update.
	        g: Callable returning the gradient at a given point.
	        epochs: Maximum number of updates (default 50).
	        tol: Stop once the sum of absolute gradient components falls
	            below this value (default 1e-6).

	    Returns:
	        A tuple ``(x, passing_dot)``: the final point and the list of all
	        visited points, beginning with the starting point.
	    """
	    x = np.array(x_start, dtype='float64')
	    passing_dot = [x.copy()]
	    velocity = np.zeros_like(x)
	    for i in range(epochs):
	        grad = g(x)
	        # Decayed previous velocity plus the fresh gradient.
	        velocity = u * velocity + grad
	        x -= step * velocity

	        passing_dot.append(x.copy())
	        # Single-argument call form prints the same text on Python 2 and 3.
	        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
	        # Sum magnitudes, not signed values: components of opposite sign
	        # would otherwise cancel and end the run far from a stationary point.
	        if np.sum(np.abs(grad)) < tol:
	            break
	    return x, passing_dot

	def nesterov(x_start, u, step, g, epochs=50, tol=1e-6):
	    """Minimise a function with Nesterov-style look-ahead momentum.

	    The gradient is evaluated at ``x - step * u * velocity`` rather than
	    at ``x`` itself; the velocity and position updates are otherwise the
	    same as in ``momentum``.

	    Args:
	        x_start: Initial point; copied into a float64 array.
	        u: Coefficient applied to the previous velocity.
	        step: Learning rate multiplied with the velocity on each update.
	        g: Callable returning the gradient at a given point.
	        epochs: Maximum number of updates (default 50).
	        tol: Stop once the sum of absolute gradient components falls
	            below this value (default 1e-6).

	    Returns:
	        A tuple ``(x, passing_dot)``: the final point and the list of all
	        visited points, beginning with the starting point.
	    """
	    x = np.array(x_start, dtype='float64')
	    passing_dot = [x.copy()]
	    velocity = np.zeros_like(x)
	    for i in range(epochs):
	        # Look ahead along the previous velocity before taking the
	        # gradient (the multiplication signs were lost in this copy).
	        lookahead = x - step * u * velocity
	        grad = g(lookahead)
	        velocity = u * velocity + grad
	        x -= step * velocity

	        passing_dot.append(x.copy())
	        # Single-argument call form prints the same text on Python 2 and 3.
	        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
	        # Sum magnitudes, not signed values: components of opposite sign
	        # would otherwise cancel and end the run far from a stationary point.
	        if np.sum(np.abs(grad)) < tol:
	            break
	    return x, passing_dot

	def adagrad(x_start, u, step, g, epochs=50, tol=1e-6):
	    """Minimise a function with AdaGrad-style per-coordinate scaling.

	    Each update divides the gradient by the square root of the running
	    sum of squared gradients plus ``u``.

	    Args:
	        x_start: Initial point; copied into a float64 array.
	        u: Constant added to the accumulated squared gradients before
	            the square root is taken.
	        step: Learning rate multiplied with the scaled gradient.
	        g: Callable returning the gradient at a given point.
	        epochs: Maximum number of updates (default 50).
	        tol: Stop once the sum of absolute gradient components falls
	            below this value (default 1e-6).

	    Returns:
	        A tuple ``(x, passing_dot)``: the final point and the list of all
	        visited points, beginning with the starting point.
	    """
	    x = np.array(x_start, dtype='float64')
	    passing_dot = [x.copy()]
	    sq_grad_sum = np.zeros_like(x)
	    for i in range(epochs):
	        grad = g(x)
	        sq_grad_sum = sq_grad_sum + np.square(grad)
	        x -= step * grad / np.sqrt(sq_grad_sum + u)

	        passing_dot.append(x.copy())
	        # Single-argument call form prints the same text on Python 2 and 3.
	        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
	        # Sum magnitudes, not signed values: components of opposite sign
	        # would otherwise cancel and end the run far from a stationary point.
	        if np.sum(np.abs(grad)) < tol:
	            break
	    return x, passing_dot

	def adadelta(x_start, u, step, g, epochs=50, tol=1e-6):
	    """Minimise a function by scaling gradients with accumulated squares.

	    NOTE(review): despite the name, the update rule here is the same as
	    in ``adagrad`` (a running *sum* of squared gradients, no decay and no
	    accumulated update magnitudes). It is kept as-is so existing callers
	    see the same trajectory; confirm whether a true Adadelta was intended.

	    Args:
	        x_start: Initial point; copied into a float64 array.
	        u: Constant added to the accumulated squared gradients before
	            the square root is taken.
	        step: Learning rate multiplied with the scaled gradient.
	        g: Callable returning the gradient at a given point.
	        epochs: Maximum number of updates (default 50).
	        tol: Stop once the sum of absolute gradient components falls
	            below this value (default 1e-6).

	    Returns:
	        A tuple ``(x, passing_dot)``: the final point and the list of all
	        visited points, beginning with the starting point.
	    """
	    x = np.array(x_start, dtype='float64')
	    passing_dot = [x.copy()]
	    sq_grad_sum = np.zeros_like(x)
	    for i in range(epochs):
	        grad = g(x)
	        sq_grad_sum = sq_grad_sum + np.square(grad)
	        x -= step * grad / np.sqrt(sq_grad_sum + u)

	        passing_dot.append(x.copy())
	        # Single-argument call form prints the same text on Python 2 and 3.
	        print('[ Epoch {0} ] grad = {1}, x = {2}'.format(i, grad, x))
	        # Sum magnitudes, not signed values: components of opposite sign
	        # would otherwise cancel and end the run far from a stationary point.
	        if np.sum(np.abs(grad)) < tol:
	            break
	    return x, passing_dot

	# Sample the plotting window: 1000 points per axis,
	# x in [-200, 200] and y in [-100, 100].
	xi = np.linspace(-200, 200, num=1000)
	yi = np.linspace(-100, 100, num=1000)
	X, Y = np.meshgrid(xi, yi)
	# Objective values on the grid, computed by f as X*X + 50*Y*Y.
	Z = f((X, Y))

	# Run adagrad from (150, 75) with u=0.7 and step=60, then plot its path.
	res, x_arr = adagrad(x_start=[150, 75], u=0.7, step=60, g=g)
	contour(X, Y, Z, arr=x_arr, name='adagrad')