tuelwer/pytorch-lbfgs-example.py

## pytorch-lbfgs-example.py
import torch
import torch.optim as optim
import matplotlib.pyplot as plt


# 2d Rosenbrock function
def f(x, a=1, b=100):
    """Evaluate the two-dimensional Rosenbrock function at ``x``.

    Computes ``(a - x[0])**2 + b * (x[1] - x[0]**2)**2``. With the default
    coefficients (``a=1``, ``b=100``) this is the classic Rosenbrock test
    function; for ``b > 0`` its minimum value 0 is attained at
    ``(a, a**2)``, i.e. at ``(1, 1)`` by default.

    Args:
        x: Indexable object whose entries ``x[0]`` and ``x[1]`` support
            arithmetic (e.g. a tensor, or a sequence of numbers).
        a: Offset of the first coordinate in the ``(a - x[0])**2`` term.
        b: Weight of the ``(x[1] - x[0]**2)**2`` term.

    Returns:
        The function value, of whatever type arithmetic on ``x[0]`` and
        ``x[1]`` produces.
    """
    x0, x1 = x[0], x[1]
    return (a - x0)**2 + b * (x1 - x0**2)**2


# Gradient descent
# Baseline run: SGD with a fixed learning rate, starting from (10, 10).
x_gd = torch.full((2, 1), 10.0, requires_grad=True)
gd = optim.SGD(params=[x_gd], lr=1e-5)


def _gd_step():
    """Take one SGD step and return the objective evaluated before it."""
    gd.zero_grad()
    loss = f(x_gd)
    loss.backward()
    gd.step()
    return loss.item()


# 100 steps; each entry is the objective value seen just before that step.
history_gd = [_gd_step() for _ in range(100)]


# L-BFGS
def closure():
    """Recompute the objective at ``x_lbfgs`` together with its gradient.

    Clears the gradients held by ``lbfgs``, evaluates ``f`` at the current
    ``x_lbfgs``, backpropagates, and returns the objective tensor. This is
    the callable handed to ``lbfgs.step``, which needs to be able to
    re-evaluate the function itself.
    """
    lbfgs.zero_grad()
    loss = f(x_lbfgs)
    loss.backward()
    return loss

# Starting point for the L-BFGS run: (10, 10).
x_lbfgs = torch.full((2, 1), 10.0, requires_grad=True)

# max_iter bounds the iterations performed inside a single lbfgs.step() call;
# history_size is the size of the update history kept by the optimizer.
lbfgs = optim.LBFGS(
    params=[x_lbfgs],
    history_size=10,
    max_iter=4,
    line_search_fn="strong_wolfe",
)

history_lbfgs = []
for _ in range(100):
    # Record the objective before the update, then let L-BFGS take its step.
    value_before_step = f(x_lbfgs).item()
    history_lbfgs.append(value_before_step)
    lbfgs.step(closure)


# Plotting
# Draw both objective histories on a shared log-scaled y axis.
for values, name in ((history_gd, 'GD'), (history_lbfgs, 'L-BFGS')):
    plt.semilogy(values, label=name)
plt.legend()
plt.show()
	import torch
	import torch.optim as optim
	import matplotlib.pyplot as plt


	# 2d Rosenbrock function
	def f(x):
	    return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2


	# Gradient descent
	x_gd = 10*torch.ones(2, 1)
	x_gd.requires_grad = True
	gd = optim.SGD([x_gd], lr=1e-5)
	history_gd = []
	for i in range(100):
	    gd.zero_grad()
	    objective = f(x_gd)
	    objective.backward()
	    gd.step()
	    history_gd.append(objective.item())


	# L-BFGS
	def closure():
	    lbfgs.zero_grad()
	    objective = f(x_lbfgs)
	    objective.backward()
	    return objective

	x_lbfgs = 10*torch.ones(2, 1)
	x_lbfgs.requires_grad = True

	lbfgs = optim.LBFGS([x_lbfgs],
	history_size=10,
	max_iter=4,
	line_search_fn="strong_wolfe")

	history_lbfgs = []
	for i in range(100):
	    history_lbfgs.append(f(x_lbfgs).item())
	    lbfgs.step(closure)


	# Plotting
	plt.semilogy(history_gd, label='GD')
	plt.semilogy(history_lbfgs, label='L-BFGS')
	plt.legend()
	plt.show()