Batch Gradient Descent, Stochastic Gradient Descent, and Mini-batch Gradient Descent
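All three variants below minimize the same mean-squared-error cost for a linear model; they differ only in how many training instances feed each gradient estimate. The full-batch gradient computed by the `gradients` line in the first section is

$$\nabla_\theta \,\mathrm{MSE}(\theta) = \frac{2}{m} X_b^\top \left( X_b \theta - y \right),$$

where $X_b$ is the design matrix with a prepended bias column of ones and $m$ is the number of training instances. The stochastic and mini-batch versions replace $X_b$ and $y$ with a single random row or a random slice of `minibatch_size` rows.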
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

# Generate a random dataset: y = 4 + 3x + Gaussian noise,
# so the true parameters are intercept 4 and slope 3.
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]  # prepend x0 = 1 (bias column) to each instance
X_new = np.array([[0], [2]])       # two x-values spanning the data, for plotting fitted lines
X_new_b = np.c_[np.ones((2, 1)), X_new]
plt.plot(X, y, 'b.')
# Batch Gradient Descent: every step uses all m training instances.
eta = 0.1           # fixed learning rate
n_iterations = 1000
m = 100             # number of training instances
theta = np.random.randn(2, 1)  # random initialization
for iteration in range(n_iterations):
    gradients = 2.0 / m * X_b.T.dot(X_b.dot(theta) - y)  # gradient of the MSE cost
    theta = theta - eta * gradients
y_predict = X_new_b.dot(theta)
plt.plot(X_new, y_predict, 'r-')
print(theta)
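# Optional sanity check (added; not in the original gist): on this convex
# problem, batch gradient descent should land near the closed-form
# normal-equation solution theta_best = (X_b^T X_b)^(-1) X_b^T y.
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_best)  # should roughly match the theta printed above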
# Stochastic Gradient Descent: every step uses one randomly picked instance.
n_epochs = 50
t0, t1 = 5, 50  # learning-schedule hyperparameters

theta = np.random.randn(2, 1)  # random initialization

def learning_schedule(t):
    # Decaying learning rate: large steps early, smaller steps as t grows.
    return float(t0) / (t + t1)

for epoch in range(n_epochs):
    for i in range(m):
        random_index = np.random.randint(m)  # sampling with replacement
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradients = 2.0 * xi.T.dot(xi.dot(theta) - yi)  # one instance, so no 1/m averaging
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients
y_predict = X_new_b.dot(theta)
plt.plot(X_new, y_predict, 'g:')
print(theta)
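# Note (added): because each SGD step uses a single instance, theta keeps
# bouncing around the optimum; the schedule eta = t0 / (t + t1) shrinks the
# steps so the walk settles. Final learning rate after 50 epochs of 100 steps:
print(learning_schedule(n_epochs * m - 1))  # 5 / 5049, about 0.001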
# Scikit-Learn SGD
from sklearn.linear_model import SGDRegressor
# The original gist used n_iter=50, a parameter since removed from
# scikit-learn; max_iter is the modern equivalent, and tol=None makes
# all 50 epochs run without early stopping.
sgd_reg = SGDRegressor(max_iter=50, tol=None, penalty=None, eta0=0.1)
sgd_reg.fit(X, y.ravel())
print(np.c_[sgd_reg.intercept_, sgd_reg.coef_].T)
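# Optional check (added): predict at the same two x-values used for the
# manual plots; the fitted line should be close to y = 4 + 3x.
print(sgd_reg.predict(X_new))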
# Mini-batch Gradient Descent: every step uses a small random batch of instances.
n_iterations = 50
minibatch_size = 20
t0, t1 = 10, 1000  # re-binds the globals read by learning_schedule
theta = np.random.randn(2, 1)  # random initialization
t = 0
for epoch in range(n_iterations):
    # Shuffle the training set once per epoch, then walk through it in batches.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch_size):
        t += 1
        xi = X_b_shuffled[i:i + minibatch_size]
        yi = y_shuffled[i:i + minibatch_size]
        # Summed (not averaged) batch gradient; the smaller t0 above effectively
        # compensates for the missing 1/minibatch_size factor.
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)
        theta = theta - eta * gradients
y_predict = X_new_b.dot(theta)
plt.plot(X_new, y_predict, 'y--')
print(theta)
plt.show()
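# Added summary of the trade-offs demonstrated above:
# - Batch GD:      all m instances per step; smooth, predictable convergence.
# - SGD:           1 instance per step; noisy but cheap, needs a decaying schedule.
# - Mini-batch GD: minibatch_size instances per step; a compromise that also
#                  benefits from vectorized matrix operations.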