Skip to content

Instantly share code, notes, and snippets.

# changkun/gradient-descent.py

Created June 27, 2017 06:45
Show Gist options
• Save changkun/7f370d7668e62c139b00092e2b8708d4 to your computer and use it in GitHub Desktop.

This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 # -*- coding: utf-8 -*-
"""Fit a noisy line with several gradient-descent variants and plot each fit.

A synthetic dataset y = 4 + 3 * x + Gaussian noise is generated, then the
two parameters (intercept, slope) are estimated with:

* batch gradient descent (red solid line),
* stochastic gradient descent with a decaying learning rate (green dotted),
* scikit-learn's ``SGDRegressor`` (printed only),
* mini-batch gradient descent with a decaying learning rate (yellow dashed).

Each hand-written variant prints its final ``theta`` and draws its
prediction over x in [0, 2] on top of the scattered training points.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import SGDRegressor

# Generate a random dataset.
np.random.seed(42)
m = 100  # number of training instances
X = 2 * np.random.rand(m, 1)
y = 4 + 3 * X + np.random.randn(m, 1)
X_b = np.c_[np.ones((m, 1)), X]  # prepend a column of ones for the intercept

# Two end points (x = 0 and x = 2) are enough to draw each fitted line.
X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((2, 1)), X_new]

plt.plot(X, y, 'b.')

# Batch Gradient Descent
eta = 0.1
n_iterations = 1000
theta = np.random.randn(2, 1)

for iteration in range(n_iterations):
    # Gradient of the mean squared error over the full training set.
    gradients = 2.0 / m * X_b.T.dot(X_b.dot(theta) - y)
    theta = theta - eta * gradients

y_predict = X_new_b.dot(theta)
plt.plot(X_new, y_predict, 'r-')
print(theta)

# Stochastic Gradient Descent
n_epochs = 50
t0, t1 = 5, 50
theta = np.random.randn(2, 1)


def learning_schedule(t):
    """Return the learning rate t0 / (t + t1) for step *t*.

    ``t0`` and ``t1`` are looked up as module-level names at call time, so
    rebinding them later in the script (as the mini-batch section does)
    changes the schedule this function produces.
    """
    # float() keeps true division even where "/" is integer division.
    return float(t0) / (t + t1)


for epoch in range(n_epochs):
    for i in range(m):
        # Sample one instance with replacement; slicing keeps the 2-D shape.
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradients = 2.0 * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients

y_predict = X_new_b.dot(theta)
plt.plot(X_new, y_predict, 'g:')
print(theta)

# Scikit-Learn SGD
# ``n_iter`` was removed from SGDRegressor; ``max_iter`` is its replacement.
# ``tol=None`` turns off the tolerance-based early stop so that the full
# 50 epochs are run, as the old ``n_iter=50`` did.
sgd_reg = SGDRegressor(max_iter=50, tol=None, penalty=None, eta0=0.1)
sgd_reg.fit(X, y.ravel())
print(np.c_[sgd_reg.intercept_, sgd_reg.coef_].T)

# Mini Batch Gradient Descent
n_iterations = 50
minibatch_size = 20
t0, t1 = 10, 1000  # rebinds the globals read by learning_schedule()
theta = np.random.randn(2, 1)

t = 0
for epoch in range(n_iterations):
    # Reshuffle once per epoch, then walk the data in consecutive slices.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch_size):
        t += 1
        xi = X_b_shuffled[i:i + minibatch_size]
        yi = y_shuffled[i:i + minibatch_size]
        # NOTE: this gradient is summed over the mini-batch (there is no
        # 1 / minibatch_size factor), so the step taken is minibatch_size
        # times larger than eta applied to the mean gradient would be.
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)
        theta = theta - eta * gradients

y_predict = X_new_b.dot(theta)
plt.plot(X_new, y_predict, 'y--')
print(theta)

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment