
@ilyarudyak
Created September 9, 2021 18:22
Gist for the Medium post "Just another post about grad descend".
from builtins import range
import numpy as np
from random import shuffle
# from past.builtins import xrange
def softmax_loss_naive(W, X, y, reg=0.0):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.     #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    D, C = W.shape
    N, D = X.shape

    # Scores and row-wise softmax probabilities; shift each row by its max
    # before exponentiating to avoid overflow.
    F = X @ W  # N x C
    F -= np.max(F, axis=1, keepdims=True)
    S = np.exp(F) / np.sum(np.exp(F), axis=1, keepdims=True)

    # Data loss: average negative log-probability of the correct class.
    for i in range(N):
        s = S[i, :]
        loss -= np.log(s[y[i]])
    loss /= N

    # Gradient of the loss w.r.t. the scores is S with 1 subtracted at the
    # correct class; backpropagate through F = X @ W to get dW.
    dS = np.copy(S)
    for i in range(N):
        dS[i, y[i]] -= 1
    dW = X.T @ dS
    dW /= N

    # L2 regularization term and its gradient (2 * reg * W).
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return loss, dW
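The analytic gradient above can be sanity-checked against a centered finite-difference estimate. The sketch below is not part of the original gist; the helper numerical_gradient and the tiny random problem are illustrative, assuming softmax_loss_naive is defined as above.

# Sanity check (our addition, not part of the gist): compare the analytic
# gradient from softmax_loss_naive with a centered finite-difference estimate.
def numerical_gradient(f, W, h=1e-5):
    """Centered finite differences of the scalar function f at W."""
    grad = np.zeros_like(W)
    it = np.nditer(W, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = W[idx]
        W[idx] = old + h
        f_plus = f(W)
        W[idx] = old - h
        f_minus = f(W)
        W[idx] = old          # restore the entry before moving on
        grad[idx] = (f_plus - f_minus) / (2 * h)
        it.iternext()
    return grad

np.random.seed(0)
N, D, C = 5, 4, 3
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)
W = 0.01 * np.random.randn(D, C)

loss, dW = softmax_loss_naive(W, X, y, reg=0.1)
dW_num = numerical_gradient(lambda w: softmax_loss_naive(w, X, y, reg=0.1)[0], W)
print(np.max(np.abs(dW - dW_num)))  # should be very small (on the order of 1e-7)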
def softmax_loss_vectorized(W, X, y, reg=0.0):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.  #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    D, C = W.shape
    N, D = X.shape

    # Scores, shifted row-wise by the max for numeric stability, then softmax.
    F = X @ W  # N x C
    F -= np.max(F, axis=1, keepdims=True)
    S = np.exp(F) / np.sum(np.exp(F), axis=1, keepdims=True)

    # Data loss: mean negative log-probability of the correct class.
    loss -= np.mean(np.log(S[np.arange(N), y]))

    # Gradient: subtract 1 at the correct class, then backpropagate through
    # F = X @ W.
    dS = np.copy(S)
    dS[np.arange(N), y] -= 1
    dW = X.T @ dS
    dW /= N

    # L2 regularization term and its gradient (2 * reg * W).
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return loss, dW
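A minimal usage sketch, again not part of the original gist: on a random minibatch the naive and vectorized implementations should agree to numerical precision.

# Usage sketch (our addition): the two implementations should match.
np.random.seed(1)
N, D, C = 100, 20, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)
W = 0.001 * np.random.randn(D, C)

loss_naive, dW_naive = softmax_loss_naive(W, X, y, reg=0.5)
loss_vec, dW_vec = softmax_loss_vectorized(W, X, y, reg=0.5)

print(abs(loss_naive - loss_vec))                    # expect ~0
print(np.linalg.norm(dW_naive - dW_vec, ord='fro'))  # expect ~0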