
@ilyarudyak
Created September 9, 2021 18:22
Gist for the Medium post "Just another post about grad descend".
from builtins import range
import numpy as np
from random import shuffle
# from past.builtins import xrange
def softmax_loss_naive(W, X, y, reg=0.0):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.     #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    D, C = W.shape
    N, D = X.shape

    # Scores and row-wise softmax probabilities; shift each row by its max
    # before exponentiating to avoid overflow.
    F = X @ W  # N x C
    F -= np.max(F, axis=1, keepdims=True)
    S = np.exp(F) / np.sum(np.exp(F), axis=1, keepdims=True)

    # Data loss: average negative log-probability of the correct class.
    for i in range(N):
        s = S[i, :]
        loss -= np.log(s[y[i]])
    loss /= N

    # Gradient of the loss w.r.t. the scores is S with 1 subtracted at the
    # correct class; backpropagate through F = X @ W to get dW.
    dS = np.copy(S)
    for i in range(N):
        dS[i, y[i]] -= 1
    dW = X.T @ dS
    dW /= N

    # L2 regularization term and its gradient (2 * reg * W).
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return loss, dW
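The analytic gradient above can be sanity-checked against a centered finite-difference estimate. The sketch below is not part of the original gist; the helper numerical_gradient and the tiny random problem are illustrative, assuming softmax_loss_naive is defined as above.

# Sanity check (our addition, not part of the gist): compare the analytic
# gradient from softmax_loss_naive with a centered finite-difference estimate.
def numerical_gradient(f, W, h=1e-5):
    """Centered finite differences of the scalar function f at W."""
    grad = np.zeros_like(W)
    it = np.nditer(W, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = W[idx]
        W[idx] = old + h
        f_plus = f(W)
        W[idx] = old - h
        f_minus = f(W)
        W[idx] = old          # restore the entry before moving on
        grad[idx] = (f_plus - f_minus) / (2 * h)
        it.iternext()
    return grad

np.random.seed(0)
N, D, C = 5, 4, 3
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)
W = 0.01 * np.random.randn(D, C)

loss, dW = softmax_loss_naive(W, X, y, reg=0.1)
dW_num = numerical_gradient(lambda w: softmax_loss_naive(w, X, y, reg=0.1)[0], W)
print(np.max(np.abs(dW - dW_num)))  # should be very small (on the order of 1e-7)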
def softmax_loss_vectorized(W, X, y, reg=0.0):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.  #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    D, C = W.shape
    N, D = X.shape

    # Scores, shifted row-wise by the max for numeric stability, then softmax.
    F = X @ W  # N x C
    F -= np.max(F, axis=1, keepdims=True)
    S = np.exp(F) / np.sum(np.exp(F), axis=1, keepdims=True)

    # Data loss: mean negative log-probability of the correct class.
    loss -= np.mean(np.log(S[np.arange(N), y]))

    # Gradient: subtract 1 at the correct class, then backpropagate through
    # F = X @ W.
    dS = np.copy(S)
    dS[np.arange(N), y] -= 1
    dW = X.T @ dS
    dW /= N

    # L2 regularization term and its gradient (2 * reg * W).
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return loss, dW
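A minimal usage sketch, again not part of the original gist: on a random minibatch the naive and vectorized implementations should agree to numerical precision.

# Usage sketch (our addition): the two implementations should match.
np.random.seed(1)
N, D, C = 100, 20, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)
W = 0.001 * np.random.randn(D, C)

loss_naive, dW_naive = softmax_loss_naive(W, X, y, reg=0.5)
loss_vec, dW_vec = softmax_loss_vectorized(W, X, y, reg=0.5)

print(abs(loss_naive - loss_vec))                    # expect ~0
print(np.linalg.norm(dW_naive - dW_vec, ord='fro'))  # expect ~0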