iantimmis/softmax_cross_entropy.py

## softmax_cross_entropy.py
import numpy as np

def naive_softmax(logits):
    '''
    Softmax computed straight from its definition: exponentiate every
    entry, then divide by the sum of all the exponentials (the sum runs
    over every element of the input, not along a particular axis).

    Known numerical weaknesses:
    * a single very large entry makes exp overflow
    * when every entry is very negative, all the exps underflow
    '''
    numerators = np.exp(logits)
    normalizer = numerators.sum()
    return numerators / normalizer

def stable_softmax(logits):
    '''
    Numerically stable softmax, mathematically equivalent to the naive one.

    Softmax is unchanged when the same constant is subtracted from every
    logit, so the maximum is subtracted before exponentiating. The largest
    exponent is then exp(0) == 1, which rules out overflow and guarantees
    the normalizing sum is at least 1 (no division by zero from underflow).

    logits: array-like of scores; the normalization runs over all elements.
    Returns an array of the same shape whose entries sum to 1.
    '''
    max_val = np.max(logits)
    safe_exp_logits = np.exp(logits - max_val)
    # The shift cancels between numerator and denominator, so the
    # denominator is just the sum of the shifted exponentials (it must not
    # be scaled by max_val).
    return safe_exp_logits / np.sum(safe_exp_logits)

def naive_softmax_with_cross_entropy(logits, t):
    '''
    Softmax plugged into categorical cross entropy:

        loss = -sum(t * log(softmax(logits)))

    logits: array of scores passed to naive_softmax.
    t: target weights multiplied elementwise with the log-probabilities
       (presumably a one-hot or probability vector shaped like logits).

    Failure modes (inherited from the naive formulation):
    * exp can overflow inside naive_softmax for very large logits
    * a probability that underflows to 0 gives log(0) == -inf, and
      0 * -inf is nan, so the loss can come out as inf or nan
    '''
    probs = naive_softmax(logits)
    # Cross entropy weights the LOG-probabilities by the targets, not the
    # probabilities themselves.
    return -np.sum(t * np.log(probs))

def stable_softmax_with_cross_entropy(logits, t):
    '''
    Numerically stable softmax + categorical cross entropy, mathematically
    equivalent to -sum(t * log(softmax(logits))).

    The log-softmax is formed directly as

        log_softmax = (logits - max) - log(sum(exp(logits - max)))

    so exp never sees a positive argument (no overflow) and the sum inside
    the log is at least 1 (no log(0)).

    logits: array of scores; normalization runs over all elements.
    t: target weights multiplied elementwise with the log-probabilities
       (presumably a one-hot or probability vector shaped like logits).
    Returns the scalar loss.
    '''
    max_val = np.max(logits)
    safe_logits = logits - max_val
    # Log of the normalizer of the SHIFTED logits. The shift is applied to
    # both terms of the log-softmax, so it cancels and must not be added
    # back on one side only.
    safe_logsumexp = np.log(np.sum(np.exp(safe_logits)))
    log_probs = safe_logits - safe_logsumexp
    return -np.sum(t * log_probs)
	import numpy as np

	def naive_softmax(logits):
		'''
		Softmax computed straight from its definition: exponentiate every
		entry, then divide by the sum of all the exponentials (the sum runs
		over every element of the input, not along a particular axis).

		Failure modes:
		* If any entry is very large, exp overflows
		* if all entries are very negative, all exps underflow
		'''
		exp_logits = np.exp(logits)
		return exp_logits / np.sum(exp_logits)

	def stable_softmax(logits):
		'''
		Numerically stable softmax, mathematically equivalent to the naive one.

		Softmax is unchanged when the same constant is subtracted from every
		logit, so the maximum is subtracted before exponentiating. The largest
		exponent is then exp(0) == 1, which rules out overflow and guarantees
		the normalizing sum is at least 1.

		logits: array-like of scores; the normalization runs over all elements.
		Returns an array of the same shape whose entries sum to 1.
		'''
		max_val = np.max(logits)
		safe_exp_logits = np.exp(logits - max_val)
		# The shift cancels between numerator and denominator, so the
		# denominator is just the sum of the shifted exponentials (it must
		# not be scaled by max_val).
		return safe_exp_logits / np.sum(safe_exp_logits)

	def naive_softmax_with_cross_entropy(logits, t):
		'''
		Softmax plugged into categorical cross entropy:

			loss = -sum(t * log(softmax(logits)))

		logits: array of scores passed to naive_softmax.
		t: target weights multiplied elementwise with the log-probabilities
		   (presumably a one-hot or probability vector shaped like logits).

		Failure modes (inherited from the naive formulation):
		* exp can overflow inside naive_softmax for very large logits
		* a probability that underflows to 0 gives log(0) == -inf, and
		  0 * -inf is nan, so the loss can come out as inf or nan
		'''
		probs = naive_softmax(logits)
		# Cross entropy weights the LOG-probabilities by the targets, not
		# the probabilities themselves.
		return -np.sum(t * np.log(probs))

	def stable_softmax_with_cross_entropy(logits, t):
		'''
		Numerically stable softmax + categorical cross entropy, mathematically
		equivalent to -sum(t * log(softmax(logits))).

		The log-softmax is formed directly as

			log_softmax = (logits - max) - log(sum(exp(logits - max)))

		so exp never sees a positive argument (no overflow) and the sum inside
		the log is at least 1 (no log(0)).

		logits: array of scores; normalization runs over all elements.
		t: target weights multiplied elementwise with the log-probabilities
		   (presumably a one-hot or probability vector shaped like logits).
		Returns the scalar loss.
		'''
		max_val = np.max(logits)
		safe_logits = logits - max_val
		# Log of the normalizer of the SHIFTED logits. The shift is applied
		# to both terms of the log-softmax, so it cancels and must not be
		# added back on one side only.
		safe_logsumexp = np.log(np.sum(np.exp(safe_logits)))
		log_probs = safe_logits - safe_logsumexp
		return -np.sum(t * log_probs)