import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder


def softmax(class_scores):
    """
    Calculate the class probability distribution for each digit from the given class scores.
    :param class_scores: class score matrix with shape (N, C)
    :return: probability distribution with the same shape
    """
    # subtract the row-wise maximum for numerical stability before exponentiating
    class_scores = class_scores - np.max(class_scores, axis=1, keepdims=True)
    return np.exp(class_scores) / np.sum(np.exp(class_scores), axis=1, keepdims=True)
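# Hedged sketch: the loss function below calls class_scores(X, theta), which is not defined in
# this gist. A plain linear classifier is assumed here, i.e. the scores are simply X @ theta.
def class_scores(X, theta):
    """
    Compute raw class scores for each sample (assumed to be a linear model).
    :param X: data matrix with shape (N, D)
    :param theta: learnable parameters with shape (D, C)
    :return: score matrix with shape (N, C)
    """
    return np.dot(X, theta)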
def onehot_encode_label(label):
    """
    Support function to convert a label vector into a one hot encoding matrix.
    :param label: array with shape (D,), D can be whatever you want
    :return: one hot encoding matrix
    """
    onehot_encoder = OneHotEncoder(sparse=False)
    label = label.reshape(len(label), 1)
    onehot_encoded_label = onehot_encoder.fit_transform(label)
    return onehot_encoded_label
def data_loss(class_probabilities, onehot_encoded_label):
    """
    Compute the data loss L_i for the correct class with a one hot encoded label.
    :param class_probabilities: probabilities from the softmax function
    :param onehot_encoded_label: correct labels in one hot encoding shape
    :return: the data loss L_i
    """
    return onehot_encoded_label * -np.log(class_probabilities)
def loss(X, y, theta, lam):
    """
    Compute the regularized softmax cross-entropy loss and its gradient.
    :param X: data
    :param y: label of the data
    :param theta: learnable parameters
    :param lam: regularization factor
    :return: loss and gradient as a tuple
    """
    encoded_labels = onehot_encode_label(y)          # also needed for the gradient, therefore calculated separately
    probabilities = softmax(class_scores(X, theta))  # also needed for the gradient, therefore calculated separately
    loss_Li = data_loss(probabilities, encoded_labels)
    m = X.shape[0]  # number of training samples for normalization
    l2_regularization = (lam / 2) * np.sum(theta * theta)  # regularization loss
    loss = np.sum(loss_Li) / m + l2_regularization
    dl2 = lam * theta
    dloss = np.dot(X.T, (probabilities - encoded_labels) / m)
    gradient = dloss + dl2
    return loss, gradient
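# Hedged sketch, not part of the original gist: a finite-difference check that compares the
# analytic gradient returned by loss() against a numerical estimate for a few random entries.
def gradient_check(X, y, theta, lam, eps=1e-5, num_checks=5):
    _, grad = loss(X, y, theta, lam)
    for _ in range(num_checks):
        idx = tuple(np.random.randint(dim) for dim in theta.shape)
        old_value = theta[idx]
        theta[idx] = old_value + eps
        loss_plus, _ = loss(X, y, theta, lam)
        theta[idx] = old_value - eps
        loss_minus, _ = loss(X, y, theta, lam)
        theta[idx] = old_value  # restore the original value
        numeric = (loss_plus - loss_minus) / (2 * eps)
        print('analytic: %.6f, numeric: %.6f' % (grad[idx], numeric))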
def sgd(training_data, training_label, theta, lam=0.5, iterations=100, learning_rate=1e-5, batch_size=256):
    """
    Optimize theta with mini-batch stochastic gradient descent.
    :return: optimized theta and the loss history of every iteration
    """
    losses = []
    for i in range(iterations):
        # draw a random mini-batch of size batch_size from the training data
        shuffle_index = np.random.permutation(training_data.shape[0])
        data, label = training_data[shuffle_index], training_label[shuffle_index]
        data, label = data[:batch_size], label[:batch_size]
        l, grad = loss(data, label, theta, lam)
        losses.append(l)
        theta -= learning_rate * grad
    return theta, losses
# Initialize learnable parameters theta
theta = np.zeros([X_train.shape[1], len(np.unique(y_train))])
# Start optimization with training data, theta and optional hyperparameters
opt_model, loss_history = sgd(X_train, y_train, theta, iterations=250)
# evaluation
print('last iteration loss:', loss_history[-1])
print('first iteration loss:', loss_history[0])
print('Is the first loss equal to ln(10)?', np.abs(np.log(10) - loss_history[0]) < 1e-6)  # if this is False, you have an implementation error
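# Hedged sketch, not part of the original gist: accuracy on held-out data. X_test and y_test
# are assumed to exist with the same preprocessing as X_train and y_train.
test_predictions = np.argmax(np.dot(X_test, opt_model), axis=1)
print('test accuracy:', np.mean(test_predictions == y_test))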
# plot a loss curve
plt.plot(loss_history)
plt.ylabel('loss')
plt.xlabel('iterations')
plt.show()
# plot the learned weights of each class as a 28x28 image
plt.figure(figsize=(20, 20))
num_classes = 10
for c in range(num_classes):
    f = plt.subplot(10, num_classes, num_classes + c + 1)
    f.axis('off')
    plt.imshow(np.reshape(opt_model[:, c], [28, 28]))
plt.show()