import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

def softmax(class_scores):
    """
    Calculate class probability distribution for each digit from given class scores.
    :param class_scores: class scores of your model
    :return: probability distribution
    """
    # subtract the row-wise maximum for numerical stability; softmax is
    # shift-invariant, so the result is unchanged (this also avoids mutating the input)
    class_scores = class_scores - np.max(class_scores, axis=1, keepdims=True)
    return np.exp(class_scores) / np.sum(np.exp(class_scores), axis=1, keepdims=True)
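# Quick sanity check (illustrative, not part of the original gist): every row of
# the softmax output is a probability distribution and must sum to 1, even for
# very large scores, thanks to the max subtraction above.
scores_demo = np.array([[1.0, 2.0, 3.0],
                        [1000.0, 1001.0, 1002.0]])
print(softmax(scores_demo).sum(axis=1))  # -> [1. 1.]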
def onehot_encode_label(label):
    """
    Support function to convert a label vector into a one-hot encoding matrix.
    :param label: array with shape (D,), D can be whatever you want
    :return: one-hot encoding matrix
    """
    # note: scikit-learn >= 1.2 renames the sparse argument to sparse_output
    onehot_encoder = OneHotEncoder(sparse=False)
    label = label.reshape(len(label), 1)
    onehot_encoded_label = onehot_encoder.fit_transform(label)
    return onehot_encoded_label
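# Illustrative aside: for integer labels 0..C-1, the same one-hot matrix can be
# built with plain numpy instead of scikit-learn.
labels_demo = np.array([0, 2, 1, 2])
print(onehot_encode_label(labels_demo))            # sklearn version
print(np.eye(labels_demo.max() + 1)[labels_demo])  # equivalent numpy one-liner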
def data_loss(class_probabilities, onehot_labels):
    """
    Compute the data loss L_i for the correct class with a one-hot encoded label.
    :param class_probabilities: probabilities from the softmax function
    :param onehot_labels: correct labels in one-hot encoded form
    :return: the data loss L_i
    """
    # the one-hot mask zeroes out every entry except -log(p) of the correct class
    return onehot_labels * -np.log(class_probabilities)
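# Worked example (illustrative): multiplying by the one-hot labels keeps only
# -log(p) of the correct class and zeroes out everything else.
p_demo = np.array([[0.7, 0.2, 0.1]])
y_demo = np.array([[1.0, 0.0, 0.0]])
print(data_loss(p_demo, y_demo))  # -> [[0.357 0. 0.]] since -log(0.7) ~ 0.357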
def class_scores(X, theta):
    """
    Compute the raw class scores of the linear model.
    Note: this helper is called below but was not defined in the original
    gist; a plain linear score X.dot(theta) is assumed here.
    :param X: data with shape (N, D)
    :param theta: learnable parameters with shape (D, C)
    :return: class scores with shape (N, C)
    """
    return np.dot(X, theta)

def loss(X, y, theta, lam):
    """
    :param X: data
    :param y: label of the data
    :param theta: learnable parameters
    :param lam: regularization factor
    :return: loss and gradient as a tuple
    """
    # note: the one-hot matrix has one column per class present in y, so every
    # batch must contain all classes for the shapes to match theta
    encoded_labels = onehot_encode_label(y)  # also needed for the gradient, therefore computed separately
    probabilities = softmax(class_scores(X, theta))  # also needed for the gradient, therefore computed separately
    loss_Li = data_loss(probabilities, encoded_labels)
    m = X.shape[0]  # number of training samples, for normalization
    l2_regularization = (lam / 2) * np.sum(theta * theta)  # regularization loss
    loss = np.sum(loss_Li) / m + l2_regularization
    # gradients of the regularization term and of the cross-entropy data term
    dl2 = lam * theta
    dloss = np.dot(X.T, (probabilities - encoded_labels) / m)
    gradient = dloss + dl2
    return loss, gradient
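# Optional sanity check (illustrative, not part of the original gist): compare
# the analytic gradient from loss() with a numerical central-difference
# estimate on a tiny random problem. The two should agree to roughly 1e-7.
def numerical_gradient(X, y, theta, lam, eps=1e-5):
    numeric = np.zeros_like(theta)
    for idx in np.ndindex(*theta.shape):
        old = theta[idx]
        theta[idx] = old + eps
        l_plus, _ = loss(X, y, theta, lam)
        theta[idx] = old - eps
        l_minus, _ = loss(X, y, theta, lam)
        theta[idx] = old  # restore the original value
        numeric[idx] = (l_plus - l_minus) / (2 * eps)
    return numeric

rng = np.random.RandomState(0)
X_tiny = rng.randn(6, 5)
y_tiny = np.array([0, 1, 2, 0, 1, 2])  # every class present, so the one-hot width matches theta
theta_tiny = rng.randn(5, 3) * 0.01
_, analytic = loss(X_tiny, y_tiny, theta_tiny, lam=0.1)
numeric = numerical_gradient(X_tiny, y_tiny, theta_tiny, lam=0.1)
print('max gradient check error:', np.max(np.abs(numeric - analytic)))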
def sgd(training_data, training_label, theta, lam=0.5, iterations=100, learning_rate=1e-5, batch_size=256):
    losses = []
    for i in range(iterations):
        # draw a fresh random mini-batch each iteration
        shuffle_index = np.random.permutation(training_data.shape[0])
        data, label = training_data[shuffle_index], training_label[shuffle_index]
        data, label = data[:batch_size], label[:batch_size]
        l, grad = loss(data, label, theta, lam)
        losses.append(l)
        theta -= learning_rate * grad
    return theta, losses
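# Illustrative variant (not in the original gist): sgd() above draws an
# independent random mini-batch every iteration, so some samples may be seen
# several times before others are seen at all. The sketch below instead walks
# through one shuffled permutation per epoch, so every sample is used exactly
# once per epoch. As with sgd(), each batch must contain every class, or the
# one-hot matrix in loss() will be narrower than theta.
def sgd_epochs(training_data, training_label, theta, lam=0.5, epochs=5,
               learning_rate=1e-5, batch_size=256):
    losses = []
    n = training_data.shape[0]
    for _ in range(epochs):
        perm = np.random.permutation(n)
        for start in range(0, n, batch_size):
            batch = perm[start:start + batch_size]
            l, grad = loss(training_data[batch], training_label[batch], theta, lam)
            losses.append(l)
            theta -= learning_rate * grad
    return theta, losses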
# Initialize learnable parameters theta
theta = np.zeros([X_train.shape[1], len(np.unique(y_train))])
# Start optimization with training data, theta and optional hyperparameters
opt_model, loss_history = sgd(X_train, y_train, theta, iterations=250)

# evaluation
print('last iteration loss:', loss_history[-1])
print('first iteration loss:', loss_history[0])
# with theta initialized to zeros, every class has probability 1/10, so the
# first loss should be ln(10); if this prints False you have an implementation error
print('Is the first loss equal to ln(10)?', np.abs(np.log(10) - loss_history[0]) < 1e-6)
# plot a loss curve
plt.plot(loss_history)
plt.ylabel('loss')
plt.xlabel('iterations')
plt.show()

# plot weights: each column of opt_model is reshaped back into a 28x28 image
plt.figure(figsize=(20, 20))
num_classes = 10
for c in range(num_classes):
    f = plt.subplot(1, num_classes, c + 1)
    f.axis('off')
    plt.imshow(np.reshape(opt_model[:, c], [28, 28]))
plt.show()
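# Evaluation sketch (illustrative, not in the original gist): predict by taking
# the argmax over the class scores and measure accuracy on the training data.
def predict(X, theta):
    return np.argmax(class_scores(X, theta), axis=1)

print('training accuracy:', np.mean(predict(X_train, opt_model) == y_train))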