Lecture #2: Logistic Regression Demo
from keras.datasets import imdb
import numpy as np

TOP_N_WORDS = 1_000

(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=TOP_N_WORDS
)
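
# Optional helper (not part of the original demo): decode a review back into
# words for inspection. This is a sketch that assumes imdb.load_data's default
# settings, where word indices are offset by 3 so that 0, 1, and 2 are reserved
# for padding, the start token, and out-of-vocabulary words.
def decode_review(word_sequence):
    word_index = imdb.get_word_index()
    index_to_word = {idx + 3: word for word, idx in word_index.items()}
    return ' '.join(index_to_word.get(idx, '?') for idx in word_sequence)
# Example: print(decode_review(x_train[0]))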

# Transform dataset from variable-length word sequences to a
# binary valued dense matrix.
new_x_train = np.zeros((len(x_train), TOP_N_WORDS + 1))
for example_idx, word_sequence in enumerate(x_train):
    for word_idx in word_sequence:
        new_x_train[example_idx, word_idx] = 1
# We'll use a dummy column 0 to apply an intercept theta_0 to our model.
# It will always have value 1.
new_x_train[:, 0] = 1.0
x_train = new_x_train

new_x_test = np.zeros((len(x_test), TOP_N_WORDS + 1))
for example_idx, word_sequence in enumerate(x_test):
    for word_idx in word_sequence:
        new_x_test[example_idx, word_idx] = 1
new_x_test[:, 0] = 1.0
x_test = new_x_test
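
# Optional sketch (not in the original demo): the same multi-hot encoding
# written with NumPy fancy indexing, which also avoids duplicating the loops
# above for the train and test sets.
def multi_hot(sequences, num_cols=TOP_N_WORDS + 1):
    encoded = np.zeros((len(sequences), num_cols))
    for row, word_sequence in enumerate(sequences):
        # Assign every word position in this review at once.
        encoded[row, word_sequence] = 1.0
    encoded[:, 0] = 1.0  # dummy intercept column
    return encoded
# Example (would replace the loops above): x_train = multi_hot(x_train)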

# The logistic (sigmoid) function squashes any real z into a probability in (0, 1).
def sigma(z):
    return 1 / (1 + np.exp(-z))

# Mean binary cross-entropy loss:
#   CE = -(1/N) * sum_j [ y_j * log(p_j) + (1 - y_j) * log(1 - p_j) ]
def ce_error(probs, correct_ys):
    loss_on_positives = -np.sum(
        correct_ys * np.log(probs)
    )
    loss_on_negatives = -np.sum(
        (1 - correct_ys) * np.log(1 - probs)
    )
    return (loss_on_positives + loss_on_negatives) / len(probs)

# Partial derivative of the (summed) cross-entropy with respect to theta_i:
#   dCE/dtheta_i = sum_j (p_j - y_j) * x_{j,i}
# Note this is a sum, not a mean, so the effective step size scales with the
# batch size; ce_error above reports the mean instead.
def deriv_ce_error_wrt_theta_i(correct_ys, x_is, probs):
    return np.sum(
        (probs - correct_ys) * x_is
    )

# Build the full gradient vector, one coordinate (theta_i) at a time.
def gradient(correct_ys, x_values, thetas):
    probs = probabilities(x_values, thetas)
    gradient = np.zeros(len(thetas))
    for i in range(0, len(thetas)):
        x_is = x_values[:, i]
        gradient[i] = (
            deriv_ce_error_wrt_theta_i(correct_ys, x_is, probs)
        )
    return gradient

# Model predictions: P(y = 1 | x) = sigma(theta . x).
def probabilities(x_values, thetas):
    return sigma(x_values.dot(thetas))

# Fraction of examples where thresholding the probability at 0.5 matches y.
def accuracy(probs, correct_ys):
    num_correct = len(probs) - np.sum(
        np.abs(correct_ys - (probs > 0.5))
    )
    return num_correct / len(probs)
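
# Optional sanity check (not in the original demo): compare the analytic
# gradient against a finite-difference estimate of ce_error on a small slice.
# Because gradient() sums over examples while ce_error() averages, the analytic
# value is divided by the number of examples before comparing.
def check_gradient(x_values, correct_ys, thetas, coords=range(5), eps=1e-5):
    analytic = gradient(correct_ys, x_values, thetas) / len(correct_ys)
    for i in coords:
        bumped_up, bumped_down = thetas.copy(), thetas.copy()
        bumped_up[i] += eps
        bumped_down[i] -= eps
        numeric = (
            ce_error(probabilities(x_values, bumped_up), correct_ys)
            - ce_error(probabilities(x_values, bumped_down), correct_ys)
        ) / (2 * eps)
        print(f'theta_{i}: analytic {analytic[i]:.6f} | numeric {numeric:.6f}')
# Example: check_gradient(x_train[:100], y_train[:100], np.zeros(x_train.shape[1]))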

NUM_EPOCHS = 5
NUM_EXAMPLES = x_train.shape[0]
BATCH_SIZE = 32
LEARNING_RATE = 0.01

thetas = np.zeros(x_train.shape[1])

# Initial performance.
probs = probabilities(x_train, thetas)
ce = ce_error(probs, y_train)
acc = accuracy(probs, y_train)
print(f'Epoch: {0} | CE: {ce:0.2f} | Acc: {acc:0.2f}')

print('beginning training')
for epoch_idx in range(1, NUM_EPOCHS + 1):
    for batch_start_idx in range(0, NUM_EXAMPLES, BATCH_SIZE):
        # The slice end is exclusive, so cap at NUM_EXAMPLES (not
        # NUM_EXAMPLES - 1) to avoid dropping the final example.
        batch_end_idx = min(batch_start_idx + BATCH_SIZE, NUM_EXAMPLES)
        x_batch = x_train[batch_start_idx:batch_end_idx, :]
        y_batch = y_train[batch_start_idx:batch_end_idx]
        thetas -= LEARNING_RATE * gradient(y_batch, x_batch, thetas)

    probs = probabilities(x_train, thetas)
    ce = ce_error(probs, y_train)
    acc = accuracy(probs, y_train)
    test_probs = probabilities(x_test, thetas)
    test_acc = accuracy(test_probs, y_test)
    print(
        f'Epoch: {epoch_idx} | '
        f'CE: {ce:0.2f} | '
        f'Acc: {acc:0.2f} | '
        f'Test Acc: {test_acc:0.2f}'
    )
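
For comparison, here is a rough sketch (not part of the original demo) of the same model expressed with Keras itself: a single Dense unit with a sigmoid activation, trained with SGD on binary cross-entropy. It assumes the multi-hot x_train / x_test matrices built above; since those already contain a dummy intercept column, the layer's own bias is redundant but harmless.

from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(1, activation='sigmoid', input_shape=(TOP_N_WORDS + 1,))
])
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(
    x_train, y_train,
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_test, y_test)
)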