@ruggeri
Created January 19, 2018 21:47
Lecture #2: Logistic Regression Demo
from keras.datasets import imdb
import numpy as np

TOP_N_WORDS = 1_000

# Load the IMDB review dataset, keeping only the TOP_N_WORDS most
# frequent words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=TOP_N_WORDS
)
# Transform the dataset from variable-length word sequences to a
# binary-valued dense matrix.
new_x_train = np.zeros((len(x_train), TOP_N_WORDS + 1))
for example_idx, word_sequence in enumerate(x_train):
    for word_idx in word_sequence:
        new_x_train[example_idx, word_idx] = 1
# We'll use a dummy column 0 to apply an intercept theta_0 to our
# model. It will always have value 1.
new_x_train[:, 0] = 1.0
x_train = new_x_train

# Apply the same transformation to the test set.
new_x_test = np.zeros((len(x_test), TOP_N_WORDS + 1))
for example_idx, word_sequence in enumerate(x_test):
    for word_idx in word_sequence:
        new_x_test[example_idx, word_idx] = 1
new_x_test[:, 0] = 1.0
x_test = new_x_test
def sigma(z):
    return 1 / (1 + np.exp(-z))
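# sigma is the logistic (sigmoid) function: sigma(z) = 1 / (1 + e^(-z)).
# It squashes any real-valued score into (0, 1), which we read as
# P(y = 1 | x); e.g. sigma(0) == 0.5 and sigma(4) is roughly 0.98.
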
def ce_error(probs, correct_ys):
    loss_on_positives = -np.sum(
        correct_ys * np.log(probs)
    )
    loss_on_negatives = -np.sum(
        (1 - correct_ys) * np.log(1 - probs)
    )
    return (loss_on_positives + loss_on_negatives) / len(probs)
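# ce_error is the average cross-entropy loss:
#   J(theta) = -(1/m) * sum_j [ y_j * log(p_j) + (1 - y_j) * log(1 - p_j) ]
# where p_j = sigma(x_j . theta) and m = len(probs).
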
def deriv_ce_error_wrt_theta_i(correct_ys, x_is, probs):
    return np.sum(
        (probs - correct_ys) * x_is
    )
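# The partial derivative of the cross-entropy loss with respect to theta_i
# works out to sum_j (p_j - y_j) * x_{j,i}. Note that, as written, this is
# summed (not averaged) over the batch, so the 1/m factor is effectively
# folded into the learning rate.
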
def gradient(correct_ys, x_values, thetas):
    probs = probabilities(x_values, thetas)
    gradient = np.zeros(len(thetas))
    for i in range(0, len(thetas)):
        x_is = x_values[:, i]
        gradient[i] = (
            deriv_ce_error_wrt_theta_i(correct_ys, x_is, probs)
        )
    return gradient

def probabilities(x_values, thetas):
    return sigma(x_values.dot(thetas))
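# probabilities computes the model's predicted P(y = 1) for every example
# at once: x_values.dot(thetas) gives one score per row (column 0 supplies
# the intercept theta_0), and sigma turns each score into a probability.
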
def accuracy(probs, correct_ys):
    num_correct = len(probs) - np.sum(
        np.abs(correct_ys - (probs > 0.5))
    )
    return num_correct / len(probs)
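# accuracy thresholds the probabilities at 0.5 to get 0/1 predictions;
# |y - prediction| is 1 exactly when a prediction is wrong, so summing it
# counts the errors.
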
# Mini-batch gradient descent hyperparameters.
NUM_EPOCHS = 5
NUM_EXAMPLES = x_train.shape[0]
BATCH_SIZE = 32
LEARNING_RATE = 0.01

# Initialize all model parameters (including the intercept) to zero.
thetas = np.zeros(x_train.shape[1])

# Initial performance.
probs = probabilities(x_train, thetas)
ce = ce_error(probs, y_train)
acc = accuracy(probs, y_train)
print(f'Epoch: {0} | CE: {ce:0.2f} | Acc: {acc:0.2f}')
print('beginning training')
for epoch_idx in range(1, NUM_EPOCHS + 1):
    for batch_start_idx in range(0, NUM_EXAMPLES, BATCH_SIZE):
        # Slice out the next mini-batch. The slice end is exclusive, so we
        # cap it at NUM_EXAMPLES (not NUM_EXAMPLES - 1) to avoid dropping
        # the final example.
        batch_end_idx = np.min([
            batch_start_idx + BATCH_SIZE, NUM_EXAMPLES
        ])
        x_batch = x_train[batch_start_idx:batch_end_idx, :]
        y_batch = y_train[batch_start_idx:batch_end_idx]

        # Take one gradient descent step on this batch.
        thetas -= LEARNING_RATE * gradient(y_batch, x_batch, thetas)

    # Evaluate on the full training and test sets after each epoch.
    probs = probabilities(x_train, thetas)
    ce = ce_error(probs, y_train)
    acc = accuracy(probs, y_train)
    test_probs = probabilities(x_test, thetas)
    test_acc = accuracy(test_probs, y_test)
    print(
        f'Epoch: {epoch_idx} | '
        f'CE: {ce:0.2f} | '
        f'Acc: {acc:0.2f} | '
        f'Test Acc: {test_acc:0.2f}'
    )
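
# A minimal sketch of using the learned thetas directly: score a single
# held-out review. x_test[0] already includes the intercept column, so the
# predicted probability that it is a positive review is just:
example_prob = sigma(x_test[0].dot(thetas))
print(f'P(positive) for the first test review: {example_prob:0.2f}')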