# w2v_training - gist.github.com/DerekChia/b87ca38d7efa34cd4d94edf850790033 by @DerekChia
import numpy as np

# Training
# In the full implementation, training is started with:
#   w2v.train(training_data)
# where w2v is a word2vec instance and training_data has been generated
# beforehand; a runnable sketch is included at the bottom of this file.
class word2vec():
    def train(self, training_data):
        # Initialise weight matrices
        # Both w1 and w2 should be randomly initialised, but for this demo we use
        # the pre-determined arrays getW1 (shape 9x10) and getW2 (shape 10x9)
        self.w1 = np.array(getW1)
        self.w2 = np.array(getW2)
        # self.w1 = np.random.uniform(-1, 1, (self.v_count, self.n))
        # self.w2 = np.random.uniform(-1, 1, (self.n, self.v_count))

        # Cycle through each epoch
        for i in range(self.epochs):
            # Initialise loss to 0
            self.loss = 0

            # Cycle through each training sample
            # w_t = one-hot vector for the target word, w_c = one-hot vectors for the context words
            for w_t, w_c in training_data:
                # Forward pass - pass in the vector for the target word (w_t) to get:
                # 1. predicted y using softmax (y_pred)
                # 2. hidden layer (h)
                # 3. output layer before softmax (u)
                y_pred, h, u = self.forward_pass(w_t)

                # Calculate error
                # 1. For the target word, calculate the difference between y_pred and each of the context words
                # 2. Sum up these differences with np.sum to give the error for this target word
                EI = np.sum([np.subtract(y_pred, word) for word in w_c], axis=0)

                # Backpropagation
                # We use SGD to backpropagate the errors and update the weights
                self.backprop(EI, h, w_t)

                # Calculate loss
                # There are 2 parts to the loss function:
                # Part 1: negative of the sum of the values of u at the indices of the context words
                # Part 2: number of context words * log of the sum over all elements of exp(u)
                # i.e. loss += -sum_{c in context} u[c] + len(w_c) * log(sum_j exp(u[j]))
                # Note: word.index(1) returns the index of the element with value 1 in the context word's one-hot vector
                # Note: u[word.index(1)] returns the value of the output layer (before softmax) at that index
                self.loss += -np.sum([u[word.index(1)] for word in w_c]) + len(w_c) * np.log(np.sum(np.exp(u)))
            print('Epoch:', i, "Loss:", self.loss)
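
    # After training, the learned embedding for a word is simply the corresponding
    # row of w1 (since h = w1.T . x just selects that row for a one-hot x).
    # Minimal illustrative helper - not part of the original gist; the full
    # implementation does this lookup via a word-to-index dictionary instead.
    def word_vec_by_index(self, word_index):
        # Return the n-dimensional embedding stored in row word_index of w1
        return self.w1[word_index]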
    def forward_pass(self, x):
        # x is the one-hot vector for the target word, shape 9x1
        # Multiply by the first matrix (w1) to get the hidden layer - 10x9 dot 9x1 gives 10x1
        h = np.dot(self.w1.T, x)
        # Dot product of the hidden layer with the second matrix (w2) - 9x10 dot 10x1 gives 9x1
        u = np.dot(self.w2.T, h)
        # Run u (9x1) through softmax to force each element into the range [0, 1] - output is 9x1
        y_c = self.softmax(u)
        return y_c, h, u
    def softmax(self, x):
        # Subtract the max value before exponentiating for numerical stability;
        # this shifts every element equally and does not change the result
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)
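
    # Illustrative example (values assumed, not from the original gist):
    #   softmax(np.array([2.0, 1.0, 0.1])) is approximately [0.659, 0.242, 0.099]
    # The same result is obtained with or without subtracting np.max(x), but the
    # subtraction prevents overflow when the inputs to np.exp are large.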
    def backprop(self, e, h, x):
        # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.outer.html
        # Column vector e (EI) is the sum of the prediction errors over all context words for the current target word
        # Going backwards, take the derivative of the loss with respect to w2
        # h - shape 10x1, e - shape 9x1, dl_dw2 - shape 10x9
        dl_dw2 = np.outer(h, e)
        # ...and with respect to w1
        # x - shape 9x1 (one-hot), w2 - shape 10x9, e - shape 9x1
        # np.dot(self.w2, e.T) - shape 10x1, so dl_dw1 - shape 9x10
        dl_dw1 = np.outer(x, np.dot(self.w2, e.T))
        # Update weights using the learning rate (self.lr)
        self.w1 = self.w1 - (self.lr * dl_dw1)
        self.w2 = self.w2 - (self.lr * dl_dw2)
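
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original gist). The full implementation
# sets epochs, lr, n and v_count in __init__, defines getW1/getW2 as fixed demo
# weights, and builds training_data from a corpus; the values below are
# placeholder assumptions so this snippet can be run on its own.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    np.random.seed(0)
    v_count, n = 9, 10  # vocabulary size and embedding size used in the demo

    # Stand-ins for the pre-determined demo weights referenced in train()
    getW1 = np.random.uniform(-1, 1, (v_count, n))
    getW2 = np.random.uniform(-1, 1, (n, v_count))

    w2v = word2vec()
    w2v.epochs = 50  # assumed hyperparameters
    w2v.lr = 0.01

    # One toy training sample: a one-hot target word with two one-hot context words
    target = [0] * v_count; target[0] = 1
    context_1 = [0] * v_count; context_1[1] = 1
    context_2 = [0] * v_count; context_2[2] = 1
    training_data = [(target, [context_1, context_2])]

    w2v.train(training_data)

    # The learned 10-dimensional embedding of the word at vocabulary index 0
    print(w2v.word_vec_by_index(0))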