Derek Chia (DerekChia)
w2v_find_similar_words.py

# Find similar words
w2v.vec_sim("machine", 3)

class word2vec():
    ## Removed ##
    # Input a word, return the nearest word(s)
    def vec_sim(self, word, top_n):
        v_w1 = self.word_vec(word)
        word_sim = {}
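The preview cuts off right after word_sim is initialised. A minimal sketch of how the ranking could be completed, assuming cosine similarity against every row of self.w1 and the index_word lookup built during generate_training_data (my reconstruction, not necessarily the gist's exact code):

import numpy as np

class word2vec():
    ## Removed ##
    # Input a word, print the top_n most similar words by cosine similarity
    def vec_sim(self, word, top_n):
        v_w1 = self.word_vec(word)
        word_sim = {}
        for i in range(self.v_count):
            v_w2 = self.w1[i]
            # Cosine similarity: dot product divided by the product of norms
            theta = np.dot(v_w1, v_w2) / (np.linalg.norm(v_w1) * np.linalg.norm(v_w2))
            word_sim[self.index_word[i]] = theta
        # Sort by similarity, highest first, and print the closest matches
        for w, sim in sorted(word_sim.items(), key=lambda kv: kv[1], reverse=True)[:top_n]:
            print(w, sim)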
w2v_get_vector.py

# Get vector for word
vec = w2v.word_vec("machine")

class word2vec():
    ## Removed ##
    # Get vector from word
    def word_vec(self, word):
        w_index = self.word_index[word]
        v_w = self.w1[w_index]
        # Return the embedding for this word (its row of w1)
        return v_w
w2v_training_error_backpropagation.py
class word2vec():
    ## Removed ##
    def train(self, training_data):
        ## Removed ##
        for i in range(self.epochs):
            self.loss = 0
            for w_t, w_c in training_data:
                ## Removed ##
                # Calculate error
                # 1. For a target word, calculate the difference between y_pred and each of the context words
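The preview stops just as the error term is introduced. A hedged sketch of the standard skip-gram error and weight updates that the comments describe (my reconstruction under the usual word2vec gradients; names like EI and backprop are assumptions): the prediction error is summed over the context words, then propagated back through w2 and w1.

import numpy as np

class word2vec():
    ## Removed ##
    def train(self, training_data):
        ## Removed ##
        # y_pred, h, u come from the forward pass for target word w_t
        # EI: sum of differences between y_pred and each one-hot context word
        EI = np.sum([np.subtract(y_pred, word) for word in w_c], axis=0)
        self.backprop(EI, h, w_t)
        ## Removed ##

    def backprop(self, e, h, x):
        # dL/dw2: outer product of the hidden activation and the output error
        dl_dw2 = np.outer(h, e)
        # dL/dw1: output error projected back through w2, routed by the one-hot input
        dl_dw1 = np.outer(x, np.dot(self.w2, e.T))
        # Gradient-descent step with learning rate self.lr
        self.w1 = self.w1 - (self.lr * dl_dw1)
        self.w2 = self.w2 - (self.lr * dl_dw2)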
w2v_training_forward_pass.py
class word2vec():
    def train(self, training_data):
        ## Removed ##
        # Cycle through each epoch
        for i in range(self.epochs):
            # Initialise loss to 0
            self.loss = 0
            # Cycle through each training sample
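Inside that inner loop, each (w_t, w_c) pair presumably goes through a forward pass. A minimal sketch of what that forward pass looks like for this architecture (one-hot input, linear hidden layer of size n, softmax output); the method names are assumptions:

import numpy as np

class word2vec():
    ## Removed ##
    def forward_pass(self, x):
        # Hidden layer: project the one-hot input through w1 (an embedding lookup)
        h = np.dot(self.w1.T, x)
        # Output layer: one score per vocabulary word
        u = np.dot(self.w2.T, h)
        # Softmax turns scores into a probability distribution over the vocabulary
        y_c = self.softmax(u)
        return y_c, h, u

    def softmax(self, x):
        # Subtract the max for numerical stability
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)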
w2v_training_1.py

# Training
w2v.train(training_data)

class word2vec():
    def train(self, training_data):
        # Initialise weight matrices
        # Both w1 and w2 should be randomly initialised, but for this demo we pre-determine the arrays (getW1 and getW2)
        # getW1 - shape (9x10) and getW2 - shape (10x9)
        self.w1 = np.array(getW1)
        self.w2 = np.array(getW2)
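getW1 and getW2 are fixed demo arrays that are not shown in this preview. Outside the demo, a random initialisation along these lines would serve the same purpose (shapes taken from the comment above; the uniform range is a common choice, not necessarily the author's):

import numpy as np

v_count, n = 9, 10  # vocabulary size and embedding dimension ('n' in settings)
w1 = np.random.uniform(-0.8, 0.8, (v_count, n))  # input  -> hidden, shape (9x10)
w2 = np.random.uniform(-0.8, 0.8, (n, v_count))  # hidden -> output, shape (10x9)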
w2v_generate_training_data_2.py
# Initialise object
w2v = word2vec()

# Numpy ndarray with one-hot representations for [target_word, context_words]
training_data = w2v.generate_training_data(settings, corpus)
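For intuition, each element of training_data pairs one target word with its in-window context words, all one-hot encoded. A quick shape check (hypothetical, assuming the [target, contexts] layout described in the comment above):

# First sample: target 'natural' with its right-hand neighbours
w_t, w_c = training_data[0]
print(len(w_t))   # 9, the number of unique words in the corpus
print(len(w_c))   # 2, context words within window_size of the first word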
w2v_generate_training_data_func.py
class word2vec():
    def __init__(self):
        self.n = settings['n']
        self.lr = settings['learning_rate']
        self.epochs = settings['epochs']
        self.window = settings['window_size']

    def generate_training_data(self, settings, corpus):
        # Find unique word counts using a dictionary
        word_counts = defaultdict(int)
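A sketch of how the rest of generate_training_data could proceed from the word counts, following the comment above (my reconstruction; the word2onehot helper and the exact bookkeeping attributes are assumptions, though word_index is what word_vec relies on later):

import numpy as np
from collections import defaultdict

class word2vec():
    ## Removed ##
    def generate_training_data(self, settings, corpus):
        # Find unique word counts using a dictionary
        word_counts = defaultdict(int)
        for row in corpus:
            for word in row:
                word_counts[word] += 1
        self.v_count = len(word_counts.keys())
        # Lookup tables between words and indices
        self.words_list = list(word_counts.keys())
        self.word_index = dict((word, i) for i, word in enumerate(self.words_list))
        self.index_word = dict((i, word) for i, word in enumerate(self.words_list))

        training_data = []
        for sentence in corpus:
            sent_len = len(sentence)
            for i, word in enumerate(sentence):
                # One-hot target word
                w_target = self.word2onehot(sentence[i])
                # One-hot context words within +/- self.window of the target
                w_context = []
                for j in range(i - self.window, i + self.window + 1):
                    if j != i and 0 <= j < sent_len:
                        w_context.append(self.word2onehot(sentence[j]))
                training_data.append([w_target, w_context])
        # dtype=object because samples have varying numbers of context words
        return np.array(training_data, dtype=object)

    def word2onehot(self, word):
        # Vector of zeros with a single 1 at the word's index
        vec = [0 for _ in range(self.v_count)]
        vec[self.word_index[word]] = 1
        return vec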
w2v_generate_training_data.py
text = "natural language processing and machine learning is fun and exciting"

# Note the .lower(), since upper- and lowercase do not matter in our implementation
# [['natural', 'language', 'processing', 'and', 'machine', 'learning', 'is', 'fun', 'and', 'exciting']]
corpus = [[word.lower() for word in text.split()]]
w2v_settings.py

settings = {
    'window_size': 2,        # context window +/- center word
    'n': 10,                 # dimension of word embeddings, also the size of the hidden layer
    'epochs': 50,            # number of training epochs
    'learning_rate': 0.01    # learning rate
}