This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
aaa | |
aarp | |
abb | |
abbott | |
abogado | |
ac | |
academy | |
accenture | |
accountant | |
accountants |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Locating UI elements ### | |
# By ID | |
<div id="coolestWidgetEvah">...</div> | |
element = driver.find_element_by_id("coolestWidgetEvah") | |
or | |
from selenium.webdriver.common.by import By | |
element = driver.find_element(by=By.ID, value="coolestWidgetEvah") | |
# By class name: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
text = "natural language processing and machine learning is fun and exciting" | |
# Note the .lower(): upper and lower case are treated as the same in our implementation | |
# [['natural', 'language', 'processing', 'and', 'machine', 'learning', 'is', 'fun', 'and', 'exciting']] | |
corpus = [[word.lower() for word in text.split()]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Initialise object | |
w2v = word2vec() | |
# Numpy ndarray with one-hot representation for [target_word, context_words] | |
training_data = w2v.generate_training_data(settings, corpus) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class word2vec(): | |
def __init__(self): | |
self.n = settings['n'] | |
self.lr = settings['learning_rate'] | |
self.epochs = settings['epochs'] | |
self.window = settings['window_size'] | |
def generate_training_data(self, settings, corpus): | |
# Find unique word counts using dictionary | |
word_counts = defaultdict(int) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training | |
w2v.train(training_data) | |
class word2vec(): | |
def train(self, training_data): | |
# Initialising weight matrices | |
# Both w1 and w2 should be randomly initialised but for this demo, we pre-determine the arrays (getW1 and getW2) | |
# getW1 - shape (9x10) and getW2 - shape (10x9) | |
self.w1 = np.array(getW1) | |
self.w2 = np.array(getW2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training | |
w2v.train(training_data) | |
class word2vec(): | |
def train(self, training_data): | |
# Initialising weight matrices | |
# Both w1 and w2 should be randomly initialised but for this demo, we pre-determine the arrays (getW1 and getW2) | |
# getW1 - shape (9x10) and getW2 - shape (10x9) | |
self.w1 = np.array(getW1) | |
self.w2 = np.array(getW2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class word2vec(): | |
##Removed## | |
for i in range(self.epochs): | |
self.loss = 0 | |
for w_t, w_c in training_data: | |
##Removed## | |
# Calculate error | |
# 1. For a target word, calculate difference between y_pred and each of the context words |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get vector for word | |
vec = w2v.word_vec("machine") | |
class word2vec(): | |
## Removed ## | |
# Get vector from word | |
def word_vec(self, word): | |
w_index = self.word_index[word] | |
v_w = self.w1[w_index] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find similar words | |
w2v.vec_sim("machine", 3) | |
class word2vec(): | |
## Removed## | |
# Input word, returns nearest word(s) | |
def vec_sim(self, word, top_n): | |
v_w1 = self.word_vec(word) | |
word_sim = {} |
OlderNewer