Derek Chia DerekChia

## tldfinal.txt
aaa
aarp
abb
abbott
abogado
ac
academy
accenture
accountant
accountants

## gist:46a92aaf5119a1f73190454e440753e0
### Locating UI elements ###

# By ID
<div id="coolestWidgetEvah">...</div>
element = driver.find_element_by_id("coolestWidgetEvah")
or
from selenium.webdriver.common.by import By
element = driver.find_element(by=By.ID, value="coolestWidgetEvah")

# By class name:

## w2v_generate_training_data.py
# Single demo sentence that serves as the whole training corpus.
text = "natural language processing and machine learning is fun and exciting"

# Split on whitespace and lowercase every token, because letter case is not
# significant in this implementation. The result is a list with one sentence:
# [['natural', 'language', 'processing', 'and', 'machine', 'learning', 'is', 'fun', 'and', 'exciting']]
corpus = [list(map(str.lower, text.split()))]

## w2v_generate_training_data_2.py
# Initialise object
# NOTE(review): word2vec() is called with no arguments, while `settings` is
# passed explicitly below — confirm `settings` is defined before this runs.
w2v = word2vec()
# Numpy ndarray with one-hot representation for [target_word, context_words]
training_data = w2v.generate_training_data(settings, corpus)

## w2v_generate_training_data_func.py
class word2vec():
  def __init__(self):
    """Copy hyperparameters from ``settings`` onto the instance.

    ``settings`` is not a parameter of this method; it is looked up as a
    free (module-level) name when the constructor runs, so it must be
    defined before ``word2vec()`` is called. It must provide the keys
    ``'n'``, ``'learning_rate'``, ``'epochs'`` and ``'window_size'``.
    """
    # presumably the embedding dimension — TODO confirm against the training code
    self.n = settings['n']
    self.lr = settings['learning_rate']
    self.epochs = settings['epochs']
    self.window = settings['window_size']

  def generate_training_data(self, settings, corpus):
    # Find unique word counts using dictionary
    word_counts = defaultdict(int)

## w2v_training_1.py
# Training
w2v.train(training_data)

class word2vec():
  def train(self, training_data):
    """Initialise the weight matrices ``self.w1`` and ``self.w2``.

    In this excerpt the method only sets up the weights from the
    pre-determined arrays ``getW1`` and ``getW2`` (names resolved from the
    enclosing module); ``training_data`` is accepted but not used in the
    lines shown here.
    """
    # Initialising weight matrices
    # Both w1 and w2 should be randomly initialised but for this demo, we pre-determine the arrays (getW1 and getW2)
    # getW1 - shape (9x10) and getW2 - shape (10x9)
    self.w1 = np.array(getW1)
    self.w2 = np.array(getW2)

## w2v_training.py
# Training
w2v.train(training_data)

class word2vec():
  def train(self, training_data):
    """Assign the demo weight matrices to ``self.w1`` and ``self.w2``.

    The visible excerpt only initialises the weights; ``training_data`` is
    not used in the lines shown here.
    """
    # Initialising weight matrices
    # Both w1 and w2 should be randomly initialised but for this demo, we pre-determine the arrays (getW1 and getW2)
    # getW1 - shape (9x10) and getW2 - shape (10x9)
    self.w1 = np.array(getW1)
    self.w2 = np.array(getW2)

## w2v_training_error_backpropagation.py
class word2vec():
  ##Removed##

  for i in range(self.epochs):
    self.loss = 0
    for w_t, w_c in training_data:
    ##Removed##

      # Calculate error
      # 1. For a target word, calculate difference between y_pred and each of the context words

## w2v_get_vector.py
# Get vector for word
vec = w2v.word_vec("machine")

class word2vec():
  ## Removed ##

  # Get vector from word
  def word_vec(self, word):
    w_index = self.word_index[word]
    v_w = self.w1[w_index]

## w2v_find_similar_words.py
# Find similar words
w2v.vec_sim("machine", 3)

class word2vec():
  ## Removed##

  # Input vector, returns nearest word(s)
  def vec_sim(self, word, top_n):
    v_w1 = self.word_vec(word)
    word_sim = {}
	aaa
	aarp
	abb
	abbott
	abogado
	ac
	academy
	accenture
	accountant
	accountants
	### Locating UI elements ###

	# By ID
	<div id="coolestWidgetEvah">...</div>
	element = driver.find_element_by_id("coolestWidgetEvah")
	or
	from selenium.webdriver.common.by import By
	element = driver.find_element(by=By.ID, value="coolestWidgetEvah")

	# By class name:
	text = "natural language processing and machine learning is fun and exciting"

	# Note the .lower() as upper and lowercase does not matter in our implementation
	# [['natural', 'language', 'processing', 'and', 'machine', 'learning', 'is', 'fun', 'and', 'exciting']]
	corpus = [[word.lower() for word in text.split()]]
	# Initialise object
	w2v = word2vec()
	# Numpy ndarray with one-hot representation for [target_word, context_words]
	training_data = w2v.generate_training_data(settings, corpus)
	class word2vec():
	def __init__(self):
	self.n = settings['n']
	self.lr = settings['learning_rate']
	self.epochs = settings['epochs']
	self.window = settings['window_size']

	def generate_training_data(self, settings, corpus):
	# Find unique word counts using dictionary
	word_counts = defaultdict(int)
	# Training
	w2v.train(training_data)

	class word2vec():
	def train(self, training_data):
	# Initialising weight matrices
	# Both w1 and w2 should be randomly initialised but for this demo, we pre-determine the arrays (getW1 and getW2)
	# getW1 - shape (9x10) and getW2 - shape (10x9)
	self.w1 = np.array(getW1)
	self.w2 = np.array(getW2)
	class word2vec():
	##Removed##

	for i in range(self.epochs):
	self.loss = 0
	for w_t, w_c in training_data:
	##Removed##

	# Calculate error
	# 1. For a target word, calculate difference between y_pred and each of the context words
	# Get vector for word
	vec = w2v.word_vec("machine")

	class word2vec():
	## Removed ##

	# Get vector from word
	def word_vec(self, word):
	w_index = self.word_index[word]
	v_w = self.w1[w_index]
	# Find similar words
	w2v.vec_sim("machine", 3)

	class word2vec():
	## Removed##

	# Input vector, returns nearest word(s)
	def vec_sim(self, word, top_n):
	v_w1 = self.word_vec(word)
	word_sim = {}