Code stub for a simple word2vec model
import torch
import torch.nn as nn
from torch.autograd import Variable
class SimpleW2V(nn.Module):
    def __init__(self, nwords, ncontexts, vec_size):
        super(SimpleW2V, self).__init__()
        # randomly initialized vectors
        self.words_emb = nn.Embedding(nwords, vec_size)
        self.ctxt_emb = nn.Embedding(ncontexts, vec_size)

    def forward(self, target_idx, context_idx):
        # compute the dot product + activation
        return torch.log(nn.functional.sigmoid(
            self.words_emb(target_idx).dot(self.ctxt_emb(context_idx))))
#
#
# Here you can build the vocabulary based on the words present in the text.
# (Note the two vocabularies can actually be the same)
#
#
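# A minimal sketch of how a vocabulary could be built from a corpus instead of
# the hard-coded dictionaries below (the file name "corpus.txt" and whitespace
# tokenization are assumptions, not part of the original stub):
def build_vocab(path):
    vocab = {}
    with open(path) as f:
        for line in f:
            for token in line.lower().split():
                if token not in vocab:
                    vocab[token] = len(vocab)
    return vocab
# e.g. words = ctxt = build_vocab('corpus.txt')  # the two vocabularies can be shared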
words = {'love': 0, 'affection': 1, 'computer': 2, 'football': 3}
ctxt = {'feeling': 0, 'letter': 1, 'chip': 2, 'team': 3}
model = SimpleW2V(len(words), len(ctxt), vec_size=10)
optim = torch.optim.SGD(model.parameters(), lr=0.1)  # lr is the learning rate (the alpha constant)
def wrap(idx):
    # wrap an integer as a PyTorch Variable so it can be fed to the model
    return Variable(torch.LongTensor([idx]))
# "training"
print("log(σ(love ⋅ feeling)) (before) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["feeling"])).data[0]))
print("log(σ(love ⋅ team)) (before) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["team"])).data[0]))
#
#
# TODO: Here you can add a for loop iterating over a large text file,
# where for each word you look at the 2 words to its left and the
# 2 words to its right. E.g., with `tokens` the list of words in the file:
# for i, target in enumerate(tokens):
#     for context in tokens[i-CONTEXT_SIZE:i] + tokens[i+1:i+1+CONTEXT_SIZE]:
#         fake_contexts = random.sample(list(ctxt.keys()), NUM_NEGATIVES)  # remember to check that "context" is not sampled by chance
# (a fuller sketch of this loop is given at the end of the file)
#
#
#
target = 'love'
context = 'feeling'
fake_contexts = ['chip', 'team']
# convert to indices and wrap in Variables
target_idx = wrap(words[target])
context_idx = wrap(ctxt[context])
fake_contexts_idx = [wrap(ctxt[fake_context])
                     for fake_context in fake_contexts]
# evaluate the objective
obj = model(target_idx, context_idx) \
    - sum(model(target_idx, fake_context_idx)
          for fake_context_idx in fake_contexts_idx)
# the optimizer minimizes the loss, so to maximize the objective we minimize its negative
loss = -obj
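# Written out for this example, the quantity being maximized is
#   obj = log σ(v_love ⋅ u_feeling) - log σ(v_love ⋅ u_chip) - log σ(v_love ⋅ u_team)
# where v_* are rows of words_emb and u_* are rows of ctxt_emb (notation added
# here for clarity); maximizing it pulls "love" towards "feeling" and pushes it
# away from the fake contexts.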
# optimize the objective (all automatically done for you!)
model.zero_grad() # reset gradients
loss.backward() # compute gradients
optim.step() # update vectors
print("log(σ(love ⋅ feeling)) (after) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["feeling"])).data[0]))
print("log(σ(love ⋅ team)) (after) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["team"])).data[0]))
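# A possible sketch of the training loop described in the TODO above. It assumes
# a list `tokens` holding the corpus as strings that all appear in both `words`
# and `ctxt`, plus the constants CONTEXT_SIZE and NUM_NEGATIVES; none of these
# come from the original stub.
import random

def train_epoch(tokens, CONTEXT_SIZE=2, NUM_NEGATIVES=2):
    for i, target in enumerate(tokens):
        window = tokens[max(0, i - CONTEXT_SIZE):i] + tokens[i + 1:i + 1 + CONTEXT_SIZE]
        for context in window:
            # sample fake contexts, making sure the true context is not among them
            fake_contexts = [c for c in random.sample(list(ctxt.keys()), NUM_NEGATIVES + 1)
                             if c != context][:NUM_NEGATIVES]
            # same single-example update as above, repeated for every pair
            obj = model(wrap(words[target]), wrap(ctxt[context])) \
                - sum(model(wrap(words[target]), wrap(ctxt[fake]))
                      for fake in fake_contexts)
            loss = -obj
            model.zero_grad()
            loss.backward()
            optim.step()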