Code stub for a simple word2vec model
import torch
import torch.nn as nn
from torch.autograd import Variable
class SimpleW2V(nn.Module):
    def __init__(self, nwords, ncontexts, vec_size):
        super(SimpleW2V, self).__init__()
        # randomly initialized vectors
        self.words_emb = nn.Embedding(nwords, vec_size)
        self.ctxt_emb = nn.Embedding(ncontexts, vec_size)

    def forward(self, target_idx, context_idx):
        # compute the dot product + activation
        return torch.log(nn.functional.sigmoid(
            self.words_emb(target_idx).dot(self.ctxt_emb(context_idx))))
#
#
# Here you can build the vocabulary based on the words present in the text.
# (Note the two vocabularies can actually be the same)
#
#
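# A minimal sketch of how a vocabulary could be built from a corpus instead of
# the hard-coded dictionaries below (the file name "corpus.txt" and whitespace
# tokenization are assumptions, not part of the original stub):
def build_vocab(path):
    vocab = {}
    with open(path) as f:
        for line in f:
            for token in line.lower().split():
                if token not in vocab:
                    vocab[token] = len(vocab)
    return vocab
# e.g. words = ctxt = build_vocab('corpus.txt')  # the two vocabularies can be shared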
words = {'love': 0, 'affection': 1, 'computer': 2, 'football': 3}
ctxt = {'feeling': 0, 'letter': 1, 'chip': 2, 'team': 3}
model = SimpleW2V(len(words), len(ctxt), vec_size=10)
optim = torch.optim.SGD(model.parameters(), lr=0.1)  # lr is the learning rate (the alpha constant)
def wrap(idx):
    # wrap an integer as a PyTorch Variable so it can be fed to the model
    return Variable(torch.LongTensor([idx]))
# "training"
print("log(σ(love ⋅ feeling)) (before) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["feeling"])).data[0]))
print("log(σ(love ⋅ team)) (before) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["team"])).data[0]))
#
#
# TODO: Here you can add a for loop iterating over a large text file,
# where for each word you look at the 2 words to its left and the
# 2 words to its right. E.g., with `tokens` the list of words in the file:
# for i, target in enumerate(tokens):
#     for context in tokens[i-CONTEXT_SIZE:i] + tokens[i+1:i+1+CONTEXT_SIZE]:
#         fake_contexts = random.sample(list(ctxt.keys()), NUM_NEGATIVES)  # remember to check that "context" is not sampled by chance
# (a fuller sketch of this loop is given at the end of the file)
#
#
#
target = 'love'
context = 'feeling'
fake_contexts = ['chip', 'team']
# convert to indices and wrap in Variables
target_idx = wrap(words[target])
context_idx = wrap(ctxt[context])
fake_contexts_idx = [wrap(ctxt[fake_context])
                     for fake_context in fake_contexts]
# evaluate the objective
obj = model(target_idx, context_idx) \
    - sum(model(target_idx, fake_context_idx)
          for fake_context_idx in fake_contexts_idx)
# the optimizer minimizes the loss, so to maximize the objective we minimize its negative
loss = -obj
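# Written out for this example, the quantity being maximized is
#   obj = log σ(v_love ⋅ u_feeling) - log σ(v_love ⋅ u_chip) - log σ(v_love ⋅ u_team)
# where v_* are rows of words_emb and u_* are rows of ctxt_emb (notation added
# here for clarity); maximizing it pulls "love" towards "feeling" and pushes it
# away from the fake contexts.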
# optimize the objective (all automatically done for you!)
model.zero_grad() # reset gradients
loss.backward() # compute gradients
optim.step() # update vectors
print("log(σ(love ⋅ feeling)) (after) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["feeling"])).data[0]))
print("log(σ(love ⋅ team)) (after) = {:.2f}".format(
model(wrap(words["love"]), wrap(ctxt["team"])).data[0]))
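# A possible sketch of the training loop described in the TODO above. It assumes
# a list `tokens` holding the corpus as strings that all appear in both `words`
# and `ctxt`, plus the constants CONTEXT_SIZE and NUM_NEGATIVES; none of these
# come from the original stub.
import random

def train_epoch(tokens, CONTEXT_SIZE=2, NUM_NEGATIVES=2):
    for i, target in enumerate(tokens):
        window = tokens[max(0, i - CONTEXT_SIZE):i] + tokens[i + 1:i + 1 + CONTEXT_SIZE]
        for context in window:
            # sample fake contexts, making sure the true context is not among them
            fake_contexts = [c for c in random.sample(list(ctxt.keys()), NUM_NEGATIVES + 1)
                             if c != context][:NUM_NEGATIVES]
            # same single-example update as above, repeated for every pair
            obj = model(wrap(words[target]), wrap(ctxt[context])) \
                - sum(model(wrap(words[target]), wrap(ctxt[fake]))
                      for fake in fake_contexts)
            loss = -obj
            model.zero_grad()
            loss.backward()
            optim.step()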