Created
November 30, 2018 23:02
-
-
Save weiyangfb/b2b16c88997f0e85e6d8f0845bbf9531 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Toy corpus: (tokenized sentence, language label) pairs.
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]
test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# word_to_ix maps each distinct word (train + test) to a unique integer,
# in order of first appearance; that integer is the word's index into the
# bag-of-words vector.
word_to_ix = {}
for tokens, _ in data + test_data:
    for token in tokens:
        word_to_ix.setdefault(token, len(word_to_ix))
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2
class BoWClassifier(nn.Module):  # inheriting from nn.Module!
    """Bag-of-words language classifier: a sparse affine map + log-softmax.

    Holds one learnable weight matrix ``W`` of shape
    ``(vocab_size, num_labels)`` (no bias term).  The input bag-of-words
    row vector is multiplied against it with a sparse matmul, and the
    resulting scores are normalized to log-probabilities.
    """

    def __init__(self, num_labels, vocab_size):
        # Always initialize the nn.Module machinery first so parameter
        # registration works.
        super(BoWClassifier, self).__init__()
        # W is stored dense (so the optimizer can update it) but is
        # consumed by a sparse matmul in forward().  Input dimension is
        # vocab_size, output is num_labels.
        self.W = torch.nn.Parameter(torch.zeros(size=(vocab_size, num_labels)))
        torch.nn.init.xavier_normal_(self.W, gain=1.414)
        # The log-softmax non-linearity has no parameters, so nothing
        # else needs registering here.

    def forward(self, bow_vec):
        """Map a sparse (1, vocab_size) BoW row to (1, num_labels) log-probs."""
        scores = torch.sparse.mm(bow_vec, self.W)
        return torch.nn.functional.log_softmax(scores, dim=1)
def make_bow_vector(sentence, word_to_ix):
    """Return a sparse (1, vocab_size) tensor of word counts for *sentence*.

    Each token must be present in *word_to_ix*; unknown tokens raise KeyError.
    """
    counts = torch.zeros(len(word_to_ix))
    for token in sentence:
        idx = word_to_ix[token]
        counts[idx] = counts[idx] + 1
    # Convert to sparse and add a leading batch dimension of 1.
    sparse_row = counts.to_sparse()
    return torch.stack([sparse_row])
def make_target(label, label_to_ix):
    """Wrap the integer class index for *label* in a 1-element LongTensor."""
    index = label_to_ix[label]
    return torch.tensor([index], dtype=torch.long)
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# nn.Module records every Parameter assigned to an attribute in __init__
# (here self.W), so iterating model.parameters() yields it — no manual
# bookkeeping needed.
for param in model.parameters():
    print(param)

# Score one training sentence with the untrained model.  Wrapped in
# no_grad() because this is pure inference; no gradients are needed.
with torch.no_grad():
    first_sentence, _ = data[0]
    print(model(make_bow_vector(first_sentence, word_to_ix)))

# Score the test sentences before training, so we have a baseline to
# compare against after the optimization loop runs.
with torch.no_grad():
    for sentence, _ in test_data:
        before = model(make_bow_vector(sentence, word_to_ix))
        print(before, '\n log_probs-----------')

# Weight matrix row for the word "creo" before any updates.
print(next(model.parameters())[word_to_ix["creo"], :])
# Map each label string to the row index of the model's log-probability
# output.  BUG FIX: the original script used label_to_ix in the training
# loop without ever defining it, which raises NameError on the first
# training step.  SPANISH -> 0 matches the convention described below.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more
# than two instances.  Usually 5-30 epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. PyTorch accumulates gradients, so clear them out
        # before each instance.
        model.zero_grad()
        # Step 2. Build the sparse BoW input and the integer target.
        # If the target is SPANISH we wrap the integer 0, so NLLLoss
        # reads the 0th log-probability as the SPANISH score.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)
        # Step 3. Forward pass -> log-probabilities.
        log_probs = model(bow_vec)
        # Step 4. Compute the loss, backpropagate, and update W.
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
# Re-score the test sentences now that the model is trained; compare
# these log-probabilities with the pre-training printout above.
with torch.no_grad():
    for sentence, _ in test_data:
        after = model(make_bow_vector(sentence, word_to_ix))
        print(after, '\n log_probs------------')

# The "creo" weight row after training: the index corresponding to
# Spanish goes up, English goes down!
print(next(model.parameters())[word_to_ix["creo"], :])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment