Created
November 30, 2018 23:02
-
-
Save weiyangfb/b2b16c88997f0e85e6d8f0845bbf9531 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Toy corpus: (tokenized sentence, language label) pairs.
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]
test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# word_to_ix maps each distinct word (train + test) to a unique integer,
# in order of first appearance; that integer is the word's index into the
# bag-of-words vector.
word_to_ix = {}
for tokens, _ in data + test_data:
    for token in tokens:
        word_to_ix.setdefault(token, len(word_to_ix))
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2
class BoWClassifier(nn.Module):  # inheriting from nn.Module!
    """Bag-of-words language classifier: a sparse affine map + log-softmax.

    Holds one learnable weight matrix ``W`` of shape
    ``(vocab_size, num_labels)`` (no bias term).  The input bag-of-words
    row vector is multiplied against it with a sparse matmul, and the
    resulting scores are normalized to log-probabilities.
    """

    def __init__(self, num_labels, vocab_size):
        # Always initialize the nn.Module machinery first so parameter
        # registration works.
        super(BoWClassifier, self).__init__()
        # W is stored dense (so the optimizer can update it) but is
        # consumed by a sparse matmul in forward().  Input dimension is
        # vocab_size, output is num_labels.
        self.W = torch.nn.Parameter(torch.zeros(size=(vocab_size, num_labels)))
        torch.nn.init.xavier_normal_(self.W, gain=1.414)
        # The log-softmax non-linearity has no parameters, so nothing
        # else needs registering here.

    def forward(self, bow_vec):
        """Map a sparse (1, vocab_size) BoW row to (1, num_labels) log-probs."""
        scores = torch.sparse.mm(bow_vec, self.W)
        return torch.nn.functional.log_softmax(scores, dim=1)
def make_bow_vector(sentence, word_to_ix):
    """Return a sparse (1, vocab_size) tensor of word counts for *sentence*.

    Each token must be present in *word_to_ix*; unknown tokens raise KeyError.
    """
    counts = torch.zeros(len(word_to_ix))
    for token in sentence:
        idx = word_to_ix[token]
        counts[idx] = counts[idx] + 1
    # Convert to sparse and add a leading batch dimension of 1.
    sparse_row = counts.to_sparse()
    return torch.stack([sparse_row])
def make_target(label, label_to_ix):
    """Wrap the integer class index for *label* in a 1-element LongTensor."""
    index = label_to_ix[label]
    return torch.tensor([index], dtype=torch.long)
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# nn.Module records every Parameter assigned to an attribute in __init__
# (here self.W), so iterating model.parameters() yields it — no manual
# bookkeeping needed.
for param in model.parameters():
    print(param)

# Score one training sentence with the untrained model.  Wrapped in
# no_grad() because this is pure inference; no gradients are needed.
with torch.no_grad():
    first_sentence, _ = data[0]
    print(model(make_bow_vector(first_sentence, word_to_ix)))

# Score the test sentences before training, so we have a baseline to
# compare against after the optimization loop runs.
with torch.no_grad():
    for sentence, _ in test_data:
        before = model(make_bow_vector(sentence, word_to_ix))
        print(before, '\n log_probs-----------')

# Weight matrix row for the word "creo" before any updates.
print(next(model.parameters())[word_to_ix["creo"], :])
# Map each label string to the row index of the model's log-probability
# output.  BUG FIX: the original script used label_to_ix in the training
# loop without ever defining it, which raises NameError on the first
# training step.  SPANISH -> 0 matches the convention described below.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more
# than two instances.  Usually 5-30 epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. PyTorch accumulates gradients, so clear them out
        # before each instance.
        model.zero_grad()
        # Step 2. Build the sparse BoW input and the integer target.
        # If the target is SPANISH we wrap the integer 0, so NLLLoss
        # reads the 0th log-probability as the SPANISH score.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)
        # Step 3. Forward pass -> log-probabilities.
        log_probs = model(bow_vec)
        # Step 4. Compute the loss, backpropagate, and update W.
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
# Re-score the test sentences now that the model is trained; compare
# these log-probabilities with the pre-training printout above.
with torch.no_grad():
    for sentence, _ in test_data:
        after = model(make_bow_vector(sentence, word_to_ix))
        print(after, '\n log_probs------------')

# The "creo" weight row after training: the index corresponding to
# Spanish goes up, English goes down!
print(next(model.parameters())[word_to_ix["creo"], :])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment