Code stub for a simple text classifier
# encoding=utf-8
# --- Adapted From ---
# Project: learn-pytorch
# Author: xingjunjie github: @gavinxing
# Create Time: 29/07/2017 11:58 AM on PyCharm
# Original code at: https://gist.github.com/GavinXing/9954ea846072e115bb07d9758892382c
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F

class CBOW(nn.Module):
    def __init__(self, vocab_size, num_classes=2, embedding_size=100):
        super(CBOW, self).__init__()
        self.word_embeddings = nn.Embedding(vocab_size, embedding_size)
        self.class_embeddings = nn.Linear(embedding_size, num_classes)

    def forward(self, inputs):
        # inputs: LongTensor of word indices, shape (sentence_length,)
        input_embeddings = self.word_embeddings(inputs)
        # sum the word embeddings into a single sentence embedding,
        # keeping a batch dimension of 1: shape (1, embedding_size)
        sent_embedding = input_embeddings.sum(dim=0, keepdim=True)
        out = self.class_embeddings(sent_embedding)
        # log-probabilities over the classes, shape (1, num_classes)
        out = F.log_softmax(out, dim=1)
        return out

def make_sentence_vector(sentence, word_to_ix):
    # map each word of the sentence to its index in the vocabulary
    idxs = [word_to_ix[w] for w in sentence]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)
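
# Not part of the original stub: a minimal sketch of how the downloaded review
# data could be loaded, assuming the aclImdb archive from
# http://ai.stanford.edu/~amaas/data/sentiment/ has been extracted so that the
# reviews live in <data_dir>/pos/*.txt and <data_dir>/neg/*.txt
# (e.g. aclImdb/train). The whitespace tokenization mirrors the toy examples below.
def load_reviews(data_dir):
    import os
    positive, negative = [], []
    for label_dir, target in (("pos", positive), ("neg", negative)):
        folder = os.path.join(data_dir, label_dir)
        for filename in os.listdir(folder):
            if not filename.endswith(".txt"):
                continue
            with open(os.path.join(folder, filename), encoding="utf-8") as f:
                # one review per file; lowercase it and split on whitespace
                target.append(f.read().lower().split())
    return positive, negative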

if __name__ == '__main__':
    EMBEDDING_SIZE = 10

    # TODO: here is some example training data, but we need more.
    # Download some movie review data from http://ai.stanford.edu/~amaas/data/sentiment/
    # and load it so you can train the classifier!
    # (The load_reviews sketch above shows one possible way to read the
    # extracted files.)
    positive = ["' stanley and iris ' show the triumph of the human spirit.".split(),
                "what a fun movie !".split()]
    negative = ["there are times when finishing a film one wishes to have a refund for the time just spent .".split(),
                "this movie was so unrelentingly bad , I could hardly believe I was watching it .".split()]

    # label each sentence: 1 for positive, 0 for negative
    data = [(s, 1) for s in positive] + [(s, 0) for s in negative]

    # extract the vocabulary and map each word to an integer index
    vocab = set(sum(positive, []) + sum(negative, []))
    vocab_size = len(vocab)
    word_to_ix = {word: i for i, word in enumerate(vocab)}

    # the network already returns log-probabilities (log_softmax), so use
    # NLLLoss here; CrossEntropyLoss would apply log_softmax a second time
    loss_func = nn.NLLLoss()
    net = CBOW(num_classes=2, embedding_size=EMBEDDING_SIZE, vocab_size=vocab_size)
    optimizer = optim.SGD(net.parameters(), lr=0.01)

    for epoch in range(200):
        total_loss = 0.0
        for sentence, label in data:
            # build a vector with the index of each word in the sentence
            sentence_var = make_sentence_vector(sentence, word_to_ix)
            # compute the log-probabilities for each class
            log_probs = net(sentence_var)
            # compute the loss against the gold label
            loss = loss_func(log_probs, autograd.Variable(
                torch.LongTensor([label])
            ))
            net.zero_grad()   # reset gradients
            loss.backward()   # compute gradients
            optimizer.step()  # update the embeddings and classifier weights
            total_loss += loss.item()
        print("loss =", total_loss)

    # Sanity check that we fitted the training set, but there is no glory in that!
    # We will need (much) more training data to generalize to new sentences
    # (and a way to handle unseen words; see the sketch at the end of the file).
    sentence_var = make_sentence_vector("what a fun movie".split(), word_to_ix)
    print("Positive prediction: ", net(sentence_var).exp())
    sentence_var = make_sentence_vector("this movie was so unrelentingly bad".split(), word_to_ix)
    print("Negative prediction: ", net(sentence_var).exp())