Created
October 24, 2019 20:41
-
-
Save yogi-bp/70c06e050cdf16ce5e71f65bcd349ad0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
from typing import List | |
import numpy as np | |
import torch | |
import torch.functional as F | |
import torch.nn.functional as F | |
from torch.autograd import Variable | |
# Module-level logger used by Word2Vec.word2vec_net for training progress.
logger = logging.getLogger(__name__)
# Type alias: a document is a list of word tokens (not referenced in this chunk).
Document = List[str]
# Skip-gram training hyperparameters used by Word2Vec.word2vec_net.
NUM_EPOCHS = 500
LEARNING_RATE = 0.001
EMBEDDING_DIMENSION = 256
class Word2Vec:
    """Skip-gram word2vec trained with plain SGD on one-hot inputs.

    Usage: call ``set_data`` with a vocabulary (list of words) and a list of
    (center_idx, context_idx) training pairs, then ``word2vec_net`` to train
    and obtain the embedding matrices.
    """

    def __init__(self):
        # Both attributes are populated by set_data() before training.
        self.vocabulary = None
        self.idx_pair = None

    def set_data(self, vocabulary, idx_pair):
        """Store the vocabulary and the (center, context) index pairs."""
        self.vocabulary = vocabulary
        self.idx_pair = idx_pair

    def create_input_layer(self, word_idx):
        """Return a one-hot float vector of length len(vocabulary) with 1.0 at word_idx."""
        x = torch.zeros(len(self.vocabulary)).float()
        x[word_idx] = 1.0
        return x

    def word2vec_net(self):
        """Train the two weight matrices with manual SGD and return them.

        Returns:
            tuple (W1, W2): W1 has shape (EMBEDDING_DIMENSION, vocab_size) —
            its columns are the word embeddings; W2 has shape
            (vocab_size, EMBEDDING_DIMENSION).
        """
        vocabulary_size = len(self.vocabulary)
        # torch.autograd.Variable is deprecated; plain tensors with
        # requires_grad=True are equivalent since PyTorch 0.4.
        W1 = torch.randn(EMBEDDING_DIMENSION, vocabulary_size, requires_grad=True)
        W2 = torch.randn(vocabulary_size, EMBEDDING_DIMENSION, requires_grad=True)
        # Lazy %-style args avoid formatting cost when debug logging is off.
        logger.debug("Neural net with EPOCHS=%s, LEARNING_RATE=%s, EMBEDDING_DIMENSION=%s",
                     NUM_EPOCHS, LEARNING_RATE, EMBEDDING_DIMENSION)
        for epo in range(NUM_EPOCHS):
            loss_val = 0
            for data, target in self.idx_pair:
                x = self.create_input_layer(data)
                # Direct tensor construction replaces the numpy round-trip.
                y_true = torch.tensor([target], dtype=torch.long)
                z1 = torch.matmul(W1, x)   # hidden (embedding) activation
                z2 = torch.matmul(W2, z1)  # scores over the vocabulary
                log_softmax = F.log_softmax(z2, dim=0)
                loss = F.nll_loss(log_softmax.view(1, -1), y_true)
                loss_val += loss.item()
                loss.backward()
                # Manual SGD step: update outside autograd tracking, then
                # clear gradients so they do not accumulate across pairs.
                with torch.no_grad():
                    W1 -= LEARNING_RATE * W1.grad
                    W2 -= LEARNING_RATE * W2.grad
                    W1.grad.zero_()
                    W2.grad.zero_()
            if epo % 10 == 0:
                logger.info(f'Loss at epo {epo}: {loss_val / len(self.idx_pair)}')
        # Bug fix: previously the trained weights were silently discarded.
        return W1, W2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment