GitHub Gists by ASHUTOSH KUMAR (ashunigion)
ashunigion / hyperparameters.py
Created June 13, 2019 02:53
The hyperparameters used to instantiate the RNN-sentiment model
# Instantiate the model with hyperparameters
vocab_size = len(vocab_to_int) + 1  # +1 because index 0 is reserved for padding
output_size = 1                     # single sigmoid output: positive/negative
embedding_dim = 200
hidden_dim = 256
n_layers = 2
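The preview stops before the model is actually created; a minimal instantiation sketch, assuming the SentimentRNN class from RNN-sentiment.py below:

net = SentimentRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
print(net)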
ashunigion / RNN-sentiment.py
Created June 13, 2019 02:50
RNN model architecture for sentiment classification
import torch.nn as nn

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
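The gist preview is cut off inside __init__; a sketch of how the layer setup typically continues for this kind of architecture (the layer names and the 0.3 dropout value are assumptions, not from the preview):

        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout, final linear layer, and sigmoid output
        self.dropout = nn.Dropout(0.3)  # 0.3 is an assumed value
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()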
ashunigion / Dataloader.py
Created June 12, 2019 00:27
Creation of the train, test, and validation DataLoaders
import torch
from torch.utils.data import TensorDataset, DataLoader
# create Tensor datasets
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
# dataloaders
batch_size = 50
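The preview ends right after batch_size; a minimal sketch of the DataLoader construction these datasets are presumably fed into (shuffling all three splits is an assumption):

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)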
ashunigion / Train_test_validation.py
Created June 12, 2019 00:00
Train, test, validation split
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20

split_frac = 0.8

## split data into training and held-out sets (features and labels, x and y)
train_x, test_x, train_y, test_y = train_test_split(features, encoded_labels, test_size=1 - split_frac)

## split the held-out 20% evenly into test and validation sets
test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.5)
## print out the shapes of your resultant feature data
print((train_x.shape), (test_x.shape), (valid_x.shape))
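Since the 20% held-out chunk is split in half, the final proportions are 80/10/10; a quick sanity check:

assert len(train_x) + len(test_x) + len(valid_x) == len(features)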
ashunigion / padding.py
Created June 11, 2019 23:45
Padding the shorter reviews with zeros and truncating the longer ones
import numpy as np

def pad_features(reviews_ints, seq_length):
    ''' Return features of reviews_ints, where each review is padded with 0's
        or truncated to the input seq_length.
    '''
    features = []
    for review in reviews_ints:
        if len(review) < seq_length:
            # left-pad shorter reviews with integer zeros
            features.append([0] * (seq_length - len(review)) + review)
        else:
            # truncate longer reviews to seq_length
            features.append(review[:seq_length])
    return np.array(features)
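A usage sketch, assuming reviews_ints from Preprocess.py below; the sequence length of 200 is a placeholder, not from the gist:

seq_length = 200
features = pad_features(reviews_ints, seq_length=seq_length)
assert len(features) == len(reviews_ints)
assert len(features[0]) == seq_length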
ashunigion / Preprocess.py
Created June 11, 2019 15:20
Tokenizing the reviews for sentiment analysis
# feel free to use this import
from collections import Counter

temp = Counter(words)
temp = temp.most_common()

## Build a dictionary that maps words to integers,
## starting at 1 so that 0 stays free for padding
vocab_to_int = {}
i = 1
for pair in temp:
    vocab_to_int[pair[0]] = i
    i += 1
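The natural next step (not shown in the preview) is to encode each review with this mapping; a sketch, where reviews is an assumed list of already-cleaned review strings:

reviews_ints = []
for review in reviews:
    reviews_ints.append([vocab_to_int[word] for word in review.split()])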
ashunigion / NegativeSamplingLoss.py
Created June 11, 2019 03:48
Custom loss function with negative sampling
import torch
import torch.nn as nn

class NegativeSamplingLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input_vectors, output_vectors, noise_vectors):
        batch_size, embed_size = input_vectors.shape

        # input vectors should be a batch of column vectors
        input_vectors = input_vectors.view(batch_size, embed_size, 1)
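The preview cuts off mid-forward; a sketch of how the negative-sampling loss is typically completed (everything after the cut is an assumption consistent with the column-vector reshape above):

        # output vectors as a batch of row vectors
        output_vectors = output_vectors.view(batch_size, 1, embed_size)

        # log-sigmoid score of each correct (input, output) pair
        out_loss = torch.bmm(output_vectors, input_vectors).sigmoid().log()
        out_loss = out_loss.squeeze()

        # log-sigmoid scores of the negated noise vectors, summed over the samples
        noise_loss = torch.bmm(noise_vectors.neg(), input_vectors).sigmoid().log()
        noise_loss = noise_loss.squeeze().sum(1)

        # negate and average over the batch
        return -(out_loss + noise_loss).mean()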
ashunigion / SkipGramNeg.py
Created June 11, 2019 03:45
The skip-gram model with negative sampling
import torch
import torch.nn as nn

class SkipGramNeg(nn.Module):
    def __init__(self, n_vocab, n_embed, noise_dist=None):
        super().__init__()
        self.n_vocab = n_vocab
        self.n_embed = n_embed
        self.noise_dist = noise_dist

        # define embedding layers for input and output words
        self.in_embed = nn.Embedding(n_vocab, n_embed)
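The preview stops after the input embedding; a sketch of how the class plausibly continues, with a matching output embedding and the helpers that produce the vectors NegativeSamplingLoss consumes (method names are assumptions):

        self.out_embed = nn.Embedding(n_vocab, n_embed)

        # initialize both embedding tables with small uniform weights
        self.in_embed.weight.data.uniform_(-1, 1)
        self.out_embed.weight.data.uniform_(-1, 1)

    def forward_input(self, input_words):
        return self.in_embed(input_words)

    def forward_output(self, output_words):
        return self.out_embed(output_words)

    def forward_noise(self, batch_size, n_samples):
        ''' Sample noise words, uniformly if no noise distribution is given. '''
        if self.noise_dist is None:
            noise_dist = torch.ones(self.n_vocab)
        else:
            noise_dist = self.noise_dist
        noise_words = torch.multinomial(noise_dist, batch_size * n_samples, replacement=True)
        return self.out_embed(noise_words).view(batch_size, n_samples, self.n_embed)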
ashunigion / Model_arch.py
Created June 10, 2019 16:23
Model architecture to train word-embedding
import torch.nn as nn

class SkipGram(nn.Module):
    def __init__(self, n_vocab, n_embed):
        super().__init__()
        self.embed = nn.Embedding(n_vocab, n_embed)
        self.output = nn.Linear(n_embed, n_vocab)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # embed the input words, project back onto the vocabulary,
        # and return log-probabilities over the vocabulary
        x = self.embed(x)
        scores = self.output(x)
        return self.log_softmax(scores)
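A brief instantiation sketch; the embedding size of 300 is a placeholder, not from the gist:

model = SkipGram(n_vocab=len(vocab_to_int), n_embed=300)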
ashunigion / batching.py
Created June 10, 2019 01:27
Creating batches of words and their corresponding context words
def get_batches(words, batch_size, window_size=5):
    ''' Create a generator of word batches as a tuple (inputs, targets). '''
    n_batches = len(words) // batch_size

    # keep only enough words to make full batches
    words = words[:n_batches * batch_size]

    for idx in range(0, len(words), batch_size):
        x, y = [], []
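The generator is cut off just inside the batch loop; a sketch of how it typically continues, using a get_target helper that grabs the context words around each position (the helper is an assumption based on the description, not shown in the gist):

        batch = words[idx:idx + batch_size]
        for ii in range(len(batch)):
            batch_x = batch[ii]
            batch_y = get_target(batch, ii, window_size)
            y.extend(batch_y)
            x.extend([batch_x] * len(batch_y))
        yield x, y

where get_target could look like:

import random

def get_target(words, idx, window_size=5):
    ''' Return the words in a randomly sized window around position idx. '''
    R = random.randint(1, window_size)
    start = max(0, idx - R)
    stop = idx + R
    return words[start:idx] + words[idx + 1:stop + 1]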