GitHub Gists by ASHUTOSH KUMAR (ashunigion)
ashunigion / hyperparameters.py
Created June 13, 2019 02:53
The hyperparameters used to instantiate the RNN-sentiment model
# Instantiate the model with hyperparameters
vocab_size = len(vocab_to_int) + 1  # +1 because index 0 is reserved for padding
output_size = 1                     # single sigmoid output: positive/negative
embedding_dim = 200
hidden_dim = 256
n_layers = 2
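The preview stops before the model is actually created; a minimal instantiation sketch, assuming the SentimentRNN class from RNN-sentiment.py below:

net = SentimentRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
print(net)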
ashunigion / RNN-sentiment.py
Created June 13, 2019 02:50
RNN model architecture for sentiment classification
import torch.nn as nn

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
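The gist preview is cut off inside __init__; a sketch of how the layer setup typically continues for this kind of architecture (the layer names and the 0.3 dropout value are assumptions, not from the preview):

        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout, final linear layer, and sigmoid output
        self.dropout = nn.Dropout(0.3)  # 0.3 is an assumed value
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()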
ashunigion / Dataloader.py
Created June 12, 2019 00:27
Creation of the train, test, and validation DataLoaders
import torch
from torch.utils.data import TensorDataset, DataLoader
# create Tensor datasets
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
# dataloaders
batch_size = 50
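The preview ends right after batch_size; a minimal sketch of the DataLoader construction these datasets are presumably fed into (shuffling all three splits is an assumption):

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)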
ashunigion / Train_test_validation.py
Created June 12, 2019 00:00
Train, test, validation split
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20

split_frac = 0.8

## split data into training and held-out sets (features and labels, x and y)
train_x, test_x, train_y, test_y = train_test_split(features, encoded_labels, test_size=1 - split_frac)

## split the held-out 20% evenly into test and validation sets
test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.5)
## print out the shapes of your resultant feature data
print((train_x.shape), (test_x.shape), (valid_x.shape))
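Since the 20% held-out chunk is split in half, the final proportions are 80/10/10; a quick sanity check:

assert len(train_x) + len(test_x) + len(valid_x) == len(features)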
ashunigion / padding.py
Created June 11, 2019 23:45
Padding the shorter reviews with zeros and truncating the longer ones
import numpy as np

def pad_features(reviews_ints, seq_length):
    ''' Return features of reviews_ints, where each review is padded with 0's
        or truncated to the input seq_length.
    '''
    features = []
    for review in reviews_ints:
        if len(review) < seq_length:
            # left-pad shorter reviews with integer zeros
            features.append([0] * (seq_length - len(review)) + review)
        else:
            # truncate longer reviews to seq_length
            features.append(review[:seq_length])
    return np.array(features)
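A usage sketch, assuming reviews_ints from Preprocess.py below; the sequence length of 200 is a placeholder, not from the gist:

seq_length = 200
features = pad_features(reviews_ints, seq_length=seq_length)
assert len(features) == len(reviews_ints)
assert len(features[0]) == seq_length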
ashunigion / Preprocess.py
Created June 11, 2019 15:20
Tokenizing the reviews for sentiment analysis
# feel free to use this import
from collections import Counter

temp = Counter(words)
temp = temp.most_common()

## Build a dictionary that maps words to integers,
## starting at 1 so that 0 stays free for padding
vocab_to_int = {}
i = 1
for pair in temp:
    vocab_to_int[pair[0]] = i
    i += 1
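The natural next step (not shown in the preview) is to encode each review with this mapping; a sketch, where reviews is an assumed list of already-cleaned review strings:

reviews_ints = []
for review in reviews:
    reviews_ints.append([vocab_to_int[word] for word in review.split()])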
ashunigion / NegativeSamplingLoss.py
Created June 11, 2019 03:48
Custom loss function with negative sampling
import torch
import torch.nn as nn

class NegativeSamplingLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input_vectors, output_vectors, noise_vectors):
        batch_size, embed_size = input_vectors.shape

        # input vectors should be a batch of column vectors
        input_vectors = input_vectors.view(batch_size, embed_size, 1)
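The preview cuts off mid-forward; a sketch of how the negative-sampling loss is typically completed (everything after the cut is an assumption consistent with the column-vector reshape above):

        # output vectors as a batch of row vectors
        output_vectors = output_vectors.view(batch_size, 1, embed_size)

        # log-sigmoid score of each correct (input, output) pair
        out_loss = torch.bmm(output_vectors, input_vectors).sigmoid().log()
        out_loss = out_loss.squeeze()

        # log-sigmoid scores of the negated noise vectors, summed over the samples
        noise_loss = torch.bmm(noise_vectors.neg(), input_vectors).sigmoid().log()
        noise_loss = noise_loss.squeeze().sum(1)

        # negate and average over the batch
        return -(out_loss + noise_loss).mean()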
ashunigion / SkipGramNeg.py
Created June 11, 2019 03:45
The skip-gram model with negative sampling
import torch
import torch.nn as nn

class SkipGramNeg(nn.Module):
    def __init__(self, n_vocab, n_embed, noise_dist=None):
        super().__init__()
        self.n_vocab = n_vocab
        self.n_embed = n_embed
        self.noise_dist = noise_dist

        # define embedding layers for input and output words
        self.in_embed = nn.Embedding(n_vocab, n_embed)
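The preview stops after the input embedding; a sketch of how the class plausibly continues, with a matching output embedding and the helpers that produce the vectors NegativeSamplingLoss consumes (method names are assumptions):

        self.out_embed = nn.Embedding(n_vocab, n_embed)

        # initialize both embedding tables with small uniform weights
        self.in_embed.weight.data.uniform_(-1, 1)
        self.out_embed.weight.data.uniform_(-1, 1)

    def forward_input(self, input_words):
        return self.in_embed(input_words)

    def forward_output(self, output_words):
        return self.out_embed(output_words)

    def forward_noise(self, batch_size, n_samples):
        ''' Sample noise words, uniformly if no noise distribution is given. '''
        if self.noise_dist is None:
            noise_dist = torch.ones(self.n_vocab)
        else:
            noise_dist = self.noise_dist
        noise_words = torch.multinomial(noise_dist, batch_size * n_samples, replacement=True)
        return self.out_embed(noise_words).view(batch_size, n_samples, self.n_embed)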
ashunigion / Model_arch.py
Created June 10, 2019 16:23
Model architecture to train word-embedding
import torch.nn as nn

class SkipGram(nn.Module):
    def __init__(self, n_vocab, n_embed):
        super().__init__()
        self.embed = nn.Embedding(n_vocab, n_embed)
        self.output = nn.Linear(n_embed, n_vocab)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # embed the input words, project back onto the vocabulary,
        # and return log-probabilities over the vocabulary
        x = self.embed(x)
        scores = self.output(x)
        return self.log_softmax(scores)
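A brief instantiation sketch; the embedding size of 300 is a placeholder, not from the gist:

model = SkipGram(n_vocab=len(vocab_to_int), n_embed=300)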
ashunigion / batching.py
Created June 10, 2019 01:27
Creating batches of words and their corresponding context words
def get_batches(words, batch_size, window_size=5):
    ''' Create a generator of word batches as a tuple (inputs, targets). '''
    n_batches = len(words) // batch_size

    # keep only enough words to make full batches
    words = words[:n_batches * batch_size]

    for idx in range(0, len(words), batch_size):
        x, y = [], []
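The generator is cut off just inside the batch loop; a sketch of how it typically continues, using a get_target helper that grabs the context words around each position (the helper is an assumption based on the description, not shown in the gist):

        batch = words[idx:idx + batch_size]
        for ii in range(len(batch)):
            batch_x = batch[ii]
            batch_y = get_target(batch, ii, window_size)
            y.extend(batch_y)
            x.extend([batch_x] * len(batch_y))
        yield x, y

where get_target could look like:

import random

def get_target(words, idx, window_size=5):
    ''' Return the words in a randomly sized window around position idx. '''
    R = random.randint(1, window_size)
    start = max(0, idx - R)
    stop = idx + R
    return words[start:idx] + words[idx + 1:stop + 1]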