
@JoshZastrow
Created May 28, 2018 01:27
PyTorch implementation of a sentiment analysis classifier using Embeddings
import collections
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from keras import preprocessing
from keras.datasets import imdb
import numpy as np
# Get Data
(X_train, Y_train), (X_test, Y_test) = imdb.load_data(seed=7, num_words=10000)
# Pad to proper dimensions
x_train = preprocessing.sequence.pad_sequences(X_train, maxlen=100)
y_train = Y_train[..., np.newaxis]
# Convert to torch tensors
x_train = torch.from_numpy(x_train).long()
y_train = torch.from_numpy(y_train).float()  # float column vector, as expected by nn.BCELoss
# Create a data loader
train = data.TensorDataset(x_train, y_train)
datagen = data.DataLoader(train, batch_size=32, shuffle=False)
# PyTorch model
class sentimentNet(nn.Module):
    def __init__(self, batch_size, num_words, input_size, embedding_size):
        super(sentimentNet, self).__init__()
        self.embedd1 = nn.Embedding(num_words, embedding_size)
        self.linear1 = nn.Linear(input_size * embedding_size, 1)

    def forward(self, inputs):
        embed1 = self.embedd1(inputs)
        flatt1 = embed1.view(-1, self.num_flat_features(embed1))
        layer1 = self.linear1(flatt1)
        # sigmoid (not log_softmax): the single output unit must be a
        # probability for nn.BCELoss
        output = torch.sigmoid(layer1)
        return output

    def num_flat_features(self, x):
        """Multiplies all non-batch dimensions to flatten a layer."""
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
# Instantiate a model
modelT = sentimentNet(batch_size=32, num_words=10000, input_size=100, embedding_size=8)
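# Sanity check (sketch, not part of the original gist): push one batch through
# the untrained model to confirm the output shape is (batch_size, 1).
with torch.no_grad():
    dummy_inputs, _ = next(iter(datagen))
    print(modelT(dummy_inputs).shape)  # expected: torch.Size([32, 1])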
# Training time and metric recorder
num_epochs = 5
train_hist = collections.defaultdict(lambda:[])
# PyTorch optimizers
cost_func = nn.BCELoss()
optimizer = torch.optim.SGD(modelT.parameters(), lr=1e-3, momentum=0.9)
# Training routine
for epoch in range(num_epochs):
    print("Epoch {} / {}".format(epoch + 1, num_epochs))
    running_loss = 0.0
    for i, data_batch in enumerate(datagen, 0):
        inputs, labels = data_batch
        optimizer.zero_grad()  # reset gradients
        pred = modelT(inputs)
        loss = cost_func(pred, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:  # print every 200 mini-batches
            train_hist['loss'] += [running_loss / 200]
            print('sample {} --> loss: {}'.format(i + 1, running_loss / 200))
            running_loss = 0.0
print('Finished Training')
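# Evaluation sketch (assumption, not part of the original gist): score the
# held-out IMDB test split using the same padding / tensor conversion as the
# training data above.
x_eval = preprocessing.sequence.pad_sequences(X_test, maxlen=100)
x_eval = torch.from_numpy(x_eval).long()
y_eval = torch.from_numpy(Y_test[..., np.newaxis]).float()
modelT.eval()
with torch.no_grad():
    probs = modelT(x_eval)
    preds = (probs > 0.5).float()
    accuracy = (preds == y_eval).float().mean().item()
print('Test accuracy: {:.3f}'.format(accuracy))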