
Sandeep Subramanian (MaximumEntropy)

  • Mistral AI
  • San Francisco Bay Area
class DilatedConvSentenceEncoder(nn.Module):
    """A Sentence Encoder with Dilated Convs."""

    def __init__(
        self, input_dim=512, hidden_dim=4096, n_layers=7,
        dropout=0.5, batch_first=True
    ):
        """Initialize params."""
        super(DilatedConvSentenceEncoder, self).__init__()
        self.input_dim = input_dim
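The preview cuts off after the first attribute. As a hedged illustration only (the kernel width of 3 and the exponentially doubled dilation schedule are assumptions, not necessarily the gist's actual choices), a self-contained sketch of such an encoder:

import torch
import torch.nn as nn
import torch.nn.functional as F

class DilatedConvEncoderSketch(nn.Module):
    """Sketch: dilation doubles per layer, so the receptive field grows
    exponentially with depth; with kernel size 3, setting padding equal
    to the dilation keeps the sequence length unchanged."""

    def __init__(self, input_dim=512, hidden_dim=4096, n_layers=7, dropout=0.5):
        super(DilatedConvEncoderSketch, self).__init__()
        self.convs = nn.ModuleList([
            nn.Conv1d(
                input_dim if i == 0 else hidden_dim, hidden_dim,
                kernel_size=3, dilation=2 ** i, padding=2 ** i,
            )
            for i in range(n_layers)
        ])
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # x: (batch, seq_len, input_dim); Conv1d expects (batch, dim, seq_len).
        h = x.transpose(1, 2)
        for conv in self.convs:
            h = self.dropout(F.relu(conv(h)))
        return h.transpose(1, 2)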
class DataIterator(object):
    """Data Iterator."""

    def _trim_vocab(self, vocab, vocab_size):
        # Discard start, end, pad and unk tokens if already present
        if '<s>' in vocab:
            del vocab['<s>']
        if '<pad>' in vocab:
            del vocab['<pad>']
        if '</s>' in vocab:
            del vocab['</s>']
        if '<unk>' in vocab:
            del vocab['<unk>']
@MaximumEntropy
MaximumEntropy / peephole_gru.py
Created October 18, 2017 20:40
Peephole GRU
class PeepholeGRU(nn.Module):
    """A Gated Recurrent Unit (GRU) cell with peepholes."""

    def __init__(
        self, input_dim, hidden_dim, n_layers,
        dropout=0., batch_first=True
    ):
        """Initialize params."""
        super(PeepholeGRU, self).__init__()
        self.input_dim = input_dim
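This preview also stops inside __init__. Peephole connections are usually described for LSTMs, where the gates read the cell state; one possible GRU analogue, sketched here purely as an assumption about what the gist implements, gives each gate an extra elementwise weight on the previous hidden state:

import torch
import torch.nn as nn

class PeepholeGRUCellSketch(nn.Module):
    """Single-step cell sketch; x has shape (batch, input_dim)."""

    def __init__(self, input_dim, hidden_dim):
        super(PeepholeGRUCellSketch, self).__init__()
        self.x2h = nn.Linear(input_dim, 3 * hidden_dim)   # input -> gate pre-activations
        self.h2h = nn.Linear(hidden_dim, 3 * hidden_dim)  # hidden -> gate pre-activations
        # Hypothetical elementwise "peephole" weights on the two gates.
        self.p_r = nn.Parameter(torch.zeros(hidden_dim))
        self.p_z = nn.Parameter(torch.zeros(hidden_dim))

    def forward(self, x, h):
        xr, xz, xn = self.x2h(x).chunk(3, dim=-1)
        hr, hz, hn = self.h2h(h).chunk(3, dim=-1)
        r = torch.sigmoid(xr + hr + self.p_r * h)  # reset gate with peephole term
        z = torch.sigmoid(xz + hz + self.p_z * h)  # update gate with peephole term
        n = torch.tanh(xn + r * hn)                # candidate state
        return (1 - z) * n + z * h                 # standard GRU interpolation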
@MaximumEntropy
MaximumEntropy / padded_rnn.py
Last active July 23, 2018 12:46
Padded RNN PyTorch
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

# Lengths must be positive and sorted in decreasing order before packing;
# the original range(10) would have produced an invalid zero-length sequence.
x = Variable(torch.randn(10, 20, 30)).cuda()
lens = list(range(1, 11))
x = pack_padded_sequence(x, lens[::-1], batch_first=True)

lstm = nn.LSTM(512, 512, 3).cuda()
x_val = Variable(torch.randn(200, 128, 512)).cuda()
y_val = Variable(torch.randn(200, 128, 512)).cuda()
h0_val = Variable(torch.randn(3, 128, 512)).cuda()
c0_val = Variable(torch.randn(3, 128, 512)).cuda()
for i in range(1000):  # the original xrange is Python 2 only
    output, (_, _) = lstm(x_val, (h0_val, c0_val))
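pad_packed_sequence is imported above but never used in the preview. A minimal sketch of the round trip, assuming a hypothetical LSTM sized to match x's 30-dim features:

# Run the packed batch through an LSTM whose input size matches x's
# feature dim (30), then unpack back to a padded tensor.
packed_lstm = nn.LSTM(30, 30, 1, batch_first=True).cuda()
packed_out, _ = packed_lstm(x)
unpacked, out_lens = pad_packed_sequence(packed_out, batch_first=True)
# unpacked: (10, 10, 30) padded outputs; out_lens: lengths [10, 9, ..., 1].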
@MaximumEntropy
MaximumEntropy / rnn_minibatch_generator.py
Created January 17, 2017 22:29
Returns a minibatch for teacher forcing, along with a mask and the length of each sentence in the minibatch
def get_minibatch(lines, index, batch_size, word2ind, max_len, add_start=False, add_end=True):
    """Prepare minibatch."""
    if add_start and add_end:
        lines = [
            ['<s>'] + line + ['</s>']
            for line in lines[index:index + batch_size]
        ]
    elif add_start and not add_end:
        lines = [
            ['<s>'] + line
            for line in lines[index:index + batch_size]
        ]
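The preview ends mid-branch; a hedged sketch of how the function might continue to produce the advertised lengths and mask (variable names here are assumptions, and the remaining add_start/add_end branches are omitted):

    # ... remaining add_start/add_end branches omitted ...
    lines = [line[:max_len] for line in lines]  # clip to max_len
    lens = [len(line) for line in lines]        # true sentence lengths
    max_batch_len = max(lens)
    # Map words to indices and right-pad to the batch maximum; assumes
    # word2ind contains '<pad>' and '<unk>' entries.
    input_lines = [
        [word2ind.get(w, word2ind['<unk>']) for w in line]
        + [word2ind['<pad>']] * (max_batch_len - len(line))
        for line in lines
    ]
    mask = [
        [1] * l + [0] * (max_batch_len - l)  # 1 = real token, 0 = padding
        for l in lens
    ]
    return input_lines, lens, mask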
@MaximumEntropy
MaximumEntropy / moses_tokenizer.py
Last active November 11, 2017 10:23
Simple Python interface to the Moses tokenizer
import subprocess
import sys

tokenizer_path = sys.argv[1]  # Path to the moses tokenizer mosesdecoder/scripts/tokenizer.perl
text = sys.argv[2]  # Text to be tokenized
lang = sys.argv[3]  # Input language ex: en, fr, de

# tokenizer.perl reads its input from stdin, so the text is piped in rather
# than passed as a command-line argument (the original passed it both ways).
pipe = subprocess.Popen(
    ["perl", tokenizer_path, '-l', lang],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE
)
pipe.stdin.write(text.encode('utf-8'))
pipe.stdin.close()
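The preview stops before the result is read back; the remaining step would be:

# Read the tokenized text from the tokenizer's stdout and decode it.
tokenized = pipe.stdout.read().decode('utf-8').strip()
print(tokenized)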