Skip to content

Instantly share code, notes, and snippets.

@Lexie88rus
Created August 30, 2019 20:49
Show Gist options
  • Save Lexie88rus/37dff389bbd8bfa3597ba6d77b3249f8 to your computer and use it in GitHub Desktop.
Save Lexie88rus/37dff389bbd8bfa3597ba6d77b3249f8 to your computer and use it in GitHub Desktop.
Encode words as tensors
import torch
# Look up the vocabulary index of a word
def wordToIndex(word):
    """Return the entry stored in ``vocab`` for ``word``.

    Any word other than the ``end_of_sentence`` marker is passed through
    ``clean_title`` before the lookup; the marker itself is looked up
    unchanged.

    Raises whatever ``vocab[...]`` raises when the (cleaned) word is not a
    key of ``vocab``.
    """
    # The end-of-sentence marker is looked up verbatim, without cleaning.
    key = clean_title(word) if word != end_of_sentence else word
    return vocab[key]
# Encode a single word as a 1-hot tensor
def wordToTensor(word):
    """Return a ``1 x 1 x vocab_size`` tensor encoding ``word``.

    The tensor is all zeros except for a single 1 at the position given by
    ``wordToIndex(word)`` along the last dimension.
    """
    one_hot = torch.zeros(1, 1, vocab_size)
    one_hot[0, 0, wordToIndex(word)] = 1
    return one_hot
# Encode a whole title as a <(number_of_words + 1) x 1 x vocab_size> tensor,
# i.e. one one-hot row per word plus a final end-of-sentence row
def titleToTensor(title):
    """Return the one-hot encoding of ``title`` followed by an end marker.

    ``title`` is split with ``extract_words``; row ``i`` of the result has a
    1 at ``wordToIndex`` of the i-th word. One extra trailing row has a 1 at
    ``vocab[end_of_sentence]``.
    """
    words = extract_words(title)
    eos_row = len(words)  # the row right after the last word

    encoded = torch.zeros(eos_row + 1, 1, vocab_size)
    for row, word in enumerate(words):
        encoded[row, 0, wordToIndex(word)] = 1
    encoded[eos_row, 0, vocab[end_of_sentence]] = 1
    return encoded
# Encode a sequence of words as a <sequence_length x 1 x vocab_size> tensor
def sequenceToTensor(sequence):
    """Return one one-hot row per element of ``sequence``.

    Row ``i`` is all zeros except for a 1 at ``wordToIndex(sequence[i])``.
    No end-of-sentence row is appended here; an ``end_of_sentence`` element
    already present in ``sequence`` is encoded like any other element (via
    ``wordToIndex``).
    """
    encoded = torch.zeros(len(sequence), 1, vocab_size)
    for row, word in enumerate(sequence):
        encoded[row, 0, wordToIndex(word)] = 1
    return encoded
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment