Skip to content

Instantly share code, notes, and snippets.

@lazuxd
Created July 19, 2021 18:28
Show Gist options
  • Save lazuxd/c74a8264f78708d66f2a6d323320ea88 to your computer and use it in GitHub Desktop.
Save lazuxd/c74a8264f78708d66f2a6d323320ea88 to your computer and use it in GitHub Desktop.
EOS = "\n"  # End-of-sentence marker: the newline character (ASCII code 10)
def build_vocabulary() -> list:
    """Build the character vocabulary.

    Returns:
        A list of the 118 single-character strings with code points
        10 through 127 inclusive, in ascending order. The first entry
        is the newline character (code point 10) and the last is DEL
        (code point 127).
    """
    # Code points below 10 are deliberately left out, so index 0 of the
    # vocabulary is the newline character.
    return list(map(chr, range(10, 128)))
def word2index(vocabulary: list, word: str) -> int:
    """Return the position of ``word`` in ``vocabulary``.

    Args:
        vocabulary: The list of known tokens.
        word: The token to look up.

    Returns:
        The zero-based index of the first entry equal to ``word``.

    Raises:
        ValueError: If ``word`` does not occur in ``vocabulary``.
    """
    return vocabulary.index(word)
def words2onehot(vocabulary: list, words: list) -> np.ndarray:
    """Encode ``words`` as a one-hot matrix over ``vocabulary``.

    Args:
        vocabulary: The list of known tokens; column ``j`` of the result
            corresponds to ``vocabulary[j]``.
        words: The tokens to encode; row ``i`` of the result corresponds
            to ``words[i]``.

    Returns:
        A float array of shape ``(len(words), len(vocabulary))`` that is
        zero everywhere except for a single 1 per row, placed in the
        column of that row's token. An empty ``words`` yields an array
        with zero rows.

    Raises:
        ValueError: If any entry of ``words`` is not in ``vocabulary``.
    """
    # Record the first position of every token once, so each word is
    # resolved by a dict lookup instead of a fresh scan of the vocabulary.
    # ``setdefault`` keeps the first occurrence, matching ``list.index``.
    positions = {}
    for position, token in enumerate(vocabulary):
        positions.setdefault(token, position)

    try:
        columns = [positions[word] for word in words]
    except KeyError as err:
        # Surface the same exception type and message ``list.index`` gives.
        raise ValueError(f"{err.args[0]!r} is not in list") from None

    n_words = len(words)
    # An explicit integer dtype is required: an empty list would otherwise
    # become a float array, which NumPy rejects as an index.
    indices = np.array(columns, dtype=np.intp)

    onehot = np.zeros((n_words, len(vocabulary)))
    onehot[np.arange(n_words), indices] = 1
    return onehot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment