This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def __call__(self, | |
a: Union[np.ndarray, tf.Tensor], | |
x: Union[np.ndarray, tf.Tensor], | |
y: Union[np.ndarray, tf.Tensor, None] = None) -> tuple: | |
a_new = tf.math.tanh(tf.linalg.matmul(tf.concat([a, x], axis=1), self.wa)+self.ba) | |
y_logits = tf.linalg.matmul(a_new, self.wy)+self.by | |
if y is None: | |
# during prediction return softmax probabilities | |
return (a_new, tf.nn.softmax(y_logits)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Model: | |
def __init__(self, vocabulary: list = [], a_size: int = 0): | |
self.vocab = vocabulary | |
self.vocab_size = len(vocabulary) | |
self.a_size = a_size | |
self.combined_size = self.vocab_size + self.a_size | |
# weights and bias used to compute the new a | |
# (a = vector that is passed to the next time step)
self.wa = tf.Variable(tf.random.normal( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sample_word(vocabulary: list, prob: np.ndarray) -> str:
    """Draw a random word from the vocabulary, never returning <UNK>.

    A word is drawn with ``np.random.choice`` using ``prob`` as the
    probability of each vocabulary entry. A draw equal to ``UNK`` is
    discarded and the draw is repeated.

    Args:
        vocabulary: candidate words, aligned entry-by-entry with ``prob``.
        prob: probability assigned to each entry of ``vocabulary``
            (intended to be the softmax output of the model).

    Returns:
        The first sampled word that differs from ``UNK``.

    Note:
        The loop ends only when a non-UNK word is drawn, so it does not
        terminate if ``prob`` puts all of its mass on ``UNK``.
    """
    while True:
        word = np.random.choice(vocabulary, p=prob)
        # reject <UNK> and draw again
        if word != UNK:
            return word
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def word2index(vocabulary: list, word: str) -> int:
    """Return the index of ``word`` in ``vocabulary``.

    Args:
        vocabulary: list of known words.
        word: the word to look up.

    Returns:
        The position of the first occurrence of ``word`` in ``vocabulary``.

    Raises:
        ValueError: if ``word`` is not in ``vocabulary`` (raised by
            ``list.index``).
    """
    return vocabulary.index(word)
def words2onehot(vocabulary: list, words: list) -> np.ndarray: | |
# transforms the list of words given as argument into | |
# a one-hot matrix representation using the index in the vocabulary | |
n_words = len(words) | |
n_voc = len(vocabulary) | |
indices = np.array([word2index(vocabulary, word) for word in words]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_sentences(vocabulary: list, sentences: list) -> list: | |
# transforms the list of sentences into a list of lists of words | |
# replacing words that are not in the vocabulary with <UNK> | |
# and appending <EOS> at the end of each sentence | |
processed_sent = [] | |
n = len(sentences) | |
for i, sent in enumerate(sentences): | |
print('Creating sentences list: %05.2f%%' % (100*(i+1)/n,), end='\r') | |
s = [] | |
for word in sent.strip().split(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Special tokens used when building the vocabulary and the sentences.
UNK = '<UNK>' # Unknown word: stands in for words that are not in the vocabulary
EOS = '<EOS>' # End of sentence: appended at the end of each sentence
def build_vocabulary(sentences: list, words_to_keep: int) -> list: | |
# builds a vocabulary using 'words_to_keep' most frequent words | |
# encountered in the list of sentences | |
vocabulary = {} | |
n = len(sentences) | |
for i, s in enumerate(sentences): | |
print('Creating vocabulary: %05.2f%%' % (100*(i+1)/n,), end='\r') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Execute this with "pypy script_name.py"
from timeit import timeit

# number of times the benchmarked statement is executed
n_iter = 100000

# total time (in seconds) taken by n_iter calls of fib(20),
# with fib imported from the 'fib' module in the setup statement
py_time = timeit(
    stmt='py_fib(20)',
    setup='from fib import fib as py_fib',
    number=n_iter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from timeit import timeit | |
n_iter = 100000 | |
py_time = timeit( | |
stmt='py_fib(20)', | |
setup='from fib import fib as py_fib', | |
number=n_iter) | |
c_module_time = timeit( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import platform | |
from fib import fib as py_fib | |
from my_c_module import fib as c_module_fib | |
from ctypes import * | |
def print_c_array(ptr, n): | |
print('[', end='') | |
for i in range(n): | |
if i == n-1: | |
print(ptr[i], end=']\n') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fib(n): | |
fib_lst = [] | |
if n >= 1: | |
fib_lst.append(0) | |
if n >= 2: | |
fib_lst.append(1) | |
for i in range(2, n): | |
fib_lst.append(fib_lst[-2]+fib_lst[-1]) | |