# Training loop for the word-level RNN language model. This method belongs to
# the model class, whose remaining pieces (__call__ returning the loss,
# reset_state, vocab, vocab_size, weights, optimizer, plus the words2onehot
# helper and the EOS token) are defined elsewhere in the article's code.
import random
from math import ceil

import numpy as np
import tensorflow as tf


def fit(self,
        sentences: list,
        batch_size: int = 128,
        epochs: int = 10) -> None:
    n_sent = len(sentences)
    num_batches = ceil(n_sent / batch_size)
    for epoch in range(epochs):
        random.shuffle(sentences)
        start = 0
        batch_idx = 0
        while start < n_sent:
            # Overwrite the same console line with the overall progress.
            print('Training model: %05.2f%%' %
                  (100*(epoch*num_batches+batch_idx+1)/(epochs*num_batches),),
                  end='\r')
            batch_idx += 1
            end = min(start+batch_size, n_sent)
            batch_sent = sentences[start:end]
            start = end
            # Sort the batch longest-first so that, at every time step, the
            # sentences still in play form a contiguous prefix of the batch.
            batch_sent.sort(reverse=True, key=lambda s: len(s))
            init_num_words = len(batch_sent)
            self.reset_state(init_num_words)
            # The first input is an all-zeros vector; after that, each step's
            # target becomes the next step's input (teacher forcing).
            x = np.zeros((init_num_words, self.vocab_size))
            # One extra step so the longest sentence also predicts EOS.
            time_steps = len(batch_sent[0]) + 1
            with tf.GradientTape() as tape:
                losses = []
                for t in range(time_steps):
                    words = []
                    for i in range(init_num_words):
                        if t > len(batch_sent[i]):
                            # Sorted longest-first: every later sentence has
                            # already ended too, so stop scanning.
                            break
                        if t == len(batch_sent[i]):
                            # This sentence ends here; its target is EOS.
                            # No break: same-length sentences that follow
                            # need their EOS target as well.
                            words.append(EOS)
                        else:
                            words.append(batch_sent[i][t])
                    y = words2onehot(self.vocab, words)
                    n = y.shape[0]
                    # Feed only the sentences still active at this step.
                    loss = self(x[0:n], y)
                    losses.append(loss)
                    x = y
                loss_value = tf.math.reduce_mean(losses)
            grads = tape.gradient(loss_value, self.weights)
            self.optimizer.apply_gradients(zip(grads, self.weights))
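# --- Not part of the gist: a minimal sketch of the helpers assumed above. ---
# words2onehot and EOS come from the rest of the article's code and their
# exact definitions are not shown here, so this is only a plausible
# reconstruction, assuming vocab is a list of tokens with EOS among them.

EOS = '<eos>'  # assumed end-of-sentence marker


def words2onehot(vocab: list, words: list) -> np.ndarray:
    # One row per word, one column per vocabulary entry.
    out = np.zeros((len(words), len(vocab)))
    for row, word in enumerate(words):
        out[row, vocab.index(word)] = 1.0
    return out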
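# --- Not part of the gist: a hypothetical usage example. ---
# Assumes a model class (here called RnnLm) that defines this fit method
# together with __call__, reset_state, vocab, vocab_size, weights and an
# optimizer; none of those construction details are confirmed by the gist.

# Sentences are lists of tokens drawn from the model's vocabulary.
sentences = [
    ['the', 'cat', 'sat'],
    ['a', 'dog', 'ran', 'home'],
]

model = RnnLm(vocab=['the', 'cat', 'sat', 'a', 'dog', 'ran', 'home', EOS])
model.fit(sentences, batch_size=2, epochs=5)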