# edumunozsala/encode_input_target_CLTG.py

## encode_input_target_CLTG.py
import numpy as np

def one_hot_encode(indices, dict_size):
    """Return the one-hot encoding of a collection of integer indices.

    Every integer in ``indices`` is replaced by a ``float32`` vector of
    length ``dict_size`` that holds a single 1.0 at that integer's position.

    Args:
        indices: Integer ids as a NumPy array or any array-like (for
            example a plain list, possibly nested). Each value must be a
            valid row index into a ``dict_size`` x ``dict_size`` matrix.
        dict_size: Length of each one-hot vector.

    Returns:
        A ``float32`` array of shape ``(*indices.shape, dict_size)``.
    """
    # Accept lists as well as ndarrays: callers may hand over a plain
    # Python list, which has neither ``.flatten()`` nor ``.shape``.
    indices = np.asarray(indices)

    if indices.size == 0:
        # An empty list converts to a float array, which NumPy rejects as
        # an index; there is nothing to encode, so build the result directly.
        return np.zeros((*indices.shape, dict_size), dtype=np.float32)

    # Row i of the identity matrix is exactly the one-hot vector for i, so
    # fancy-indexing the flattened ids yields one row per id.
    features = np.eye(dict_size, dtype=np.float32)[indices.ravel()]

    # Restore the original layout, with the one-hot axis appended last.
    return features.reshape((*indices.shape, dict_size))

def encode_text(input_text, vocab, one_hot=False):
    """Encode text as an array of integer character ids.

    Args:
        input_text: Iterable of characters, typically a ``str``.
        vocab: Object exposing a ``char2int`` mapping from character to id
            (assumed to hold integer values -- confirm against the
            vocabulary class).
        one_hot: When true, return the one-hot encoding of the ids, with
            vectors of length ``len(vocab.char2int)``, instead of the ids.

    Returns:
        A 1-D integer array with one id per character, or the result of
        ``one_hot_encode`` for those ids when ``one_hot`` is true.
    """
    char2int = vocab.char2int

    # Characters missing from the vocabulary fall back to id 0.
    # ``dtype=int`` keeps the platform default integer type for normal
    # input and stops empty text from silently becoming a float array.
    encoded = np.array(
        [char2int.get(character, 0) for character in input_text],
        dtype=int,
    )

    if one_hot:
        # Pass the ndarray, not the raw list: one_hot_encode relies on
        # array attributes such as ``.shape``.
        return one_hot_encode(encoded, len(char2int))
    return encoded

# Turn the raw training text into an array of character ids (no one-hot)
train_data = encode_text(sentences, vocab, one_hot=False)

# Next-character setup: the input drops the final id and the target drops
# the first one, so target_seq[i] is the id that follows input_seq[i]
input_seq, target_seq = train_data[:-1], train_data[1:]
	def one_hot_encode(indices, dict_size):
		"""Return the one-hot encoding of a collection of integer indices.

		Every integer in ``indices`` is replaced by a ``float32`` vector of
		length ``dict_size`` that holds a single 1.0 at that integer's
		position.

		NOTE(review): this repeats the ``one_hot_encode`` definition found
		earlier in the file -- confirm whether the duplicate is needed.

		Args:
			indices: Integer ids as a NumPy array or any array-like (for
				example a plain list, possibly nested). Each value must be
				a valid row index into a ``dict_size`` x ``dict_size`` matrix.
			dict_size: Length of each one-hot vector.

		Returns:
			A ``float32`` array of shape ``(*indices.shape, dict_size)``.
		"""
		# Accept lists as well as ndarrays: callers may hand over a plain
		# Python list, which has neither ``.flatten()`` nor ``.shape``.
		indices = np.asarray(indices)

		if indices.size == 0:
			# An empty list converts to a float array, which NumPy rejects
			# as an index; nothing to encode, so build the result directly.
			return np.zeros((*indices.shape, dict_size), dtype=np.float32)

		# Row i of the identity matrix is the one-hot vector for i.
		features = np.eye(dict_size, dtype=np.float32)[indices.ravel()]

		# Restore the original layout, with the one-hot axis appended last.
		return features.reshape((*indices.shape, dict_size))

	def encode_text(input_text, vocab, one_hot=False):
		"""Encode text as an array of integer character ids.

		NOTE(review): this repeats the ``encode_text`` definition found
		earlier in the file -- confirm whether the duplicate is needed.

		Args:
			input_text: Iterable of characters, typically a ``str``.
			vocab: Object exposing a ``char2int`` mapping from character to
				id (assumed to hold integer values -- confirm against the
				vocabulary class).
			one_hot: When true, return the one-hot encoding of the ids,
				with vectors of length ``len(vocab.char2int)``.

		Returns:
			A 1-D integer array with one id per character, or the result
			of ``one_hot_encode`` for those ids when ``one_hot`` is true.
		"""
		char2int = vocab.char2int

		# Characters missing from the vocabulary fall back to id 0.
		# ``dtype=int`` keeps the platform default integer type and stops
		# empty text from silently becoming a float array.
		encoded = np.array(
			[char2int.get(character, 0) for character in input_text],
			dtype=int,
		)

		if one_hot:
			# Pass the ndarray, not the raw list: one_hot_encode relies on
			# array attributes such as ``.shape``.
			return one_hot_encode(encoded, len(char2int))
		return encoded

	# Integer-encode the training text (ids only, no one-hot expansion)
	train_data = encode_text(sentences, vocab, one_hot=False)

	# Pair each position with its successor: inputs are everything except
	# the last id, targets are everything except the first id
	input_seq, target_seq = train_data[:-1], train_data[1:]