# bharatc9530/one_hot.py

## one_hot.py
# Fit a tokenizer on the corpus solely to measure its vocabulary.
# NOTE(review): Tokenizer and one_hot are presumably the Keras text
# preprocessing helpers; they, along with `docs`, `X_train` and `X_test`,
# are defined outside this snippet -- confirm against the file's imports.
t = Tokenizer()
t.fit_on_texts(docs)

# One more than the number of words the tokenizer indexed.
vocab_size = len(t.word_index) + 1
print(vocab_size)

# Characters stripped from every document before encoding. The backslash is
# spelled "\\" so the literal does not rely on the invalid "\]" escape
# sequence; the resulting string value is unchanged.
_ONE_HOT_FILTERS = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'

# Integer encode the documents.
# This must run exactly once: X_train and X_test are rebound to the encoded
# output here, so a second pass would feed already-encoded data to one_hot.
X_train = [
    one_hot(d, vocab_size, filters=_ONE_HOT_FILTERS, lower=True, split=' ')
    for d in X_train
]
X_test = [
    one_hot(d, vocab_size, filters=_ONE_HOT_FILTERS, lower=True, split=' ')
    for d in X_test
]