Created
March 12, 2020 09:13
-
-
Save aravindpai/95f42e1e61d387e55b57c7d405e1a5c8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Fixed length that every integer sequence is padded/truncated to.
# Kept in one place so the training and validation splits always agree.
MAX_SEQ_LEN = 100

# NOTE(review): x_tr and x_val are not defined in this snippet; they are
# presumably the training / validation texts created earlier -- confirm.

# Tokenize the sentences
tokenizer = Tokenizer()

# Prepare the vocabulary from the training texts only; the validation
# texts are encoded below with this same fitted tokenizer.
tokenizer.fit_on_texts(list(x_tr))

# Convert text into integer sequences
x_tr_seq = tokenizer.texts_to_sequences(x_tr)
x_val_seq = tokenizer.texts_to_sequences(x_val)

# Pad so that all sequences have the same length
x_tr_seq = pad_sequences(x_tr_seq, maxlen=MAX_SEQ_LEN)
x_val_seq = pad_sequences(x_val_seq, maxlen=MAX_SEQ_LEN)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment