Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created August 21, 2020 05:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/e94776a6f312cc1e71cf2628083360d4 to your computer and use it in GitHub Desktop.
Save amankharwal/e94776a6f312cc1e71cf2628083360d4 to your computer and use it in GitHub Desktop.
from keras.preprocessing.text import Tokenizer
# Load the pre-trained word vectors. `dim` is whatever the loader returns as
# its second value (presumably the vector size, 300 per the file name --
# confirm against getEmbeddings).
embeddings, dim = getEmbeddings('emosense.300d.txt')

# Fit the tokenizer on the embedding vocabulary itself rather than on the
# corpus: all embedding keys are joined into one space-separated "document".
# filters='' disables Keras' default punctuation stripping so keys that
# contain punctuation are kept as-is.
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts([' '.join(embeddings.keys())])

# word -> integer index mapping produced by the fit above, and the matrix
# built from it by the project helper.
wordIndex = tokenizer.word_index
embeddings_matrix = getEmbeddingMatrix(wordIndex, embeddings, dim)
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
# Length every tokenised message is padded/truncated to.
MAX_SEQUENCE_LENGTH = 24

# Hold out 20% of the training data for validation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    texts_train, labels_train, test_size=0.2, random_state=42
)

# to_categorical() infers the number of classes as max(label) + 1 for each
# call separately. If any split happens to lack the highest class id, its
# one-hot matrix would come out narrower than the others. Compute the class
# count once over all labelled sets and pass it explicitly so every one-hot
# matrix has the same width.
NUM_CLASSES = int(
    max(np.max(labels_train), np.max(labels_dev), np.max(labels_test))
) + 1

# One-hot encode the integer labels of every split.
labels_categorical_train = to_categorical(np.asarray(y_train), num_classes=NUM_CLASSES)
labels_categorical_val = to_categorical(np.asarray(y_val), num_classes=NUM_CLASSES)
labels_categorical_dev = to_categorical(np.asarray(labels_dev), num_classes=NUM_CLASSES)
labels_categorical_test = to_categorical(np.asarray(labels_test), num_classes=NUM_CLASSES)
def get_sequances(texts, sequence_length):
    """Tokenise and pad the first three columns of *texts*.

    Args:
        texts: Object supporting 2-D slicing of the form ``texts[:, i]`` for
            ``i`` in 0..2 (presumably a NumPy array with one message per
            column -- confirm against the caller).
        sequence_length: Passed to ``pad_sequences`` as its second positional
            argument (``maxlen``).

    Returns:
        A 3-tuple ``(first, second, third)`` with the padded integer
        sequences for columns 0, 1 and 2 respectively.

    Note:
        Relies on the module-level ``tokenizer`` being fitted beforehand.
    """
    padded_columns = []
    for column_index in (0, 1, 2):
        token_ids = tokenizer.texts_to_sequences(texts[:, column_index])
        padded_columns.append(pad_sequences(token_ids, sequence_length))
    return tuple(padded_columns)
# Convert every data split into three padded sequence arrays, one per
# message column, all using the same MAX_SEQUENCE_LENGTH.

# Training split.
(
    message_first_message_train,
    message_second_message_train,
    message_third_message_train,
) = get_sequances(X_train, MAX_SEQUENCE_LENGTH)

# Validation split (held out from the training data).
(
    message_first_message_val,
    message_second_message_val,
    message_third_message_val,
) = get_sequances(X_val, MAX_SEQUENCE_LENGTH)

# Dev split.
(
    message_first_message_dev,
    message_second_message_dev,
    message_third_message_dev,
) = get_sequances(texts_dev, MAX_SEQUENCE_LENGTH)

# Test split.
(
    message_first_message_test,
    message_second_message_test,
    message_third_message_test,
) = get_sequances(texts_test, MAX_SEQUENCE_LENGTH)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment