Last active
May 7, 2021 04:56
-
-
Save sciencelee/2b2de0c26870548a298b1bf9cffe5767 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense, LSTM, Embedding
from keras.layers import Dropout, Activation, Bidirectional, GlobalMaxPool1D
from keras.models import Sequential
from keras import initializers, regularizers, constraints, optimizers, layers
from keras.preprocessing import text, sequence

# Prepare the tweet data for a Keras model: one-hot encode the labels and
# convert every tweet into a fixed-length sequence of integer word indices.
# NOTE(review): `df` is not defined in this snippet; it is expected to be a
# DataFrame with 'emotion' and 'tweet_text' columns created earlier -- confirm.

MAX_WORDS = 20000  # vocabulary cap handed to the Tokenizer
MAX_LEN = 100      # every sequence is padded/truncated to this length

# set the emotion/sentiment as our target
target = df['emotion']

# use one hot encoding since our target is categorical
y = pd.get_dummies(target).values

# materialize the tweets once so fitting and encoding see the same list
tweets = list(df['tweet_text'])

# use keras to create a Tokenizer object; num_words limits the vocabulary
# to the most frequent words seen while fitting
tokenizer = text.Tokenizer(num_words=MAX_WORDS)
tokenizer.fit_on_texts(tweets)
tokenized_texts = tokenizer.texts_to_sequences(tweets)

# pad (or truncate) every sequence so the model gets a rectangular array
X = sequence.pad_sequences(tokenized_texts, maxlen=MAX_LEN)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment