import pandas as pd  # needed below for one-hot encoding the target with pd.get_dummies
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense, LSTM, Embedding
from keras.layers import Dropout, Activation, Bidirectional, GlobalMaxPool1D
from keras.models import Sequential
from keras import initializers, regularizers, constraints, optimizers, layers
from keras.preprocessing import text, sequence
# df is assumed to be a pandas DataFrame with a 'tweet_text' column and an 'emotion' label column

# set the emotion/sentiment as our target
target = df['emotion']

# one-hot encode the target since it is categorical
y = pd.get_dummies(target).values

# use Keras to create a Tokenizer object, keeping only the 20,000 most frequent words
tokenizer = text.Tokenizer(num_words=20000)
tokenizer.fit_on_texts(list(df['tweet_text']))

# convert each tweet into a sequence of integer word indices
tokenized_texts = tokenizer.texts_to_sequences(df['tweet_text'])

# pad/truncate every sequence to a fixed length of 100 tokens
X = sequence.pad_sequences(tokenized_texts, maxlen=100)
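
The imports above pull in Embedding, Bidirectional, LSTM, GlobalMaxPool1D, Dropout, and Dense, but the snippet itself stops at preprocessing. Below is a minimal sketch, not part of the original gist, of how the prepared X and y could feed a Bidirectional LSTM classifier built from those layers. The layer sizes, dropout rate, optimizer, epoch and batch settings, and the use of scikit-learn's train_test_split are all illustrative assumptions; categorical_crossentropy is chosen because y is one-hot encoded above.

# Sketch only: hyperparameters below are assumptions, not values from the original gist
from sklearn.model_selection import train_test_split

# hold out 20% of the data for validation (assumed split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential([
    Embedding(input_dim=20000, output_dim=128, input_length=100),  # matches num_words and maxlen above
    Bidirectional(LSTM(64, return_sequences=True)),                # assumed LSTM width
    GlobalMaxPool1D(),                                             # collapse the sequence dimension
    Dropout(0.5),                                                  # assumed dropout rate
    Dense(y.shape[1], activation='softmax'),                       # one output per emotion class
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=5, batch_size=32)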