This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feed-forward classifier: two ReLU hidden layers feeding a 6-way softmax.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=Xtrain[0].shape),
    layers.Dense(8, activation='relu'),
    layers.Dense(6, activation='softmax'),
])
# Stop after 5 epochs without improvement of the monitored quantity
# (Keras default: val_loss) and restore the weights of the best epoch.
cb = callbacks.EarlyStopping(patience=5, restore_best_weights=True)
model.compile(
    optimizer=optimizers.Adam(0.001),
    loss=losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
# `callbacks` is documented as a list of Callback instances, so the single
# callback is wrapped in a list instead of being passed as a bare object.
history = model.fit(
    Xtrain,
    ytrain,
    validation_data=(Xval, yval),
    epochs=256,
    callbacks=[cb],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standardise the features: the scaler is fitted on the training split only
# and then reused, unchanged, for the validation and test splits.
ss = StandardScaler()
Xtrain = ss.fit_transform(Xtrain)
Xval, Xtest = ss.transform(Xval), ss.transform(Xtest)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train / validation / test split in an 80-10-10 ratio, done in two stages.
# Stage 1: hold out 20% of the rows.
Xtrain, X_val_test, ytrain, y_val_test = train_test_split(
    df, y, test_size=0.2
)
# Stage 2: cut the held-out 20% in half to get validation and test sets.
Xval, Xtest, yval, ytest = train_test_split(
    X_val_test, y_val_test, test_size=0.5
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Target: one indicator column per distinct 'Star type' value.
y = pd.get_dummies(df['Star type'])
# Remove the raw categorical columns from the feature frame.
df = df.drop(['Spectral Class', 'Star type', 'Star color'], axis='columns')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode 'Spectral Class' (first level dropped) and attach every
# resulting indicator column to df under its 'Spectral_*' name.
df_dummies = pd.get_dummies(
    df['Spectral Class'],
    prefix='Spectral',
    drop_first=True,
)
for column in df_dummies.columns:
    df[column] = df_dummies[column]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One 0/1 flag column per colour family found in 'Star color'.
# Root words are used because spellings differ when shades are specified.
colours = ['Blu', 'Whit', 'Yellow', 'Orang', 'Red']
df[colours] = 0
for c in colours:
    # na=False treats a missing 'Star color' as "no match"; without it the
    # mask would contain NaN and .loc refuses to index with such a mask.
    df.loc[df['Star color'].str.contains(c, case=False, na=False), c] = 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the neural network: embedding -> LSTM -> two ReLU layers -> sigmoid
embedding_dim = 128
model = models.Sequential()
model.add(layers.Embedding(vocab, embedding_dim, input_length=mlen))
model.add(layers.LSTM(128, activation='tanh'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenization: the tokenizer is fitted on the training texts, which are
# then converted to integer sequences and padded to length `mlen`.
vocab = 1500
mlen = 200
tokenizer = Tokenizer(num_words=vocab, oov_token='<UNK>')
tokenizer.fit_on_texts(Xtrain)
Xtrain = pad_sequences(tokenizer.texts_to_sequences(Xtrain), maxlen=mlen)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train / validation split: 80% of the rows for training, 20% held out.
Xtrain, Xval, ytrain, yval = train_test_split(
    X,
    y,
    test_size=0.2,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sno = nltk.stem.SnowballStemmer('english') # Initializing stemmer | |
wordcloud = [[], []] | |
all_sentences = [] # All cleaned sentences | |
for x in range(len(df['headline'].values)): | |
headline = df['headline'].values[x] | |
sarcasm = df['is_sarcastic'].values[x] | |
cleaned_sentence = [] |