from tensorflow.keras import callbacks, layers, losses, models, optimizers

# Build and train the star-type classifier
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=Xtrain[0].shape),
    layers.Dense(8, activation='relu'),
    layers.Dense(6, activation='softmax')
])
cb = callbacks.EarlyStopping(patience=5, restore_best_weights=True)
model.compile(optimizer=optimizers.Adam(0.001),
              loss=losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
history = model.fit(Xtrain, ytrain, validation_data=(Xval, yval),
                    epochs=256, callbacks=[cb])  # callbacks are passed as a list
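
# Not in the original gist: a minimal follow-up that evaluates the trained
# model on the held-out test set (Xtest/ytest from the 80-10-10 split below).
test_loss, test_acc = model.evaluate(Xtest, ytest, verbose=0)
print(f'Test accuracy: {test_acc:.3f}')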
from sklearn.preprocessing import StandardScaler

# Standardize the features; fit on the training set only to avoid leakage
ss = StandardScaler()
Xtrain = ss.fit_transform(Xtrain)
Xval = ss.transform(Xval)
Xtest = ss.transform(Xtest)
from sklearn.model_selection import train_test_split

# Splitting into train, val and test sets -- 80-10-10 split
# First, an 80-20 split
Xtrain, X_val_test, ytrain, y_val_test = train_test_split(df, y, test_size=0.2)
# Then split the 20% in half
Xval, Xtest, yval, ytest = train_test_split(X_val_test, y_val_test, test_size=0.5)
import pandas as pd

# One-hot encode the output column
y = pd.get_dummies(df['Star type'])
# Drop the original categorical columns now that they are encoded
df = df.drop(columns=['Spectral Class', 'Star type', 'Star color'])
# One-hot encode the input column
df_dummies = pd.get_dummies(df['Spectral Class'], drop_first=True, prefix='Spectral')
for column in df_dummies:
    df[column] = df_dummies[column]
# Use colour root words, since shade spellings vary (hence 'Blu', 'Whit', 'Orang')
colours = ['Blu', 'Whit', 'Yellow', 'Orang', 'Red']
df[colours] = 0
for c in colours:
    df.loc[df['Star color'].str.contains(c, case=False), c] = 1
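
# Optional sanity check (not in the original gist): any row whose colour
# matched none of the root words stays all-zero and may need a new root.
print((df[colours].sum(axis=1) == 0).sum(), 'rows with no colour match')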
# Build and train neural network
embedding_dim = 128
model = models.Sequential([
    layers.Embedding(vocab, embedding_dim, input_length=mlen),
    layers.LSTM(128, activation='tanh'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
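
# The gist doesn't include the training call for this model; a plausible
# sketch, assuming binary cross-entropy for the 0/1 sarcasm label and the
# same early-stopping setup as above (the epoch count is illustrative):
model.compile(optimizer=optimizers.Adam(0.001),
              loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
cb = callbacks.EarlyStopping(patience=5, restore_best_weights=True)
history = model.fit(Xtrain, ytrain, validation_data=(Xval, yval),
                    epochs=50, callbacks=[cb])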
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenization
vocab = 1500
mlen = 200
tokenizer = Tokenizer(num_words=vocab, oov_token='<UNK>')
tokenizer.fit_on_texts(Xtrain)
Xtrain = tokenizer.texts_to_sequences(Xtrain)
Xtrain = pad_sequences(Xtrain, maxlen=mlen)
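
# Not shown in the gist: the validation set presumably goes through the same
# tokenizer (fit on the training set only), e.g.:
Xval = pad_sequences(tokenizer.texts_to_sequences(Xval), maxlen=mlen)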
# Splitting into train and val sets -- 80-20 split
Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size = 0.2)
import nltk

sno = nltk.stem.SnowballStemmer('english')  # Initializing the stemmer
wordcloud = [[], []]  # Two buckets, presumably split by the 0/1 sarcasm label
all_sentences = []  # All cleaned sentences
for x in range(len(df['headline'].values)):
    headline = df['headline'].values[x]
    sarcasm = df['is_sarcastic'].values[x]
    cleaned_sentence = []
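    # The snippet ends here; a plausible completion (not the author's exact
    # code), assuming simple lowercasing, alphabetic filtering, and stemming:
    for word in headline.lower().split():
        word = ''.join(ch for ch in word if ch.isalpha())
        if word:
            cleaned_sentence.append(sno.stem(word))
    cleaned = ' '.join(cleaned_sentence)
    all_sentences.append(cleaned)
    wordcloud[sarcasm].append(cleaned)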