from tensorflow.keras import callbacks, layers, losses, models, optimizers

# Build and train the star-type classifier
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=Xtrain[0].shape),
    layers.Dense(8, activation='relu'),
    layers.Dense(6, activation='softmax')
])
cb = callbacks.EarlyStopping(patience=5, restore_best_weights=True)
model.compile(optimizer=optimizers.Adam(0.001),
              loss=losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
history = model.fit(Xtrain, ytrain, validation_data=(Xval, yval),
                    epochs=256, callbacks=[cb])  # callbacks are passed as a list
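
# Not in the original gist: a minimal follow-up that evaluates the trained
# model on the held-out test set (Xtest/ytest from the 80-10-10 split below).
test_loss, test_acc = model.evaluate(Xtest, ytest, verbose=0)
print(f'Test accuracy: {test_acc:.3f}')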
from sklearn.preprocessing import StandardScaler

# Standardize the features; fit on the training set only to avoid leakage
ss = StandardScaler()
Xtrain = ss.fit_transform(Xtrain)
Xval = ss.transform(Xval)
Xtest = ss.transform(Xtest)
from sklearn.model_selection import train_test_split

# Splitting into train, val and test sets -- 80-10-10 split
# First, an 80-20 split
Xtrain, X_val_test, ytrain, y_val_test = train_test_split(df, y, test_size=0.2)
# Then split the 20% in half
Xval, Xtest, yval, ytest = train_test_split(X_val_test, y_val_test, test_size=0.5)
import pandas as pd

# One-hot encode the output column
y = pd.get_dummies(df['Star type'])
# Drop the original categorical columns now that they are encoded
df = df.drop(columns=['Spectral Class', 'Star type', 'Star color'])
# One-hot encode the input column
df_dummies = pd.get_dummies(df['Spectral Class'], drop_first=True, prefix='Spectral')
for column in df_dummies:
    df[column] = df_dummies[column]
# Use colour root words, since shade spellings vary (hence 'Blu', 'Whit', 'Orang')
colours = ['Blu', 'Whit', 'Yellow', 'Orang', 'Red']
df[colours] = 0
for c in colours:
    df.loc[df['Star color'].str.contains(c, case=False), c] = 1
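
# Optional sanity check (not in the original gist): any row whose colour
# matched none of the root words stays all-zero and may need a new root.
print((df[colours].sum(axis=1) == 0).sum(), 'rows with no colour match')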
# Build and train neural network
embedding_dim = 128
model = models.Sequential([
    layers.Embedding(vocab, embedding_dim, input_length=mlen),
    layers.LSTM(128, activation='tanh'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
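
# The gist doesn't include the training call for this model; a plausible
# sketch, assuming binary cross-entropy for the 0/1 sarcasm label and the
# same early-stopping setup as above (the epoch count is illustrative):
model.compile(optimizer=optimizers.Adam(0.001),
              loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
cb = callbacks.EarlyStopping(patience=5, restore_best_weights=True)
history = model.fit(Xtrain, ytrain, validation_data=(Xval, yval),
                    epochs=50, callbacks=[cb])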
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenization
vocab = 1500
mlen = 200
tokenizer = Tokenizer(num_words=vocab, oov_token='<UNK>')
tokenizer.fit_on_texts(Xtrain)
Xtrain = tokenizer.texts_to_sequences(Xtrain)
Xtrain = pad_sequences(Xtrain, maxlen=mlen)
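
# Not shown in the gist: the validation set presumably goes through the same
# tokenizer (fit on the training set only), e.g.:
Xval = pad_sequences(tokenizer.texts_to_sequences(Xval), maxlen=mlen)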
# Splitting into train and val sets -- 80-20 split
Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size = 0.2)
import nltk

sno = nltk.stem.SnowballStemmer('english')  # Initializing the stemmer
wordcloud = [[], []]  # Two buckets, presumably split by the 0/1 sarcasm label
all_sentences = []  # All cleaned sentences
for x in range(len(df['headline'].values)):
    headline = df['headline'].values[x]
    sarcasm = df['is_sarcastic'].values[x]
    cleaned_sentence = []
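    # The snippet ends here; a plausible completion (not the author's exact
    # code), assuming simple lowercasing, alphabetic filtering, and stemming:
    for word in headline.lower().split():
        word = ''.join(ch for ch in word if ch.isalpha())
        if word:
            cleaned_sentence.append(sno.stem(word))
    cleaned = ' '.join(cleaned_sentence)
    all_sentences.append(cleaned)
    wordcloud[sarcasm].append(cleaned)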