from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)
X = pad_sequences(X)
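For intuition, here is a pure-Python sketch (not the Keras implementation, and the toy corpus is made up) of what the two steps above do: the tokenizer maps each word to an integer rank by frequency, and `pad_sequences` left-pads every sequence to the longest length, with 0 reserved for padding:

```python
from collections import Counter

texts = ["good movie", "bad bad movie"]  # toy corpus for illustration

# rank words by frequency; ids start at 1 (0 is reserved for padding)
counts = Counter(w for t in texts for w in t.split())
word_index = {w: i + 1 for i, (w, _) in enumerate(counts.most_common())}

seqs = [[word_index[w] for w in t.split()] for t in texts]
maxlen = max(len(s) for s in seqs)
padded = [[0] * (maxlen - len(s)) + s for s in seqs]  # left-pad, as Keras does by default
print(padded)  # [[0, 3, 1], [2, 2, 1]]
```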
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SpatialDropout1D, LSTM, Dense

embed_dim = 128
lstm_out = 196
model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(3, activation='softmax'))  # 3 classes, one-hot targets
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
from sklearn.model_selection import train_test_split

# create train and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
print("Training ", X_train.shape, Y_train.shape)
print("Testing ", X_test.shape, Y_test.shape)
batch_size = 32
model.fit(X_train, Y_train, epochs=20, batch_size=batch_size, verbose=2)
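Conceptually, the split above amounts to shuffling the row indices with a fixed seed and slicing off the test fraction. A minimal numpy sketch (an assumption for illustration only: sklearn's `train_test_split` uses its own RNG and rounding, so the exact rows chosen will differ):

```python
import numpy as np

def split(X, Y, test_size=0.33, random_state=42):
    # shuffle indices with a fixed seed, then slice off the test fraction
    rng = np.random.default_rng(random_state)
    idx = rng.permutation(len(X))
    n_test = int(round(len(X) * test_size))
    return X[idx[n_test:]], X[idx[:n_test]], Y[idx[n_test:]], Y[idx[:n_test]]

X_demo = np.arange(20).reshape(10, 2)  # toy data
Y_demo = np.arange(10)
X_tr, X_te, Y_tr, Y_te = split(X_demo, Y_demo)
print(X_tr.shape, X_te.shape)  # (7, 2) (3, 2)
```

Fixing `random_state` makes the partition reproducible across runs, which is why the original passes `random_state=42`.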
from datetime import datetime as dt

# set up the k-means parameters
n_init = 12        # number of random restarts
max_iter = 225     # cap on iterations per restart
tol = 0.0001       # convergence threshold on center movement
random_state = 42
n_jobs = -1        # use all CPU cores
n_clusters = 3
t0 = dt.now()
print("========= Start training ... ")
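To see what these parameters control, here is a toy single-restart k-means (Lloyd's algorithm) with made-up data; it is a sketch, not sklearn's implementation, which additionally runs `n_init` restarts and keeps the run with the lowest inertia:

```python
import numpy as np

def toy_kmeans(X, n_clusters=3, max_iter=225, tol=1e-4, random_state=42):
    rng = np.random.default_rng(random_state)
    # initialize centers at randomly chosen data points
    centers = X[rng.choice(len(X), n_clusters, replace=False)]
    for _ in range(max_iter):
        # assignment step: each point joins its nearest center
        dists = np.linalg.norm(X[:, None, :] - centers[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        # update step: move each center to the mean of its members
        new_centers = np.array([
            X[labels == k].mean(axis=0) if np.any(labels == k) else centers[k]
            for k in range(n_clusters)
        ])
        moved = np.linalg.norm(new_centers - centers)
        centers = new_centers
        if moved < tol:  # tol: stop once centers have settled
            break
    return centers, labels

pts = np.array([[0.0, 0.0], [0.2, 0.0], [5.0, 5.0], [5.2, 5.0], [9.0, 0.0], [9.2, 0.0]])
centers, labels = toy_kmeans(pts)
print(centers.shape)  # (3, 2)
```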
def overallAccuracy(clusterDF, labelsDF):
    # count points per cluster
    countByCluster = pd.DataFrame(data=clusterDF['cluster'].value_counts())
    countByCluster.reset_index(inplace=True, drop=False)
    countByCluster.columns = ['cluster', 'clusterCount']
    # pair each point's true label with its assigned cluster
    preds = pd.concat([labelsDF, clusterDF], axis=1)
    preds.columns = ['trueLabel', 'cluster']
    # each cluster's best possible guess is its most common true label
    countMostFreq = pd.DataFrame(data=preds.groupby('cluster').agg(lambda x: x.value_counts().iloc[0]))
    countMostFreq.reset_index(inplace=True, drop=False)
    countMostFreq.columns = ['cluster', 'countMostFrequent']
    accuracyDF = countMostFreq.merge(countByCluster, on='cluster')
    # overall accuracy: correctly guessed points over all points
    overallAccuracy = accuracyDF.countMostFrequent.sum() / accuracyDF.clusterCount.sum()
    return overallAccuracy
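As a sanity check on this purity-style metric, here is a self-contained sketch with made-up labels and cluster assignments: each cluster's best guess is its most common true label, and accuracy is the share of points that guess covers:

```python
import pandas as pd

# toy data: cluster 0 is mostly 'a' (2 of 3), cluster 1 is mostly 'b' (2 of 3)
labelsDF = pd.DataFrame({'trueLabel': ['a', 'a', 'b', 'b', 'b', 'a']})
clusterDF = pd.DataFrame({'cluster': [0, 0, 0, 1, 1, 1]})

preds = pd.concat([labelsDF, clusterDF], axis=1)
preds.columns = ['trueLabel', 'cluster']

# per cluster, count of the most frequent true label
mostFreq = preds.groupby('cluster')['trueLabel'].agg(lambda x: x.value_counts().iloc[0])
accuracy = mostFreq.sum() / len(preds)
print(accuracy)  # (2 + 2) / 6 ≈ 0.6667
```

Note that this metric rewards clusters that are dominated by a single label; with as many clusters as points it would trivially reach 1.0, so it is only meaningful for small, fixed n_clusters.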
# set up the parameters
n_init = 12
max_iter = 225
tol = 0.0001
random_state = 42
n_jobs = -1
t0 = dt.now()
print("========= Start training ... ")
# top 10 states in America
'''['California',
'Florida',
'Georgia',
'Illinois',
'Massachusetts',
'Michigan',
'New Jersey',
'New York',
'Pennsylvania',
import matplotlib.pyplot as plt
import seaborn as sns

fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8), sharey=False)
fig1.tight_layout()
fig1.subplots_adjust(top=0.86)
sns.set_color_codes("pastel")
sns.barplot(x="Deaths", y="Province/State", data=bar_plot_df, label="num of Deaths", color="b", ax=ax1)
ax1.legend(ncol=2, loc="lower left", frameon=True)
ax1.set(xlim=(0, 33323), ylabel="", xlabel="Num of Deaths by states")
sns.despine(left=True, bottom=True, ax=ax1)
Christmas Wishes for Colleagues
###
You have worked very hard this year. You really deserve to have a wonderful Christmas. Merry Christmas my colleague.
###
It has always been a pleasure working with you! I wish you a Merry Christmas!
###
It feels great working with someone as knowledgeable as you and yet so humble. I wish you a Merry Christmas!