Doug Steen douglaspsteen

## ssl_plots.py
# Two stacked panels, both indexed by iteration number: the values in
# test_f1s as a line (top) and the values in pseudo_labels as bars (bottom).

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 8))
iteration_axis = range(iterations)

# Top panel: f1 score per iteration
ax1.plot(iteration_axis, test_f1s)
ax1.set_ylabel('f1 Score')

# Bottom panel: pseudo-labels created per iteration
ax2.bar(iteration_axis, pseudo_labels)
ax2.set_ylabel('Pseudo-Labels Created')
ax2.set_xlabel('# Iterations');

# View confusion matrix after self-training

## ssl_self_train.py
# Bookkeeping for the self-training loop

# Iteration counter, starting at zero
iterations = 0

# Histories for train f1, test f1 and # of pseudo-labels (three separate lists)
train_f1s, test_f1s, pseudo_labels = [], [], []

# Non-empty starting value; per the original note it is what lets the
# while loop get started
high_prob = [1]

## ssl_prob.py
# Generate probabilities for each prediction by calling predict_proba on
# X_test. The result is not assigned to a name here.

clf.predict_proba(X_test)

## ssl_lr_clf.py
# Fit a logistic regression (max_iter=1000) on X_train / y_train

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predict on each split and score the predictions with f1
y_hat_train = clf.predict(X_train)
train_f1 = f1_score(y_true=y_train, y_pred=y_hat_train)

y_hat_test = clf.predict(X_test)
test_f1 = f1_score(y_true=y_test, y_pred=y_hat_test)

## ssl_class_dist.py
# Visualize class distribution
#
# value_counts() orders classes by descending frequency, so the bar order
# would depend on which class happens to be more common. sort_index() pins
# the bars to ascending class label, so the positional tick labels below
# always sit under the matching bar.
# NOTE(review): assumes y_train is coded 0 = no complication and
# 1 = complication, as the hard-coded tick labels imply -- confirm.

y_train.value_counts().sort_index().plot(kind='bar')
plt.xticks([0, 1], ['No Complication', 'Complication'])
plt.ylabel('Count');

## ssl_scrub.py
# Shuffle the rows with a fixed seed and renumber the index from 0

df = df.sample(frac=1, random_state=15).reset_index(drop=True)


# Generate cumulative end positions for the splits: the first 25% of rows
# end at test_ind, the next 1% at train_ind, and the remainder (~74%) at
# unlabeled_ind.
# NOTE(review): presumably these are used as slice bounds on df -- confirm.

n_rows = len(df)
test_ind = round(n_rows * 0.25)
train_ind = test_ind + round(n_rows * 0.01)
# The three fractions used to be rounded independently, so their sum could
# fall short of len(df) (e.g. 10 rows -> 2 + 0 + 7 = 9) and silently drop a
# row. Ending the last split at len(df) keeps every row.
unlabeled_ind = n_rows

## ssl_load.py
# Load data: read 'surgical_deepnet.csv' (a relative path) into a DataFrame,
# then print a summary of it with df.info().

df = pd.read_csv('surgical_deepnet.csv')
df.info()

## ssl_imports.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import f1_score
from sklearn.metrics import plot_confusion_matrix

## final_df.py
# Drill into the parsed JSON response: 'api' -> 'leagues'
api_payload = resp.json()['api']
leagues_dict = api_payload['leagues']

# Tabulate the leagues data and show it (per the original note, this covers
# all available English Premier League seasons)
leagues_df = pd.DataFrame.from_dict(leagues_dict)
display(leagues_df)

## check_keys2.py
# Check keys at next level of response: list the keys nested under the
# 'api' entry of the parsed JSON body.
resp.json()['api'].keys()
	# Two stacked panels, both indexed by iteration number: the values in
	# test_f1s as a line (top) and the values in pseudo_labels as bars (bottom).

	fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 8))
	iteration_axis = range(iterations)

	# Top panel: f1 score per iteration
	ax1.plot(iteration_axis, test_f1s)
	ax1.set_ylabel('f1 Score')

	# Bottom panel: pseudo-labels created per iteration
	ax2.bar(iteration_axis, pseudo_labels)
	ax2.set_ylabel('Pseudo-Labels Created')
	ax2.set_xlabel('# Iterations');

	# View confusion matrix after self-training
	# Bookkeeping for the self-training loop

	# Iteration counter, starting at zero
	iterations = 0

	# Histories for train f1, test f1 and # of pseudo-labels (three separate lists)
	train_f1s, test_f1s, pseudo_labels = [], [], []

	# Non-empty starting value; per the original note it is what lets the
	# while loop get started
	high_prob = [1]
	# Generate probabilities for each prediction by calling predict_proba on
	# X_test. The result is not assigned to a name here.

	clf.predict_proba(X_test)
	# Fit a logistic regression (max_iter=1000) on X_train / y_train

	clf = LogisticRegression(max_iter=1000)
	clf.fit(X_train, y_train)

	# Predict on each split and score the predictions with f1
	y_hat_train = clf.predict(X_train)
	train_f1 = f1_score(y_true=y_train, y_pred=y_hat_train)

	y_hat_test = clf.predict(X_test)
	test_f1 = f1_score(y_true=y_test, y_pred=y_hat_test)
	# Visualize class distribution
	#
	# value_counts() orders classes by descending frequency, so the bar order
	# would depend on which class happens to be more common. sort_index() pins
	# the bars to ascending class label, so the positional tick labels below
	# always sit under the matching bar.
	# NOTE(review): assumes y_train is coded 0 = no complication and
	# 1 = complication, as the hard-coded tick labels imply -- confirm.

	y_train.value_counts().sort_index().plot(kind='bar')
	plt.xticks([0, 1], ['No Complication', 'Complication'])
	plt.ylabel('Count');
	# Shuffle the rows with a fixed seed and renumber the index from 0

	df = df.sample(frac=1, random_state=15).reset_index(drop=True)


	# Generate cumulative end positions for the splits: the first 25% of rows
	# end at test_ind, the next 1% at train_ind, and the remainder (~74%) at
	# unlabeled_ind.
	# NOTE(review): presumably these are used as slice bounds on df -- confirm.

	n_rows = len(df)
	test_ind = round(n_rows * 0.25)
	train_ind = test_ind + round(n_rows * 0.01)
	# The three fractions used to be rounded independently, so their sum could
	# fall short of len(df) (e.g. 10 rows -> 2 + 0 + 7 = 9) and silently drop a
	# row. Ending the last split at len(df) keeps every row.
	unlabeled_ind = n_rows
	# Load data: read 'surgical_deepnet.csv' (a relative path) into a DataFrame,
	# then print a summary of it with df.info().

	df = pd.read_csv('surgical_deepnet.csv')
	df.info()
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	from sklearn.linear_model import LogisticRegression

	from sklearn.metrics import f1_score
	from sklearn.metrics import plot_confusion_matrix
	# Drill into the parsed JSON response: 'api' -> 'leagues'
	api_payload = resp.json()['api']
	leagues_dict = api_payload['leagues']

	# Tabulate the leagues data and show it (per the original note, this covers
	# all available English Premier League seasons)
	leagues_df = pd.DataFrame.from_dict(leagues_dict)
	display(leagues_df)
	# Check keys at next level of response: list the keys nested under the
	# 'api' entry of the parsed JSON body.
	resp.json()['api'].keys()