Skip to content

Instantly share code, notes, and snippets.

View douglaspsteen's full-sized avatar

Doug Steen douglaspsteen

View GitHub Profile
# Two stacked panels sharing one figure: the values in test_f1s on top,
# the values in pseudo_labels underneath, both indexed by iteration number
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 8))

# Top panel: line plot of the f1 scores
ax1.set_ylabel('f1 Score')
ax1.plot(range(iterations), test_f1s)

# Bottom panel: one bar per iteration for the pseudo-label counts
ax2.set_xlabel('# Iterations')
ax2.set_ylabel('Pseudo-Labels Created')
ax2.bar(range(iterations), pseudo_labels);
# View confusion matrix after self-training
# Per-iteration histories: train f1 scores, test f1 scores and the number
# of pseudo-labels; each starts out as its own empty list
train_f1s, test_f1s, pseudo_labels = [], [], []

# Seed value used to get the while loop started (the loop is not shown here)
high_prob = [1]

# Iteration counter starts at zero
iterations = 0
# Generate probabilities for each prediction on X_test.
# NOTE(review): the return value is not assigned; presumably this is a
# notebook cell whose output is only displayed — confirm.
clf.predict_proba(X_test)
# Fit a logistic regression (iteration cap raised to 1000) on the training data
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# f1 score of the fitted model on the data it was trained on
y_hat_train = clf.predict(X_train)
train_f1 = f1_score(y_train, y_hat_train)

# f1 score of the fitted model on the test data
y_hat_test = clf.predict(X_test)
test_f1 = f1_score(y_test, y_hat_test)
# Visualize class distribution of the training labels.
# value_counts() orders its result by frequency (most common class first),
# so sort by class value to guarantee the bars line up with the fixed,
# position-based tick labels below regardless of which class is larger.
# Assumes two classes where the smaller value means "no complication",
# as the tick labels imply — TODO confirm against the dataset.
y_train.value_counts().sort_index().plot(kind='bar')
plt.xticks([0,1], ['No Complication', 'Complication'])
plt.ylabel('Count');
# Randomly reorder every row (fixed seed for repeatability), then renumber
# the index from zero and discard the old one
df = df.sample(frac=1, random_state=15)
df = df.reset_index(drop=True)

# Cumulative row offsets, each built on the previous one: the first 25% of
# rows, then a further 1%, then a further 74% (named for the test, train
# and unlabeled portions respectively)
test_ind = round(0.25 * len(df))
train_ind = round(0.01 * len(df)) + test_ind
unlabeled_ind = round(0.74 * len(df)) + train_ind
# Load data: read 'surgical_deepnet.csv' (path relative to the working
# directory) into a DataFrame
df = pd.read_csv('surgical_deepnet.csv')
# Print a summary of the loaded DataFrame
df.info()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.metrics import plot_confusion_matrix
# Create dictionary of results for 'leagues' key
# (read from the 'api' -> 'leagues' path of the decoded JSON response)
leagues_dict = resp.json()['api']['leagues']
# Visualize df for all English Premier league seasons available
# NOTE(review): the request that produced `resp` is not shown here, so the
# "English Premier League" scope cannot be confirmed from this snippet.
leagues_df = pd.DataFrame.from_dict(leagues_dict)
display(leagues_df)
# Check keys at next level of response, i.e. under the top-level 'api' entry.
# NOTE(review): the result is not stored; presumably this is a notebook
# cell whose output is only displayed — confirm.
resp.json()['api'].keys()