Skip to content

Instantly share code, notes, and snippets.

@mgbckr
Created August 18, 2018 07:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgbckr/54782462d4f3002cd3f41914f1062b10 to your computer and use it in GitHub Desktop.
Save mgbckr/54782462d4f3002cd3f41914f1062b10 to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn import svm
import sklearn as sk
def exp(majority_vote=False, n=1000):
    """Compare ways of aggregating metrics over repeated random 50/50 splits.

    Builds a balanced two-class toy data set (labels 1 and 2, 14 samples
    each) with uniformly random features, then repeats ``n`` times: split
    the samples randomly into two halves, fit on one half, predict the
    other half and record the predictions plus AUC, precision and accuracy
    of that iteration (class 2 is treated as the positive class).

    Afterwards each metric is printed in several aggregation flavours:

    * "meanRows": the per-sample mean prediction over all iterations is
      rounded to a label and scored once against the true labels,
    * "independent": the mean of the per-iteration scikit-learn scores,
    * "from matrix, independent": per-iteration values recomputed from the
      prediction matrix and then averaged,
    * "from matrix, overall": counts pooled over all iterations.

    It also prints how many iterations had an unbalanced training half and,
    of those, how often every prediction equalled the training majority
    class.

    Args:
        majority_vote: If true, predict the most frequent training label
            for every test sample instead of fitting an ``svm.SVC``.
        n: Number of random split iterations.

    Returns:
        Tuple ``(auc, auc_mean)``: the "meanRows" AUC and the mean of the
        per-iteration AUC values.
    """
    # Imported explicitly: ``import sklearn as sk`` alone does not guarantee
    # that the ``metrics`` submodule has been loaded as an attribute.
    from sklearn import metrics

    # data: balanced labels, features carry no information about the labels
    y = np.array([1, 2] * 14)
    x = np.random.rand(len(y), 100)
    n_samples = len(y)
    half = n_samples // 2

    # prediction results; NaN marks "sample was not in the test half"
    results = np.full((n_samples, n), np.nan)
    # AUC / precision / accuracy values of each iteration
    aucs = np.full(n, np.nan)
    precs = np.full(n, np.nan)
    accs = np.full(n, np.nan)

    bias_in_train = 0
    majority = 0

    for i in range(n):
        # select train and test set
        perm = np.random.permutation(n_samples)
        train, test = perm[:half], perm[half:]
        x_train, y_train = x[train], y[train]
        x_test, y_test = x[test], y[test]

        train_classes, train_counts = np.unique(y_train, return_counts=True)
        majority_class = train_classes[np.argmax(train_counts)]

        if majority_vote:
            # train and predict (majority)
            prediction = np.repeat(majority_class, len(y_test))
        else:
            # train and predict (SVM)
            clf = svm.SVC()
            clf.fit(x_train, y_train)
            prediction = clf.predict(x_test)

        # check majority vote: when the training half is unbalanced, count
        # how often the model predicts nothing but the training majority
        if train_counts[0] != train_counts[1]:
            bias_in_train += 1
            predicted_classes = np.unique(prediction)
            if (len(predicted_classes) == 1
                    and predicted_classes[0] == majority_class):
                majority += 1

        # store results
        results[test, i] = prediction

        # per-iteration scores; the raw labels (1/2) serve as AUC scores
        aucs[i] = metrics.roc_auc_score(y_test == 2, prediction)
        precs[i] = metrics.precision_score(y_test == 2, prediction == 2)
        accs[i] = metrics.accuracy_score(y_test == 2, prediction == 2)

    # "meanRows": average each sample's predictions and round to a label.
    # Samples that never landed in a test half have no prediction at all
    # and are left out (only relevant for very small n).
    seen = ~np.isnan(results).all(axis=1)
    classification_mean = np.nanmean(results[seen], axis=1)
    rounded = np.round(classification_mean)
    positives = y[seen] == 2

    auc = metrics.roc_auc_score(positives, rounded)
    auc_mean = aucs.mean()
    prec = metrics.precision_score(positives, rounded == 2)
    prec_mean = precs.mean()
    acc = metrics.accuracy_score(positives, rounded == 2)
    acc_mean = accs.mean()

    # true labels as a column; broadcasts against the (samples, n) matrix
    y_col = y[:, np.newaxis]

    # 0/0 occurs when class 2 is never predicted; the NaN is handled below,
    # so the accompanying RuntimeWarning is only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        # calculate precision using the result matrix
        tp = (results == 2) & (y_col == 2)
        fp = (results == 2) & (y_col == 1)
        tp_per_run = tp.sum(axis=0)
        fp_per_run = fp.sum(axis=0)
        prec_correct = np.nan_to_num(
            tp_per_run / (tp_per_run + fp_per_run)).mean()
        prec_overall = tp.sum() / (tp.sum() + fp.sum())

        # calculate accuracy using the result matrix
        # (NaN never compares equal, so untested entries count as neither)
        correct = np.equal(results, y_col).sum(axis=0)
        overall = np.sum(np.nan_to_num(results) > 0, axis=0)
        acc_correct = (correct / overall).mean()
        acc_overall = correct.sum() / overall.sum()

    print("---")
    print("AUC: {:.4f} (meanRows) / {:.4f} (independent)".format(auc, auc_mean))
    print("Prec: {:.4f} (meanRows) / {:.4f} (independent) / {:.4f} (from matrix, independent) / {:.4f} (from matrix, overall)".format(prec, prec_mean, prec_correct, prec_overall))
    print("Acc: {:.4f} (meanRows) / {:.4f} (independent) / {:.4f} (from matrix, independent) / {:.4f} (from matrix, overall)".format(acc, acc_mean, acc_correct, acc_overall))
    print("Runs: {}, Class bias in train: {}, Majority vote: {}".format(n, bias_in_train, majority))
    return auc, auc_mean
# Run the experiment once at module level: SVM predictions (no majority-vote
# baseline) over 100 random train/test splits; the summary is printed by exp().
exp(majority_vote=False, n=100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment