@bagustris
Created June 26, 2024 09:55
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    roc_auc_score,
)
df_1 = pd.read_csv("bagus_tests/results/exp_ravdess_praat_knn/store/pred_df.csv")
df_2 = pd.read_csv("bagus_tests/results/exp_ravdess_os_xgb/store/pred_df.csv")
labels = ["angry", "happy", "neutral", "sad"]
# mapping labels to integer/numeric
label_to_int = {label: i for i, label in enumerate(labels)}
# per-expert class scores (cs = df_1 / praat_knn, sv = df_2 / os_xgb)
prediction_proba_cs = df_1[labels].values
prediction_proba_sv = df_2[labels].values
# renormalize the stored per-class scores with softmax so that the two
# experts' entropies are on a comparable scale
prediction_proba_cs = torch.nn.functional.softmax(
    torch.tensor(prediction_proba_cs), dim=1
).numpy()
prediction_proba_sv = torch.nn.functional.softmax(
    torch.tensor(prediction_proba_sv), dim=1
).numpy()
prediction_cs = df_1["predicted"].values
prediction_sv = df_2["predicted"].values
truth = df_1["truth"].values
# mapping labels to integer/numeric
truth = [label_to_int[label] for label in truth]
threshold_entropy = list(np.arange(0.1, 3.0, 0.1))
best_accuracy = 0
best_f1 = 0
best_auc = 0
best_top3_acc = 0
best_te = 0
best_predictions = []
best_ensemble_confidence_scores = []
label = "category" # "s/p" for binary
for te in threshold_entropy:
    ensemble_confidence_scores = []
    final_predictions_entropy = []
    for proba_cs, proba_sv, pred_cs, pred_sv in zip(
        prediction_proba_cs, prediction_proba_sv, prediction_cs, prediction_sv
    ):
        # Shannon entropy of each expert's softmax output
        entropy_cs = np.sum(-proba_cs * np.log(proba_cs))
        entropy_sv = np.sum(-proba_sv * np.log(proba_sv))
        # gate as written: when entropy_sv falls below the threshold the
        # cs prediction is taken, otherwise the sv prediction
        if entropy_sv < te:
            final_predictions_entropy.append(pred_cs)
            ensemble_confidence_scores.append(proba_cs)
        else:
            final_predictions_entropy.append(pred_sv)
            ensemble_confidence_scores.append(proba_sv)
    final_predictions_entropy = [label_to_int[i] for i in final_predictions_entropy]
    accuracy = accuracy_score(truth, final_predictions_entropy)
    f1 = f1_score(truth, final_predictions_entropy, average="macro")
    if accuracy > best_accuracy or (accuracy == best_accuracy and f1 > best_f1):
        best_accuracy = accuracy
        best_f1 = f1
        if label == "s/p":
            best_auc = roc_auc_score(truth, final_predictions_entropy)
        else:
            # top-3 accuracy computed from the cs expert's raw scores (df_1)
            top3 = 0
            for i, true_label in enumerate(truth):
                top3 += int(true_label in np.argsort(df_1[labels].values[i])[-3:])
            best_top3_acc = top3 / len(truth)
        best_te = te
        best_predictions = final_predictions_entropy
        best_ensemble_confidence_scores = ensemble_confidence_scores
print("----------------")
print("Threshold ", best_te)
print("F1 Macro: ", round(best_f1, 4))
print("Accuracy: ", round(best_accuracy, 4))
if label == "s/p":
print("AUC: ", round(best_auc, 4))
else:
print("Top-3 Accuracy: ", round(best_top3_acc, 4))
# print best accuracies
uar = balanced_accuracy_score(truth, best_predictions)
print(f"UAR = {uar}")
@bagustris (Author):

Entropy-based ensemble evaluation

Dataset: RAVDESS
Per-model UA/WA (unweighted / weighted accuracy; see the snippet below):
praat_knn = 0.5/0.544
os_xgb = 0.601/0.642
moe_test = 0.5625/0.5982
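
As a minimal sketch of how UA and WA map onto the sklearn calls in the script above (assuming UA = unweighted accuracy, i.e. macro-averaged recall, and WA = weighted accuracy, i.e. plain accuracy):

from sklearn.metrics import accuracy_score, balanced_accuracy_score

# truth and best_predictions come from the script above
wa = accuracy_score(truth, best_predictions)           # WA: plain accuracy
ua = balanced_accuracy_score(truth, best_predictions)  # UA: macro recall (UAR)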

Results with softmax normalization:
Threshold 0.1
F1 Macro: 0.6001
Accuracy: 0.6429
Top-3 Accuracy: 0.9196
UAR = 0.6015625
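
The script computes both experts' entropies but gates only on entropy_sv. A common alternative, sketched below as a hypothetical min_entropy_gate (not what produced the numbers above), is to route each sample to whichever expert is more confident, i.e. has the lower entropy:

import numpy as np

def min_entropy_gate(proba_cs, proba_sv, pred_cs, pred_sv):
    # pick the expert whose softmax distribution has lower Shannon entropy
    entropy_cs = np.sum(-proba_cs * np.log(proba_cs))
    entropy_sv = np.sum(-proba_sv * np.log(proba_sv))
    return pred_cs if entropy_cs < entropy_sv else pred_sv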
