import numpy as np
import pandas as pd
import torch
from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    roc_auc_score,
)
# per-sample predictions from the two classifiers to be combined
# (df_1: praat_knn experiment, df_2: os_xgb experiment)
df_1 = pd.read_csv("bagus_tests/results/exp_ravdess_praat_knn/store/pred_df.csv")
df_2 = pd.read_csv("bagus_tests/results/exp_ravdess_os_xgb/store/pred_df.csv")

labels = ["angry", "happy", "neutral", "sad"]
# map class labels to integer indices
label_to_int = {label: i for i, label in enumerate(labels)}

prediction_proba_cs = df_1[labels].values
prediction_proba_sv = df_2[labels].values
# normalize the raw class scores into probabilities with a row-wise softmax
prediction_proba_cs = torch.nn.functional.softmax(
    torch.tensor(prediction_proba_cs), dim=1
).numpy()
prediction_proba_sv = torch.nn.functional.softmax(
    torch.tensor(prediction_proba_sv), dim=1
).numpy()
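# optional sanity check (added illustration): softmax rows are probability
# distributions (each sums to 1), so the entropies below are well defined
assert np.allclose(prediction_proba_cs.sum(axis=1), 1.0)
assert np.allclose(prediction_proba_sv.sum(axis=1), 1.0)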
prediction_cs = df_1["predicted"].values
prediction_sv = df_2["predicted"].values
truth = df_1["truth"].values
# map ground-truth labels to integer indices
truth = [label_to_int[label] for label in truth]

# grid of entropy thresholds to sweep
threshold_entropy = list(np.arange(0.1, 3.0, 0.1))
best_accuracy = 0
best_f1 = 0
best_auc = 0
best_top3_acc = 0
best_te = 0
best_predictions = []
best_ensemble_confidence_scores = []
label = "category"  # "s/p" for binary
for te in threshold_entropy:
    ensemble_confidence_scores = []
    final_predictions_entropy = []
    for proba_cs, proba_sv, pred_cs, pred_sv in zip(
        prediction_proba_cs, prediction_proba_sv, prediction_cs, prediction_sv
    ):
        # Shannon entropy of each model's posterior; only entropy_sv drives
        # the selection rule below, entropy_cs is computed for reference
        entropy_cs = np.sum(-proba_cs * np.log(proba_cs))
        entropy_sv = np.sum(-proba_sv * np.log(proba_sv))
        # below the threshold, take the first model's prediction;
        # otherwise fall back to the second model's
        if entropy_sv < te:
            final_predictions_entropy.append(pred_cs)
            ensemble_confidence_scores.append(proba_cs)
        else:
            final_predictions_entropy.append(pred_sv)
            ensemble_confidence_scores.append(proba_sv)
    final_predictions_entropy = [label_to_int[p] for p in final_predictions_entropy]
    accuracy = accuracy_score(truth, final_predictions_entropy)
    f1 = f1_score(truth, final_predictions_entropy, average="macro")
    # keep the threshold with the best accuracy, ties broken by macro F1
    if accuracy > best_accuracy or (accuracy == best_accuracy and f1 > best_f1):
        best_accuracy = accuracy
        best_f1 = f1
        if label == "s/p":
            best_auc = roc_auc_score(truth, final_predictions_entropy)
        else:
            # top-3 accuracy, computed from the first model's class scores
            top3 = 0
            for i, true_label in enumerate(truth):
                top3 += int(true_label in np.argsort(df_1[labels].values[i])[-3:])
            best_top3_acc = top3 / len(truth)
        best_te = te
        best_predictions = final_predictions_entropy
        best_ensemble_confidence_scores = ensemble_confidence_scores
print("----------------") | |
print("Threshold ", best_te) | |
print("F1 Macro: ", round(best_f1, 4)) | |
print("Accuracy: ", round(best_accuracy, 4)) | |
if label == "s/p": | |
print("AUC: ", round(best_auc, 4)) | |
else: | |
print("Top-3 Accuracy: ", round(best_top3_acc, 4)) | |
# print best accuracies | |
uar = balanced_accuracy_score(truth, best_predictions) | |
print(f"UAR = {uar}") |
Entropy-based ensemble evaluation

Dataset: RAVDESS. Single-model and ensemble results as UA/WA (unweighted/weighted accuracy):

praat_knn = 0.5/0.544
os_xgb    = 0.601/0.642
moe_test  = 0.5625/0.5982

Output of the script above (probabilities calculated using softmax):

Threshold 0.1
F1 Macro: 0.6001
Accuracy: 0.6429
Top-3 Accuracy: 0.9196
UAR = 0.6015625
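For reference, the UAR printed by the script is sklearn's balanced_accuracy_score, which equals macro-averaged recall, i.e. the unweighted mean of per-class recalls. A quick check with toy labels (illustrative values only, not the RAVDESS predictions above):

import numpy as np
from sklearn.metrics import balanced_accuracy_score, recall_score

y_true = [0, 0, 1, 1, 2, 2, 3, 3]  # toy ground truth
y_pred = [0, 1, 1, 1, 2, 0, 3, 2]

uar = balanced_accuracy_score(y_true, y_pred)
macro_recall = recall_score(y_true, y_pred, average="macro")
assert np.isclose(uar, macro_recall)  # both average per-class recalls
print(uar)  # 0.625: per-class recalls (0.5, 1.0, 0.5, 0.5) averaged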