Skip to content

Instantly share code, notes, and snippets.

@WojciechKusa
Created September 7, 2023 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save WojciechKusa/e8614f262f8aa72735b6cafa0667ec6b to your computer and use it in GitHub Desktop.
Save WojciechKusa/e8614f262f8aa72735b6cafa0667ec6b to your computer and use it in GitHub Desktop.
Convert WSS score to TNR
from typing import Dict, List, Union
import numpy as np
ScoreType = Union[float, List[float], Dict[str, float]]
def convert_single_score(score: float, min_score: float, max_score: float) -> float:
    """
    Convert a single score using min-max normalization.

    Args:
        score (float): Score to be converted. Score should not be larger than 1.
        min_score (float): Minimum score value (normalizes to 0).
        max_score (float): Maximum score value (normalizes to 1).

    Returns:
        float: Converted score, ``(score - min_score) / (max_score - min_score)``.

    Raises:
        ValueError: If ``score`` is larger than 1, or if ``max_score`` equals
            ``min_score`` so that the normalization range is empty.
    """
    if score > 1:
        raise ValueError(
            f"Invalid score value {score}. Score values should not be larger than 1."
        )

    score_range = max_score - min_score
    if score_range == 0:
        # Without this guard the division below fails with a bare
        # ZeroDivisionError that does not tell the caller which input is wrong.
        raise ValueError(
            f"max_score and min_score must differ, but both are {min_score}."
        )
    return (score - min_score) / score_range
def wss_to_tnr(
    scores: ScoreType,
    dataset_size: int,
    num_relevant: int,
    recall: float = 0.95,
) -> ScoreType:
    """
    Convert WSS@r scores to TNR@r scores using min-max normalization.

    Args:
        scores (Union[float, List[float], Dict[str, float]]): WSS@r scores to be
            converted, given as a single number, a list of numbers, or a
            dictionary mapping model names to scores. No score may be larger
            than 1.
        dataset_size (int): Total size of the dataset. Must be positive.
        num_relevant (int): Number of relevant documents in the dataset. Must
            satisfy ``0 <= num_relevant < dataset_size``.
        recall (float): Recall level r used in the conversion (default 0.95).

    Returns:
        Union[float, List[float], Dict[str, float]]: Converted TNR@r scores, in
        the same container shape as ``scores``.

    Raises:
        ValueError: If ``dataset_size`` is not positive, if ``num_relevant`` is
            outside ``[0, dataset_size)``, or if any score is larger than 1.
        TypeError: If ``scores`` is not a number, a list, or a dictionary.

    Formula (with num_irrelevant = dataset_size - num_relevant):
        TNR@r = (WSS@r - min(WSS@r)) / (max(WSS@r) - min(WSS@r))
        max(WSS@r) = (num_irrelevant + (1 - r) * num_relevant) / dataset_size - (1 - r)
        min(WSS@r) = ((1 - r) * num_relevant) / dataset_size - (1 - r)
    """
    if dataset_size <= 0:
        raise ValueError(f"dataset_size must be positive, got {dataset_size}.")
    if not 0 <= num_relevant < dataset_size:
        # num_relevant == dataset_size leaves no irrelevant documents, which
        # makes max(WSS) equal to min(WSS) and the normalization undefined.
        raise ValueError(
            f"num_relevant must be in [0, dataset_size), got {num_relevant} "
            f"for dataset_size {dataset_size}."
        )

    num_irrelevant = dataset_size - num_relevant
    missed_fraction = 1 - recall
    max_score = (
        num_irrelevant + missed_fraction * num_relevant
    ) / dataset_size - missed_fraction
    min_score = (missed_fraction * num_relevant) / dataset_size - missed_fraction

    # Plain ints such as 0 or 1 are valid scores. bool is an int subclass but
    # is not a meaningful score, so it still falls through to the TypeError.
    if isinstance(scores, (int, float)) and not isinstance(scores, bool):
        return convert_single_score(scores, min_score, max_score)

    if isinstance(scores, list):
        # Errors from convert_single_score propagate unchanged so the caller
        # sees which score value was rejected and why.
        return [convert_single_score(score, min_score, max_score) for score in scores]

    if isinstance(scores, dict):
        return {
            model: convert_single_score(score, min_score, max_score)
            for model, score in scores.items()
        }

    raise TypeError(
        "scores should be either a float, a list of floats, or a dictionary with model names as keys and scores as values."
    )
if __name__ == "__main__":
    # Smoke test: convert known WSS scores and compare the results against
    # expected TNR values, allowing an absolute tolerance of 0.001.
    tolerance = 0.001
    n_documents, n_relevant = 2544, 41

    # Single score.
    single_tnr = wss_to_tnr(0.566, n_documents, n_relevant)
    print(single_tnr)
    assert np.isclose(single_tnr, 0.625, atol=tolerance)

    # List of scores, converted element by element.
    wss_values = [0.566, 0.523, 0.733, 0.801, 0.787, 0.783, 0.783]
    reference_tnrs = [0.625, 0.582, 0.795, 0.864, 0.850, 0.846, 0.846]
    batch_tnrs = wss_to_tnr(wss_values, n_documents, n_relevant)
    print(batch_tnrs)
    assert np.allclose(batch_tnrs, reference_tnrs, atol=tolerance)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment