Skip to content

Instantly share code, notes, and snippets.

@GiliardGodoi
Created May 5, 2023 16:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GiliardGodoi/766be7333f9b9e46691ada7a15b504a4 to your computer and use it in GitHub Desktop.
Save GiliardGodoi/766be7333f9b9e46691ada7a15b504a4 to your computer and use it in GitHub Desktop.
def measure_certainty(df, bins=(0.75, 0.85, 0.95), diff_max=0.10):
    """Annotate each row of *df* with a confidence level for its top probability.

    Every column whose name starts with ``"Proba"`` is treated as one
    probability per row. For each row the values are sorted in descending
    order, and the largest one is bucketed with ``np.digitize`` against
    *bins* to pick one of four category labels.

    The frame is modified in place; four columns are added or overwritten:

    * ``confiança_nivel`` -- integer bucket index (0..3) of the top value.
    * ``confiança`` -- the category label for that bucket.
    * ``duvida_1`` -- True when the top value is below the lowest edge but
      the two largest values together exceed the highest edge.
    * ``duvida_2`` -- True when the two largest values differ by less than
      *diff_max*.

    Args:
        df: DataFrame holding at least two ``Proba*`` columns.
        bins: Three non-decreasing edges separating the four categories.
        diff_max: Gap between the two largest values below which the row is
            flagged in ``duvida_2``.

    Returns:
        The same *df* object, with the columns above set.

    Raises:
        ValueError: If *bins* does not hold exactly three non-decreasing
            edges, or if *df* has fewer than two ``Proba*`` columns.
    """
    categories = np.array(['tema não identificado', 'dúvida', 'semelhante ao tema', 'certeza'])

    # The edges must line up with the categories: N labels need N - 1 edges,
    # otherwise np.digitize can return an index outside `categories`.
    edges = np.asarray(bins, dtype=float)
    if edges.ndim != 1 or edges.size != categories.size - 1:
        raise ValueError(
            f"bins must contain exactly {categories.size - 1} edges, got {bins!r}"
        )
    # np.digitize numbers the buckets in reverse for decreasing edges, which
    # would silently pair values with the wrong label.
    if np.any(np.diff(edges) < 0):
        raise ValueError(f"bins must be sorted in increasing order, got {bins!r}")

    columns = [c for c in df.columns if isinstance(c, str) and c.startswith("Proba")]
    if len(columns) < 2:
        raise ValueError(
            "df needs at least two columns starting with 'Proba', "
            f"found {len(columns)}"
        )

    # Sort each row in descending order: negate, sort ascending, negate back.
    ordered = -np.sort(-df[columns].values, axis=1)
    top, runner_up = ordered[:, 0], ordered[:, 1]

    binned = np.digitize(top, edges)
    df['confiança_nivel'] = binned
    df['confiança'] = categories[binned]
    df['duvida_1'] = (top < edges.min()) & ((top + runner_up) > edges.max())
    df['duvida_2'] = np.abs(top - runner_up) < diff_max
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment