# GiliardGodoi/tmp.py

## tmp.py
def measure_certainty(df, bins=(0.75, 0.85, 0.95), diff_max=0.10):
  """Annotate *df* with confidence labels derived from its probability columns.

  Every column whose name starts with ``"Proba"`` is treated as a
  probability. For each row the probabilities are sorted in descending
  order, and the largest one is bucketed with ``np.digitize`` against
  ``bins``.

  Args:
    df: DataFrame with at least two columns whose names start with
      ``"Proba"``. It is modified in place.
    bins: Increasing thresholds separating the confidence categories.
      There must be exactly one threshold fewer than there are
      categories (i.e. three).
    diff_max: Two probabilities closer than this are flagged as
      ambiguous in ``duvida_2``.

  Returns:
    The same ``df`` object, with four columns added or overwritten:

    * ``confiança_nivel`` - integer bucket of the top probability.
    * ``confiança`` - category label for that bucket.
    * ``duvida_1`` - True when the top probability is below the lowest
      threshold but the top two together exceed the highest threshold.
    * ``duvida_2`` - True when the top two probabilities differ by less
      than ``diff_max``.

  Raises:
    ValueError: If ``bins`` does not have one entry fewer than the
      number of categories.
  """
  categories = np.array(['tema não identificado', 'dúvida', 'semelhante ao tema', 'certeza'])

  # Honour the caller's thresholds (they used to be silently overwritten),
  # but insist that they line up with the category labels; otherwise
  # ``categories[binned]`` could index past the end of the array.
  bins = list(bins)
  if len(bins) != len(categories) - 1:
    raise ValueError(
      f"bins must contain exactly {len(categories) - 1} thresholds "
      f"(one fewer than the number of categories), got {len(bins)}"
    )

  columns = [c for c in df.columns if c.startswith("Proba")]

  # Row-wise descending sort: sort the negated values ascending, then
  # negate back.
  ordered = -np.sort(-df[columns].values, axis=1)
  top = ordered[:, 0]
  second = ordered[:, 1]

  binned = np.digitize(top, bins)

  df['confiança_nivel'] = binned
  df['confiança'] = categories[binned]
  df['duvida_1'] = (top < min(bins)) & ((top + second) > max(bins))
  df['duvida_2'] = np.abs(top - second) < diff_max

  return df