Four different ways to calculate entropy in Python
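
All four functions compute the Shannon entropy of the empirical label distribution, H = -sum_i p_i * log_b(p_i), where p_i is the observed frequency of label i and b is the logarithm base (natural log when base is None). They differ only in how the probabilities and the sum are obtained: scipy, a plain Python loop, pandas, and vectorized NumPy.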
import numpy as np
from scipy.stats import entropy
from math import log, e
import pandas as pd
import timeit

def entropy1(labels, base=None):
    """Entropy via scipy: scipy.stats.entropy normalizes raw counts itself."""
    _, counts = np.unique(labels, return_counts=True)
    return entropy(counts, base=base)

def entropy2(labels, base=None):
    """Computes entropy of a label distribution with a plain Python loop."""
    n_labels = len(labels)
    if n_labels <= 1:
        return 0
    _, counts = np.unique(labels, return_counts=True)
    probs = counts / n_labels
    n_classes = np.count_nonzero(probs)
    if n_classes <= 1:
        # A single distinct label has zero entropy
        return 0
    # Accumulate -p * log(p) one class at a time
    ent = 0.
    base = e if base is None else base
    for p in probs:
        ent -= p * log(p, base)
    return ent

def entropy3(labels, base=None):
    """Entropy via pandas: value_counts(normalize=True) yields the probabilities."""
    vc = pd.Series(labels).value_counts(normalize=True, sort=False)
    base = e if base is None else base
    # Change of base: log_b(p) = ln(p) / ln(b)
    return -(vc * np.log(vc) / np.log(base)).sum()

def entropy4(labels, base=None):
    """Entropy via vectorized NumPy, normalizing counts to probabilities."""
    _, counts = np.unique(labels, return_counts=True)
    norm_counts = counts / counts.sum()
    base = e if base is None else base
    return -(norm_counts * np.log(norm_counts) / np.log(base)).sum()

labels = [1, 3, 5, 2, 3, 5, 3, 2, 1, 3, 4, 5]

# Each of the four prints the same value, ~1.51711 nats
print(entropy1(labels))
print(entropy2(labels))
print(entropy3(labels))
print(entropy4(labels))
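
timeit is imported above but never used in the gist; here is a minimal benchmark sketch, assuming 1,000 random integer labels and 1,000 calls per function (both illustrative choices, not from the original).

# Illustrative benchmark (assumed setup, not part of the original gist):
# time each implementation on 1,000 random labels, 1,000 calls apiece.
big_labels = np.random.randint(0, 10, size=1000)
for fn in (entropy1, entropy2, entropy3, entropy4):
    t = timeit.timeit(lambda: fn(big_labels), number=1000)
    print(f"{fn.__name__}: {t:.3f}s for 1000 calls")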