Skip to content

Instantly share code, notes, and snippets.

@nikdon
Created June 8, 2023 23:54
Show Gist options
  • Save nikdon/a4658bd0aca1d1f3c978213f5359c57b to your computer and use it in GitHub Desktop.
Save nikdon/a4658bd0aca1d1f3c978213f5359c57b to your computer and use it in GitHub Desktop.
Code to test different versions of Shannon entropy calculation generated using Google Bard and ChatGPT. This is part of the work on the [pyEntropy](https://github.com/nikdon/pyEntropy) library.
from collections import Counter
import numpy as np
import timeit
def shannon_entropy_original(time_series):
    """Return the Shannon entropy of the sample data (baseline implementation).

    Kept deliberately naive — it is the reference/baseline that the
    benchmark in ``__main__`` compares against.

    Args:
        time_series: Vector or string of the sample data.

    Returns:
        The Shannon entropy as a float value.
    """
    # Strings are iterated character by character; everything else is
    # materialized into a list so it can be scanned repeatedly.
    if not isinstance(time_series, str):
        time_series = list(time_series)

    n = len(time_series)
    # One relative frequency per distinct symbol, counted by a full scan
    # of the series for each symbol (O(n * k) by design — see docstring).
    symbols = list(set(time_series))
    frequencies = [
        sum(1 for item in time_series if item == symbol) / n
        for symbol in symbols
    ]

    # Shannon entropy: H = -sum(p * log2(p))
    entropy = 0.0
    for freq in frequencies:
        entropy += freq * np.log2(freq)
    return -entropy
def shannon_entropy_efficient(time_series):
    """Return the Shannon entropy of the sample data (vectorized).

    Uses ``np.unique`` to obtain symbol counts in a single pass, then
    computes H = -sum(p * log2(p)) as one vectorized expression.

    Args:
        time_series: Vector or string of the sample data.

    Returns:
        The Shannon entropy as a float value.
    """
    # Bug fix: a bare string must be split into characters first —
    # np.array("aabb") is a 0-d array, so np.unique would see a single
    # element and always yield an entropy of 0.0, diverging from
    # shannon_entropy_original, which counts characters.
    if isinstance(time_series, str):
        time_series = list(time_series)
    values = np.asarray(time_series)

    # Frequency counts for each distinct symbol.
    _, counts = np.unique(values, return_counts=True)
    frequencies = counts / len(values)

    # Shannon entropy, fully vectorized.
    return -np.sum(frequencies * np.log2(frequencies))
if __name__ == "__main__":
    # Benchmark the baseline against the vectorized implementation and
    # confirm that both agree on random inputs of increasing size.
    np.random.seed(42)

    sizes = [10, 100, 500, 1000]
    number = 100  # calls per timing sample
    repeat = 5    # timing samples per implementation
    summary = []

    for size in sizes:
        time_series = np.random.rand(size)

        # Correctness check: both versions must agree (up to float noise).
        Y_original = shannon_entropy_original(time_series)
        Y_efficient = shannon_entropy_efficient(time_series)
        if np.allclose(Y_original, Y_efficient, atol=1e-5):
            print(f"The outputs of the two versions are equal: {size=}")
        else:
            print(f"The outputs of the two versions are not equal: {size=}")

        # Time both versions. Passing callables avoids the fragile
        # string-stmt + globals() pattern (no reliance on module globals).
        original_times = timeit.repeat(
            lambda: shannon_entropy_original(time_series),
            number=number, repeat=repeat,
        )
        original_avg_time = np.mean(original_times)

        efficient_times = timeit.repeat(
            lambda: shannon_entropy_efficient(time_series),
            number=number, repeat=repeat,
        )
        efficient_avg_time = np.mean(efficient_times)

        time_difference = original_avg_time - efficient_avg_time
        percentage_difference = (time_difference / original_avg_time) * 100
        summary.append((size, original_avg_time, efficient_avg_time, percentage_difference))

    # Print the summary
    print("Size\tOriginal Time\tEfficient Time\tPercentage Difference")
    for entry in summary:
        size, original_time, efficient_time, percentage_difference = entry
        print(f"{size}\t{original_time:.6f}\t{efficient_time:.6f}\t{percentage_difference:.2f}%")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment