Created
June 8, 2023 23:54
-
-
Save nikdon/a4658bd0aca1d1f3c978213f5359c57b to your computer and use it in GitHub Desktop.
Code to test different versions of the Shannon entropy calculation that were generated using Google Bard and ChatGPT. This is part of the work on the [pyEntropy](https://github.com/nikdon/pyEntropy) library.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
import numpy as np | |
import timeit | |
def shannon_entropy_original(time_series):
    """
    Compute the Shannon entropy (base 2) of the sample data by brute force.

    Every distinct symbol is counted with its own full pass over the data,
    and the entropy is accumulated symbol by symbol.

    Args:
        time_series: Iterable of hashable samples, or a string whose
            characters are treated as the samples

    Returns:
        The Shannon entropy, i.e. ``-sum(p * log2(p))`` over the relative
        frequency ``p`` of each distinct symbol
    """
    # Strings are scanned character by character as-is; any other iterable
    # is materialised so it can be traversed repeatedly and measured.
    samples = time_series if isinstance(time_series, str) else list(time_series)
    n_samples = len(samples)

    # Relative frequency of each distinct symbol (one full scan per symbol)
    probabilities = [
        sum(1 for item in samples if item == symbol) / n_samples
        for symbol in set(samples)
    ]

    # Accumulate sum(p * log2(p)); the entropy is its negation
    weighted_log_sum = 0.0
    for p in probabilities:
        weighted_log_sum += p * np.log2(p)

    return -weighted_log_sum
def shannon_entropy_efficient(time_series):
    """
    Return the Shannon Entropy (base 2) of the sample data.

    Vectorised counterpart of ``shannon_entropy_original``: the distinct
    symbols and their counts come from a single ``np.unique`` call.

    Two earlier candidates were tried here and superseded by this version:
    a Bard variant that called ``time_series.count(entry)`` for every
    distinct value, and a ChatGPT variant that looped over
    ``collections.Counter(time_series).values()``.

    Args:
        time_series: Vector or string of the sample data

    Returns:
        The Shannon Entropy as float value
    """
    # NumPy wraps a bare string as ONE scalar element, so np.unique would see
    # a single "symbol" instead of the individual characters. Split strings
    # (and any other non-array iterable, e.g. a generator) into elements.
    if isinstance(time_series, np.ndarray):
        samples = time_series
    else:
        samples = np.array(list(time_series))

    # Occurrence count of each distinct symbol
    _, counts = np.unique(samples, return_counts=True)

    # Normalise by the number of elements np.unique actually counted, so the
    # frequencies always sum to 1.
    frequencies = counts / counts.sum()

    return -np.sum(frequencies * np.log2(frequencies))
if __name__ == "__main__":
    # Fixed seed so every run benchmarks the same random inputs
    np.random.seed(42)

    # Input lengths to benchmark
    sizes = [10, 100, 500, 1000]

    # timeit settings: `repeat` timing runs of `number` calls each
    number = 100
    repeat = 5

    summary = []
    for size in sizes:
        # Must remain a module-level name: the timeit statements below
        # resolve `time_series` through globals().
        time_series = np.random.rand(size)

        # Check that both implementations agree on this input
        entropy_original = shannon_entropy_original(time_series)
        entropy_efficient = shannon_entropy_efficient(time_series)
        outputs_match = np.allclose(entropy_original, entropy_efficient, atol=1e-5)
        verdict = "equal" if outputs_match else "not equal"
        print(f"The outputs of the two versions are {verdict}: {size=}")

        # Mean wall-clock time over the repeated timing runs
        original_avg_time = np.mean(
            timeit.repeat(
                "shannon_entropy_original(time_series)",
                globals=globals(),
                number=number,
                repeat=repeat,
            )
        )
        efficient_avg_time = np.mean(
            timeit.repeat(
                "shannon_entropy_efficient(time_series)",
                globals=globals(),
                number=number,
                repeat=repeat,
            )
        )

        # Time saved by the efficient version, relative to the original
        time_difference = original_avg_time - efficient_avg_time
        percentage_difference = (time_difference / original_avg_time) * 100
        summary.append(
            (size, original_avg_time, efficient_avg_time, percentage_difference)
        )

    # Print the summary
    print("Size\tOriginal Time\tEfficient Time\tPercentage Difference")
    for size, original_time, efficient_time, percentage_difference in summary:
        print(
            f"{size}\t{original_time:.6f}\t{efficient_time:.6f}"
            f"\t{percentage_difference:.2f}%"
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment