Created
April 28, 2023 19:21
-
-
Save kamaci/63099729575f6e1380590d5df622275a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
import matplotlib.pyplot as plt | |
class StatsCollector:
    """Collect per-character frequency statistics for an iterable of characters.

    The frequency table is pre-seeded with every uppercase ASCII letter at
    zero, so letters that never occur still appear in the table and the plot.
    Characters outside that alphabet are counted as well and are added to the
    table on first sight.

    Attributes are filled in by the constructor:
        data: the input exactly as passed in.
        label: name used in the plot title and the printed report.
        freq_dict: character -> count, ordered by count, highest first.
        total_chars: number of items iterated from ``data``.
        num_unique_chars: number of characters with a non-zero count.
    """

    def __init__(self, data, label="input"):
        """Store the input and immediately compute its statistics.

        Args:
            data: iterable yielding hashable items, typically a ``str``
                iterated character by character.
            label: human-readable name for this data set.
        """
        self.data = data
        self.label = label
        self.freq_dict = {char: 0 for char in string.ascii_uppercase}
        self.total_chars = 0
        self.num_unique_chars = 0
        self._analyze()

    def _analyze(self):
        """Count every character of ``self.data`` and refresh derived stats."""
        for char in self.data:
            self.total_chars += 1
            # .get() instead of a bare "+= 1": the table is only seeded with
            # A-Z, so any other character (lowercase, digit, space,
            # punctuation) would otherwise raise KeyError.
            self.freq_dict[char] = self.freq_dict.get(char, 0) + 1
        self._sort_dict_by_values()
        self.num_unique_chars = sum(
            count > 0 for count in self.freq_dict.values()
        )

    def plot(self):
        """Show a bar chart of the frequency table using ``plt``."""
        plt.bar(self.freq_dict.keys(), self.freq_dict.values())
        plt.xlabel("Characters")
        plt.ylabel("Frequency")
        plt.title(f"Character Frequency of {self.label}")
        plt.show()

    def print_stats(self):
        """Print the label, the totals and the frequency table to stdout."""
        print("Label: ", self.label)
        print("Number of characters: ", self.total_chars)
        print("Number of unique characters: ", self.num_unique_chars)
        print("Character frequency: ", self.freq_dict)
        print("\n")

    def _sort_dict_by_values(self):
        """Rebuild ``freq_dict`` ordered by count, highest first.

        ``sorted`` is stable, so characters with equal counts keep their
        previous relative order.
        """
        self.freq_dict = dict(
            sorted(
                self.freq_dict.items(),
                key=lambda item: item[1],
                reverse=True,
            )
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment