Skip to content

Instantly share code, notes, and snippets.

@kamaci
Created April 28, 2023 19:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kamaci/856bd268be9d5a831071c7ff388e6ef2 to your computer and use it in GitHub Desktop.
Save kamaci/856bd268be9d5a831071c7ff388e6ef2 to your computer and use it in GitHub Desktop.
from stats_collector import StatsCollector
# Expected relative frequency of each uppercase letter "A".."Z", used as the
# reference distribution for the chi-squared statistic. The 26 values sum to
# 1.0. NOTE(review): the figures look like a standard English-language
# letter-frequency table -- confirm the source before reusing them elsewhere.
LETTER_PROBABILITIES = {
    "A": 0.0856,
    "B": 0.0139,
    "C": 0.0279,
    "D": 0.0378,
    "E": 0.1304,
    "F": 0.0289,
    "G": 0.0199,
    "H": 0.0528,
    "I": 0.0627,
    "J": 0.0013,
    "K": 0.0042,
    "L": 0.0339,
    "M": 0.0249,
    "N": 0.0707,
    "O": 0.0797,
    "P": 0.0199,
    "Q": 0.0012,
    "R": 0.0677,
    "S": 0.0607,
    "T": 0.1045,
    "U": 0.0249,
    "V": 0.0092,
    "W": 0.0149,
    "X": 0.0017,
    "Y": 0.0199,
    "Z": 0.0008,
}
class ChiSquared:
    """Chi-squared statistic of ``data`` against ``LETTER_PROBABILITIES``.

    The statistic is computed once, at construction time, and stored in
    ``val``. Observed counts and the total character count are taken from
    ``StatsCollector(data)``.

    Attributes:
        data: The input exactly as passed to the constructor.
        val: Sum, over every letter in ``LETTER_PROBABILITIES``, of
            ``(observed - expected) ** 2 / expected`` where ``expected`` is
            the letter's probability times the total character count.
            Stays ``0`` when the collector reports zero characters.
    """

    def __init__(self, data):
        """Store ``data`` and compute ``val`` immediately.

        Args:
            data: Forwarded unchanged to ``StatsCollector``; presumably the
                text to analyse -- confirm against ``StatsCollector``.

        Raises:
            KeyError: If the collector reports a symbol that has no entry
                in ``LETTER_PROBABILITIES``.
        """
        self.data = data
        self.val = 0
        self._calculate()

    def _calculate(self):
        """Accumulate the chi-squared statistic into ``self.val``."""
        stats_collector = StatsCollector(self.data)
        observed = stats_collector.freq_dict
        total_chars = stats_collector.total_chars

        # A symbol without an expected probability cannot be scored; keep
        # signalling that with KeyError, as a direct table lookup would.
        for symbol in observed:
            if symbol not in LETTER_PROBABILITIES:
                raise KeyError(symbol)

        # With no characters every expected count is zero, so there is
        # nothing to compare; leave ``val`` at 0 instead of dividing by zero.
        if not total_chars:
            return

        # Sum over *every* category of the reference distribution, not only
        # the letters that occur: a letter that never appears still deviates
        # from its expected count and must contribute to the statistic.
        for letter, probability in LETTER_PROBABILITIES.items():
            expected = probability * total_chars
            self.val += (observed.get(letter, 0) - expected) ** 2 / expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment