Created
May 1, 2023 19:38
-
-
Save kamaci/4c514fd7691163888241758a0d97c5eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import datetime | |
import nltk | |
from deap import base, creator, tools, algorithms | |
from matplotlib import pyplot as plt | |
from nltk import FreqDist | |
from nltk.corpus import brown | |
from timeit import default_timer as timer | |
from permutation_cipher import PermutationCipher | |
tableau = { | |
0: "NOPQRSTUVWXYZABCDEFGHIJKLM", | |
1: "OPQRSTUVWXYZNMABCDEFGHIJKL", | |
2: "PQRSTUVWXYZNOLMABCDEFGHIJK", | |
3: "QRSTUVWXYZNOPKLMABCDEFGHIJ", | |
4: "RSTUVWXYZNOPQJKLMABCDEFGHI", | |
5: "STUVWXYZNOPQRIJKLMABCDEFGH", | |
6: "TUVWXYZNOPQRSHIJKLMABCDEFG", | |
7: "UVWXYZNOPQRSTGHIJKLMABCDEF", | |
8: "VWXYZNOPQRSTUFGHIJKLMABCDE", | |
9: "WXYZNOPQRSTUVEFGHIJKLMABCD", | |
10: "XYZNOPQRSTUVWDEFGHIJKLMABC", | |
11: "YZNOPQRSTUVWXCDEFGHIJKLMAB", | |
12: "ZNOPQRSTUVWXYBCDEFGHIJKLMA" | |
} | |
trigrams = nltk.ngrams(''.join(brown.words()).upper(), 3) | |
cfd = nltk.FreqDist(''.join(t) for t in trigrams) | |
class GeneticCracker: | |
def __init__(self, cipher_text: str): | |
self.cipher_text = cipher_text | |
self.toolbox = base.Toolbox() | |
self.chromosome_length = 6 | |
self.pop_size = 20 | |
self.ngen = 10 | |
self.mutation_rate = 0.9 | |
self.cx_prob = 0.8 | |
self.evaluation_count = 0 | |
self.min_error_history = [] | |
self.init_deap() | |
def init_deap(self): | |
print(f'Population Size: {self.pop_size} Ngen: {self.ngen}') | |
creator.create("FitnessMax", base.Fitness, weights=(-1.0,)) | |
creator.create("Individual", list, fitness=creator.FitnessMax) | |
self.toolbox = base.Toolbox() | |
self.toolbox.register("individual", tools.initIterate, creator.Individual, | |
lambda: random.sample(range(self.chromosome_length), self.chromosome_length)) | |
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual) | |
self.toolbox.register("mate", tools.cxOrdered) | |
self.toolbox.register("mutate", self.mutate_individual) | |
self.toolbox.register("select", tools.selTournament, tournsize=10) | |
self.toolbox.register("evaluate", self.trigram_score) | |
def mutate_individual(self, individual): | |
index1, index2 = random.sample(range(len(individual)), 2) | |
individual_clone = self.toolbox.clone(individual) | |
individual_clone[index1], individual_clone[index2] = individual_clone[index2], individual_clone[index1] | |
return individual_clone, | |
def _mutate(self, perm): | |
for i in range(self.chromosome_length): | |
if random.random() < self.mutation_rate: | |
j = random.randint(0, self.chromosome_length - 1) | |
perm[i], perm[j] = perm[j], perm[i] | |
return perm | |
def trigram_score(self, individual): | |
self.evaluation_count += 1 | |
text = PermutationCipher(individual).decrypt(self.cipher_text) | |
trigrams_text = [text[i:i + 3] for i in range(len(text) - 2)] | |
fdist = FreqDist(trigrams_text) | |
score = sum( | |
fdist[trigram] * cfd[trigram] | |
if trigram in cfd | |
else fdist[trigram] * 0.00001 | |
for trigram in fdist | |
) | |
return (10 ** 7 / score), | |
@staticmethod | |
def encrypt(key, plain_text): | |
key = key.upper() | |
if plain_text is None: | |
breakpoint() | |
plain_text = plain_text.upper() | |
return ''.join(tableau[(ord(key[i % len(key)]) - ord('A')) // 2][ord(c) - ord('A')] | |
for i, c in enumerate(plain_text)) | |
@staticmethod | |
def decrypt(key, cipher_text): | |
return ObsidianGenetic.encrypt(key, cipher_text) | |
def run(self): | |
start = timer() | |
print(datetime.datetime.now()) | |
population = self.toolbox.population(n=self.pop_size) | |
min_error, best_individual = min( | |
(self.toolbox.evaluate(individual)[0], individual) for individual in population) | |
self.min_error_history.append(min_error) | |
delta = 0.123 | |
if min_error < delta: | |
self._plot() | |
print(f"Best found: {best_individual}") | |
print(f"Time elapsed: {timer() - start}") | |
return best_individual | |
for _ in range(self.ngen): | |
offspring = algorithms.varAnd(population, self.toolbox, cxpb=self.cx_prob, mutpb=self.mutation_rate) | |
fits = self.toolbox.map(self.toolbox.evaluate, offspring) | |
for fit, ind in zip(fits, offspring): | |
ind.fitness.values = fit | |
population = self.toolbox.select(offspring, k=len(population)) | |
min_error, best_individual = min( | |
(self.toolbox.evaluate(individual)[0], individual) for individual in population) | |
self.min_error_history.append(min_error) | |
if min_error < delta: | |
print(f"Best found: {best_individual}") | |
print(f"Time elapsed: {timer() - start}") | |
self._plot() | |
return best_individual | |
print(f"Not found. Best one: {best_individual}") | |
print(datetime.datetime.now()) | |
print(f"Time elapsed min: {round((timer() - start) / 60, 2)}") | |
self._plot() | |
return population[0] | |
def get_evaluation_count(self): | |
return self.evaluation_count | |
def get_min_error(self): | |
return self.min_error_history[-1] | |
def _plot(self): | |
plt.plot(self.min_error_history) | |
plt.xlabel(f'Generations for Population Size: {self.pop_size} and Evaluation Count: {self.evaluation_count}') | |
plt.ylabel('Minimum Error') | |
best_error = min(self.min_error_history) | |
plt.axhline(y=best_error, color='r', linestyle='-', label=f"Best Error: {best_error}") | |
plt.legend() | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment