Skip to content

Instantly share code, notes, and snippets.

@GaretJax
Last active September 15, 2021 08:05
Show Gist options
  • Save GaretJax/3bdba874f023674c651bc871ac6ac3e1 to your computer and use it in GitHub Desktop.
Save GaretJax/3bdba874f023674c651bc871ac6ac3e1 to your computer and use it in GitHub Desktop.
Genetic algorithm to decrypt substitution ciphers
from __future__ import print_function
import collections
import re
import random
##############################################################################
# Program parameters
# Path to the text file containing the ciphertext
INFILE = 'ciphertext.txt'
# Path to the text file containing the reference text
REFFILE = 'paradiso.txt'
# Encrypted chars in the ciphertext
CHARS = 'ABCDEFGHILMNOPQRSTUVZ'
# Size of the population to use for the genetic algorithm
POPULATION_SIZE = 50
# Size of the population slice of best peforming solutions to keep at each
# iteration
TOP_POPULATION = 10
# Number of intervals for which the best score has to be stable before aborting
# the genetic algorith
STABILITY_INTERVALS = 20
# Number of crossovers to execute for each new child in the genetic algorithm
CROSSOVER_COUNT = 2
# Number of random mutation to introduce for each new child in the genetic
# algorithm
MUTATIONS_COUNT = 1
##############################################################################
# Implementation
def pairwise(iterable):
prev = None
for item in iterable:
if prev is not None:
yield prev + item
prev = item
def bigram(text):
counter = collections.Counter()
words = re.sub('[^{}]'.format(CHARS), ' ', text).split()
for word in words:
for pair in pairwise(word):
counter[pair] += 1
return counter
def decode(ciphertext, key):
cleartext = ''
for char in ciphertext:
cleartext += key.get(char, char)
return cleartext
def init_mapping():
# Generate a randomly initialized solution
repls = set(CHARS)
mapping = {}
for c in CHARS:
if c in mapping:
continue
repl = random.choice(list(repls))
repls.remove(repl)
repls.discard(c)
mapping[c] = repl
mapping[repl] = c
return mapping
def update_mapping(mapping, char, repl):
# Update the solution by switching `char` with `repl`
# and `repl` with `char`.
current_repl = mapping[char]
current_char = mapping[repl]
if current_char == repl:
current_char = current_repl
elif current_repl == char:
current_repl = current_char
mapping[current_char] = current_repl
mapping[current_repl] = current_char
mapping[char] = repl
mapping[repl] = char
##############################################################################
# Genetic algorithm routines
def select(population, ciphertext, ref_bigram):
scores = []
# Compute the score of each solution
for p in population:
scores.append((score(decode(ciphertext, p), ref_bigram), p))
# Sort the solutions by their score
sorted_population = sorted(scores, reverse=True)
# Select only the best TOP_POPULATION solutions
selected_population = sorted_population[:TOP_POPULATION]
return selected_population[0][0], [m for _, m in selected_population]
def generate(population):
new_population = population[:]
while len(new_population) < POPULATION_SIZE:
# Randomly select two parent solutions
x, y = random.choice(population), random.choice(population)
# Create the child solution
child = x.copy()
# Switch CROSSOVER_COUNT chromosomes between the parents
for i in range(CROSSOVER_COUNT):
char = random.choice(list(CHARS))
update_mapping(child, char, y[char])
# Randomly mutate MUTATIONS_COUNT chromosomes of the the child solution
for i in range(MUTATIONS_COUNT):
char = random.choice(list(CHARS))
repl = random.choice(list(CHARS))
update_mapping(child, char, repl)
# Add the newly obtained child the the current population
new_population.append(child)
return new_population
def score(text, ref_bigram):
text_bigram = bigram(text)
score = 0
# Multiply the number of occurrences of each pair in the decoded
# ciphertext with the number of occurrences of that same pair in the
# reference text, then take the sum of all multiplications.
for pair, occurrences in text_bigram.items():
score += occurrences * ref_bigram[pair]
return score
###############################################################################
# Decryption routine
def decrypt():
# Read the reference text into memory
with open(REFFILE) as fh:
reftext = fh.read().upper()
# Analyze the reference text and compute a mapping of each pair of letters
# to the number of occurrences in the reference text
#
# ref_bigram = {
# 'AA': 1,
# 'AB': 64,
# 'AC': 354,
# 'AD': 279,
# 'AE': 26,
# 'AF': 52,
# 'AG': 241,
# 'AH': 2,
# 'AI': 260,
# 'AL': 1141,
# 'AM': 353,
# ...
# 'VE': 958,
# 'VI': 727,
# 'VO': 409,
# 'VR': 43,
# 'VU': 33,
# 'VV': 28,
# 'ZA': 249,
# 'ZE': 35,
# 'ZI': 240,
# 'ZO': 49,
# 'ZZ': 103,
# }
#
ref_bigram = bigram(reftext)
# Read the ciphertext into memory
with open(INFILE) as fh:
ciphertext = fh.read().upper()
# Create an initial population of random possible solutions
population = [init_mapping() for i in range(POPULATION_SIZE)]
print(population[0])
# Set the initial values for the stability checker
last_score = 0
last_score_increase = 0
iterations = -STABILITY_INTERVALS
# Run the genetic algorithm
while last_score_increase < STABILITY_INTERVALS:
# Fill up the population up to POPULATION_SIZE solutions by crossing
# over and mutating the TOP_POPULATION best solutions
population = generate(population)
# Select the TOP_POPULATION best solutions from the current population
best_score, population = select(population, ciphertext, ref_bigram)
# Update the stability check state with the current best score
if best_score > last_score:
last_score_increase = 0
last_score = best_score
else:
last_score_increase += 1
print(best_score)
iterations += 1
# Print the current (best) solution
#
# best_solution = population[0] = {
# 'A': 'M',
# 'B': 'O',
# 'C': 'Q',
# 'D': 'R',
# 'E': 'P',
# 'F': 'T',
# 'G': 'G', # Wrong, should be 'Z'
# 'H': 'U',
# 'I': 'V',
# 'L': 'S',
# 'M': 'A',
# 'N': 'N',
# 'O': 'B',
# 'P': 'E',
# 'Q': 'C',
# 'R': 'D',
# 'S': 'L',
# 'T': 'F',
# 'U': 'H',
# 'V': 'I',
# 'Z': 'Z', # Wrong, should be 'G'
# }
#
print('Best solution found after {} iterations:'.format(iterations))
print(decode(ciphertext, population[0]))
print(population[0])
decrypt()
time python decrypt.py
{'A': 'L', 'C': 'M', 'B': 'Q', 'E': 'N', 'D': 'R', 'G': 'O', 'F': 'Z', 'I': 'H', 'H': 'I', 'M': 'C', 'L': 'A', 'O': 'G', 'N': 'E', 'Q': 'B', 'P': 'P', 'S': 'S', 'R': 'D', 'U': 'U', 'T': 'V', 'V': 'T', 'Z': 'F'}
328371
328371
331760
359667
388421
417208
430346
464624
523066
523066
523066
532600
532600
543202
559138
577726
577726
577726
577726
577726
582042
582042
582042
582042
602872
602872
602872
602872
602872
602872
602872
602872
603763
603763
603763
604013
604013
604013
616037
616037
631872
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
632122
Best solution found after 42 iterations:
LA MACCHINA DI TURINZ AZISCE SOPRA UN NASTRO CHE SI PRESENTA
COME UNA SEQUENGA DI CASELLE NELLE QUALI POSSONO ESSERE
REZISTRATI SIMBOLI DI UN BEN DETERMINATO ALFABETO FINITO; ESSA
HA UNA TESTINA DI LETTURA E SCRITTURA CON CUI EFFETTUA
OPERAGIONI DI LETTURA E SCRITTURA SU UNA CASELLA DEL NASTRO.
HA ANCHE UN REPERTORIO FINITO DI ISTRUGIONI CHE DETERMINANO
LA SUA EVOLUGIONE IN CONSEZUENGA DEI DATI INIGIALI. LE
CARATTERISTICHE PRECEDENTI SONO COMUNI A MOLTE MACCHINE
FORMALI, MA LE MDT HANNO INOLTRE LA CARATTERISTICA DI
DISPORRE DI UN NASTRO POTENGIALMENTE INFINITO, OSSIA
ESTENDIBILE QUANTO SI VUOLE QUALORA QUESTO SI RENDA
NECESSARIO. OZNI PASSO DELL'EVOLUGIONE VIENE DETERMINATO DALLO
STATO ATTUALE S NEL QUALE LA MACCHINA SI TROVA E DAL
CARATTERE C CHE LA TESTINA LEZZE SULLA CASELLA DEL NASTRO SU
CUI SI TROVA E SI CONCRETIGGA NELL'EVENTUALE MODIFICA DEL
CONTENUTO DELLA CASELLA, NELL'EVENTUALE SPOSTAMENTO DELLA
TESTINA DI UNA POSIGIONE VERSO DESTRA O VERSO SINISTRA E
NELL'EVENTUALE CAMBIAMENTO DELLO STATO.
{'A': 'M', 'C': 'Q', 'B': 'O', 'E': 'P', 'D': 'R', 'G': 'G', 'F': 'T', 'I': 'V', 'H': 'U', 'M': 'A', 'L': 'S', 'O': 'B', 'N': 'N', 'Q': 'C', 'P': 'E', 'S': 'L', 'R': 'D', 'U': 'H', 'T': 'F', 'V': 'I', 'Z': 'Z'}
python decrypt.py 2.68s user 0.05s system 98% cpu 2.770 total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment