Skip to content

Instantly share code, notes, and snippets.

@kylebgorman
Last active May 25, 2021 14:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylebgorman/5d1ca713b7e288f89db8ef174347b7be to your computer and use it in GitHub Desktop.
Save kylebgorman/5d1ca713b7e288f89db8ef174347b7be to your computer and use it in GitHub Desktop.
Zodiac cipher 408: freestanding Python 3 script for converting the plaintext and ciphertext to OpenFst assets
#!/usr/bin/env python
#
# Constructs resources for Zodiac cipher 408:
#
# * Plaintext and ciphertext FARs
# * Unweighted "key" FSTs and "channel" (hypothesis space) FSTs
# * A textual symbol table for plaintext and ciphertext
#
# Requires: Pynini and OpenFst with the FAR extension.
import itertools
import os
import subprocess
import pynini
# Output filenames. All are relative paths, so the files are created in the
# current working directory.
#
# The two *_TXT files are scratch inputs for `farcompilestrings`; they hold
# space-separated integer labels and are deleted once compilation is done.
# Each *_STD_FST / *_LOG_FST pair holds the same machine compiled with the
# "standard" and "log" arc types respectively.
PLAINTEXT_TXT = "plaintext.txt"
PLAINTEXT_STD_FST = "plaintext.fst"
PLAINTEXT_LOG_FST = "plaintext-log.fst"
CIPHERTEXT_TXT = "ciphertext.txt"
CIPHERTEXT_STD_FST = "ciphertext.fst"
CIPHERTEXT_LOG_FST = "ciphertext-log.fst"
# One-state transducer with an arc for each (plaintext, ciphertext) pair
# listed in KEY.
KEY_STD_FST = "key.fst"
KEY_LOG_FST = "key-log.fst"
# One-state transducer pairing every plaintext letter with every ciphertext
# symbol (the hypothesis space).
CHANNEL_STD_FST = "channel.fst"
CHANNEL_LOG_FST = "channel-log.fst"
# Textual symbol table mapping each plaintext and ciphertext character to its
# Unicode codepoint.
SYMBOL_TABLE = "sym.map"
# Plaintext, ciphertext, and a table from plaintext to ciphertext characters.
# The data is drawn from the "Harper key" available here:
#
# http://zodiackillerciphers.com/408/key.html
#
# Visually similar Unicode codepoints for the non-ASCII ciphertext characters
# were selected using:
#
# http://shapecatcher.com/
#
# The following interpretations are applied.
#
# Misspellings unlikely to be due to polyphony or encipherment mistakes:
#
# * FORREST for FOREST (fn. 6)
# * EXPERENCE for EXPERIENCE (fn. 10)
# * PARADICE for PARADISE (fn. 14)
# * ANAMAL for ANIMAL (ciphertext ▲, plaintext A [4] and S [3]; could be due to
# overinking of ciphertext △, read as plaintext I, but treating this as a
# misspelling allows us to preserve two early cycles)
#
# Misspellings which are probably encipherment mistakes:
#
# * DANGERTUE for DANGEROUS (fn. 8; ciphertext 🜴YE)
# * THAE for THAT (fn. 13; ciphertext N, plaintext E [8])
# * SND for AND (fn. 15; ciphertext ◭, plaintext S [2])
# * SLOI for SLOW (fn. 18; ciphertext △, plaintext I [13])
#
# Other plaintext typos:
#
# * "...AND ALL THE [people?--ed.] I HAVE KILLED"
#
# True polyphones:
#
# * Ciphertext ▲ for plaintext A [4] and S [3]
# The plaintext, one uppercase letter per cipher position and with no spaces
# or punctuation. The misspellings listed above are kept as-is so that entry i
# here corresponds to entry i of CIPHERTEXT.
PLAINTEXT = ("I", "L", "I", "K", "E", "K", "I", "L", "L", "I", "N", "G", "P",
"E", "O", "P", "L", "E", "B", "E", "C", "A", "U", "S", "E", "I",
"T", "I", "S", "S", "O", "M", "U", "C", "H", "F", "U", "N", "I",
"T", "I", "S", "M", "O", "R", "E", "F", "U", "N", "T", "H", "A",
"N", "K", "I", "L", "L", "I", "N", "G", "W", "I", "L", "D", "G",
"A", "M", "E", "I", "N", "T", "H", "E", "F", "O", "R", "R", "E",
"S", "T", "B", "E", "C", "A", "U", "S", "E", "M", "A", "N", "I",
"S", "T", "H", "E", "M", "O", "S", "T", "D", "A", "N", "G", "E",
"R", "T", "U", "E", "A", "N", "A", "M", "A", "L", "O", "F", "A",
"L", "L", "T", "O", "K", "I", "L", "L", "S", "O", "M", "E", "T",
"H", "I", "N", "G", "G", "I", "V", "E", "S", "M", "E", "T", "H",
"E", "M", "O", "S", "T", "T", "H", "R", "I", "L", "L", "I", "N",
"G", "E", "X", "P", "E", "R", "E", "N", "C", "E", "I", "T", "I",
"S", "E", "V", "E", "N", "B", "E", "T", "T", "E", "R", "T", "H",
"A", "N", "G", "E", "T", "T", "I", "N", "G", "Y", "O", "U", "R",
"R", "O", "C", "K", "S", "O", "F", "F", "W", "I", "T", "H", "A",
"G", "I", "R", "L", "T", "H", "E", "B", "E", "S", "T", "P", "A",
"R", "T", "O", "F", "I", "T", "I", "S", "T", "H", "A", "E", "W",
"H", "E", "N", "I", "D", "I", "E", "I", "W", "I", "L", "L", "B",
"E", "R", "E", "B", "O", "R", "N", "I", "N", "P", "A", "R", "A",
"D", "I", "C", "E", "S", "N", "D", "A", "L", "L", "T", "H", "E",
"I", "H", "A", "V", "E", "K", "I", "L", "L", "E", "D", "W", "I",
"L", "L", "B", "E", "C", "O", "M", "E", "M", "Y", "S", "L", "A",
"V", "E", "S", "I", "W", "I", "L", "L", "N", "O", "T", "G", "I",
"V", "E", "Y", "O", "U", "M", "Y", "N", "A", "M", "E", "B", "E",
"C", "A", "U", "S", "E", "Y", "O", "U", "W", "I", "L", "L", "T",
"R", "Y", "T", "O", "S", "L", "O", "I", "D", "O", "W", "N", "O",
"R", "S", "T", "O", "P", "M", "Y", "C", "O", "L", "L", "E", "C",
"T", "I", "N", "G", "O", "F", "S", "L", "A", "V", "E", "S", "F",
"O", "R", "M", "Y", "A", "F", "T", "E", "R", "L", "I", "F", "E",
# The remaining 18 letters are the trailing padding that follows the message
# proper; they are omitted from the loose reading given below.
"E", "B", "E", "O", "R", "I", "E", "T", "E", "M", "E", "T", "H",
"H", "P", "I", "T", "I",)
# Guards against a dropped or duplicated letter in the transcription above.
assert len(PLAINTEXT) == 408
# A relatively loose reading of the plaintext, with errors corrected, spaces
# and punctuation inserted, and the final padding removed, would read:
#
# "I like killing people because it is so much fun. It is more fun than killing
# wild game in the forest because man is the most dangerous animal of all. To
# kill something gives me the most thrilling experience. It is even better than
# getting your rocks off with a girl. The best part of it is that when I die, I
# will be reborn in paradise and all the people I have killed will become my
# slaves. I will not give you my name because you will try to slow down or stop
# my collecting of slaves for my afterlife."
CIPHERTEXT = (
# Part 1.
"β–³", "β—ͺ", "P", "βŸ‹", "Z", "βŸ‹", "U", "B", "β—ͺ", "𝈲", "O",
"R", "⚻", "ꟼ", "X", "⚻", "B",
"W", "V", "+", "Ǝ", "G", "Y", "F", "αƒ’", "β–³", "H", "P",
"⊑", "K", "🜴", "βŒ•", "Y", "Ǝ",
"M", "J", "Y", "Ξ›", "U", "I", "𝈲", "β—­", "βŒ•", "T", "β₯Ώ",
"N", "Q", "Y", "D", "●", "ꝋ",
"S", "Ο•", "βŸ‹", "β–³", "β– ", "B", "P", "O", "R", "A", "U",
"β—ͺ", "ꟻ", "R", "β…ƒ", "βŒ•", "E",
"𝈲", "Ξ›", "L", "M", "Z", "J", "α—‘", "Π―", "⟍", "ꟼ", "F",
"H", "V", "W", "Ǝ", "β–²", "Y",
"⊑", "+", "βŒ•", "G", "D", "β–³", "K", "I", "ꝋ", "αƒ’", "βŒ•",
"X", "β–²", "●", "βŒ–", "S", "Ο•",
"R", "N", "β₯Ώ", "I", "Y", "E", "β…ƒ", "O", "β–²", "βŒ•", "G",
"B", "T", "Q", "S", "β– ", "B",
"L", "α—‘", "βŸ‹", "P", "β– ", "B", "⊑", "X", "βŒ•", "E", "H",
"M", "U", "Ξ›", "R", "R", "𝈲",
# Part 2.
"Ι”", "Z", "K", "βŒ•", "ꟼ", "I", "ꝋ", "W", "βŒ•", "🜴", "β–²",
"●", "L", "M", "Π―", "β–³", "β– ",
"B", "P", "D", "R", "+", "Ꚍ", "⚻", "αƒ’", "⟍", "N", "Ο•",
"Ǝ", "E", "U", "H", "𝈲", "F",
"Z", "Ι”", "ꟼ", "O", "V", "W", "I", "●", "+", "β₯Ώ", "L",
"ꝋ", "β…ƒ", "Ξ›", "R", "αƒ’", "H",
"I", "β–³", "D", "R", "β–‘", "T", "Y", "Π―", "⟍", "α—‘", "Ǝ",
"βŸ‹", "⊑", "X", "J", "Q", "A",
"P", "●", "M", "β–²", "R", "U", "β₯Ώ", "β—ͺ", "L", "ꝋ", "N",
"V", "E", "K", "H", "⚻", "G",
"Π―", "I", "🜴", "J", "𝈲", "●", "β–³", "β–²", "L", "M", "β…ƒ",
"N", "A", "ꝋ", "Z", "Ο•", "P",
"βŒ–", "U", "ꟼ", "𝈲", "A", "β–³", "β– ", "B", "V", "W", "⟍",
"+", "V", "T", "β₯Ώ", "O", "P",
"Ξ›", "⚻", "S", "Π―", "β…ƒ", "ꟻ", "U", "Ǝ", "αƒ’", "β—­", "D",
"βŒ–", "G", "β—ͺ", "β—ͺ", "I", "M",
# Part 3.
"N", "𝈲", "ꝋ", "S", "Ι”", "E", "βŸ‹", "β–³", "β—ͺ", "β—ͺ", "Z",
"ꟻ", "A", "P", "β– ", "B", "V",
"ꟼ", "Ǝ", "X", "βŒ•", "W", "βŒ•", "β–‘", "F", "β– ", "β–²", "Ι”",
"+", "⊑", "β–³", "A", "β–³", "B",
"β—ͺ", "O", "T", "●", "R", "U", "Ι”", "+", "β–‘", "α—‘", "Y",
"βŒ•", "β–‘", "Ξ›", "S", "βŒ•", "W",
"V", "Z", "Ǝ", "G", "Y", "K", "E", "β–‘", "T", "Y", "A",
"β–³", "β—ͺ", "β– ", "L", "β₯Ώ", "β–‘",
"H", "🜴", "F", "B", "X", "β–³", "βŒ–", "X", "A", "D", "α—‘",
"⟍", "β—­", "L", "🜴", "⚻", "βŒ•",
"β–‘", "Ǝ", "α—‘", "β– ", "β– ", "αƒ’", "Ǝ", "●", "P", "O", "R",
"X", "Q", "F", "β—ͺ", "G", "Ι”",
"Z", "⊑", "J", "T", "β₯Ώ", "βŒ•", "β–‘", "β–²", "J", "I", "+",
"Π―", "B", "P", "Q", "W", "αƒ’",
"V", "E", "X", "Π―", "β–³", "W", "I", "αƒ’", "βŒ•", "E", "H",
"M", "ꝋ", "⚻", "U", "I", "𝈲",)
assert len(CIPHERTEXT) == 408
KEY = {"A": {"G", "S", "β…ƒ", "β–²",},
"B": {"V",},
"C": {"Ǝ",},
"D": {"ꟻ", "βŒ–",},
"E": {"Z", "ꟼ", "W", "+", "αƒ’", "N", "E",},
"F": {"J", "Q",},
"G": {"R",},
"H": {"M", "ꝋ",},
"I": {"β–³", "P", "U", "𝈲",},
"K": {"βŸ‹",},
"L": {"β—ͺ", "B", "β– ",},
"M": {"βŒ•",},
"N": {"O", "Ξ›", "D", "Ο•",},
"O": {"X", "🜴", "T", "α—‘",},
"P": {"⚻",},
"R": {"β₯Ώ", "Π―", "⟍",},
"S": {"F", "⊑", "K", "β—­", "β–²",},
"T": {"H", "I", "●", "L",},
"U": {"Y",},
"V": {"Ι”",},
"W": {"A",},
"X": {"Ꚍ",},
"Y": {"β–‘",},}
def _check_alignment():
    """Checks that KEY licenses every plaintext/ciphertext pairing.

    PLAINTEXT and CIPHERTEXT are compared position by position.

    Returns:
        True if the two sequences have the same length and, at every position,
        the ciphertext symbol is a member of the KEY entry for the plaintext
        letter at that position. False otherwise, including when a plaintext
        letter has no entry in KEY at all.
    """
    # zip() stops at the shorter input, so a length mismatch must be rejected
    # explicitly or the unpaired tail would never be examined.
    if len(PLAINTEXT) != len(CIPHERTEXT):
        return False
    # .get() turns a plaintext letter that is absent from KEY into a failed
    # check rather than a KeyError.
    return all(c in KEY.get(p, ()) for (p, c) in zip(PLAINTEXT, CIPHERTEXT))
def _make_plaintext():
    """Compiles PLAINTEXT into "standard" and "log" arc-type string FSTs.

    The plaintext is first written to the scratch file PLAINTEXT_TXT as a
    single line of space-separated integers, one per plaintext letter, each
    being that letter's Unicode codepoint. `farcompilestrings` is then run on
    the scratch file once per arc type, writing PLAINTEXT_STD_FST and
    PLAINTEXT_LOG_FST. The scratch file is removed before returning, whether
    or not compilation succeeded.

    Raises:
        subprocess.CalledProcessError: If `farcompilestrings` exits with a
            non-zero status.
    """
    try:
        # The `with` block is closed before compiling so that the labels are
        # flushed to disk by the time `farcompilestrings` reads the file.
        with open(PLAINTEXT_TXT, "w") as sink:
            print(" ".join(str(ord(p)) for p in PLAINTEXT), file=sink)
        for (arc_type, fst_path) in (("standard", PLAINTEXT_STD_FST),
                                     ("log", PLAINTEXT_LOG_FST)):
            subprocess.check_call(("farcompilestrings",
                                   "--arc_type=" + arc_type,
                                   "--far_type=fst", "--fst_type=compact",
                                   PLAINTEXT_TXT, fst_path))
    finally:
        # Clean up even when compilation fails, so a failed run does not leave
        # the scratch file behind. The existence test covers the case where
        # open() itself failed and there is nothing to delete.
        if os.path.exists(PLAINTEXT_TXT):
            os.remove(PLAINTEXT_TXT)
def _make_ciphertext():
    """Compiles CIPHERTEXT into "standard" and "log" arc-type string FSTs.

    The ciphertext is first written to the scratch file CIPHERTEXT_TXT as a
    single line of space-separated integers, one per ciphertext symbol, each
    being that symbol's Unicode codepoint. `farcompilestrings` is then run on
    the scratch file once per arc type, writing CIPHERTEXT_STD_FST and
    CIPHERTEXT_LOG_FST. The scratch file is removed before returning, whether
    or not compilation succeeded.

    Raises:
        subprocess.CalledProcessError: If `farcompilestrings` exits with a
            non-zero status.
    """
    try:
        # The `with` block is closed before compiling so that the labels are
        # flushed to disk by the time `farcompilestrings` reads the file.
        with open(CIPHERTEXT_TXT, "w") as sink:
            print(" ".join(str(ord(c)) for c in CIPHERTEXT), file=sink)
        for (arc_type, fst_path) in (("standard", CIPHERTEXT_STD_FST),
                                     ("log", CIPHERTEXT_LOG_FST)):
            subprocess.check_call(("farcompilestrings",
                                   "--arc_type=" + arc_type,
                                   "--far_type=fst", "--fst_type=compact",
                                   CIPHERTEXT_TXT, fst_path))
    finally:
        # Clean up even when compilation fails, so a failed run does not leave
        # the scratch file behind. The existence test covers the case where
        # open() itself failed and there is nothing to delete.
        if os.path.exists(CIPHERTEXT_TXT):
            os.remove(CIPHERTEXT_TXT)
def _make_key():
    """Builds the one-state "key" transducer and writes both variants of it.

    The machine has a single state which is both initial and final. For every
    ciphertext symbol that KEY lists under a plaintext letter, a self-loop is
    added whose input label is the plaintext letter's codepoint, whose output
    label is the ciphertext symbol's codepoint, and whose weight is 0.

    The result is written to KEY_STD_FST, and a copy converted with the
    "to_log" arc mapper is written to KEY_LOG_FST.
    """
    # `epsilon_machine` is not present in every Pynini release; when it is
    # missing, build the same single-state, arc-free acceptor by hand.
    try:
        fst = pynini.epsilon_machine()
    except AttributeError:
        fst = pynini.Fst()
        only_state = fst.add_state()
        fst.set_start(only_state)
        fst.set_final(only_state)
    state = fst.start()
    # Adds arcs. KEY's values are sets, whose iteration order can differ from
    # run to run; sorting fixes the order in which arcs are added.
    for plaintext in sorted(KEY):
        plaintext_label = ord(plaintext)
        for ciphertext in sorted(KEY[plaintext]):
            ciphertext_label = ord(ciphertext)
            fst.add_arc(
                state,
                pynini.Arc(plaintext_label, ciphertext_label, 0, state))
    # Writes it out.
    fst.write(KEY_STD_FST)
    pynini.arcmap(fst, map_type="to_log").write(KEY_LOG_FST)
def _make_channel():
    """Builds the one-state "channel" transducer and writes both variants.

    Unlike the key, the channel pairs every plaintext letter in KEY with every
    ciphertext symbol in KEY, regardless of which letter the symbol is listed
    under. Each pairing becomes a weight-0 self-loop on the single state, with
    the plaintext codepoint as input label and the ciphertext codepoint as
    output label.

    The result is written to CHANNEL_STD_FST, and a copy converted with the
    "to_log" arc mapper is written to CHANNEL_LOG_FST.
    """
    # `epsilon_machine` is not present in every Pynini release; when it is
    # missing, build the same single-state, arc-free acceptor by hand.
    try:
        fst = pynini.epsilon_machine()
    except AttributeError:
        fst = pynini.Fst()
        only_state = fst.add_state()
        fst.set_start(only_state)
        fst.set_final(only_state)
    state = fst.start()
    # The label lists do not depend on each other, so compute each one once
    # instead of re-deriving the ciphertext labels for every plaintext letter.
    plaintext_labels = sorted(ord(p) for p in KEY)
    ciphertext_labels = sorted(
        ord(c) for ciphertexts in KEY.values() for c in ciphertexts)
    # Adds arcs.
    for (ilabel, olabel) in itertools.product(plaintext_labels,
                                              ciphertext_labels):
        fst.add_arc(state, pynini.Arc(ilabel, olabel, 0, state))
    # A ciphertext symbol listed under more than one plaintext letter shows up
    # more than once in `ciphertext_labels`, yielding duplicate arcs; the
    # "arc_sum" mapper merges them.
    fst = pynini.arcmap(fst, map_type="arc_sum")
    # Writes it out.
    fst.write(CHANNEL_STD_FST)
    pynini.arcmap(fst, map_type="to_log").write(CHANNEL_LOG_FST)
def _make_symbol_table():
    """Writes a textual symbol table covering both alphabets to SYMBOL_TABLE.

    Every plaintext letter (the keys of KEY) and every ciphertext symbol (the
    members of KEY's values) is entered once, using the character itself as
    the symbol and its Unicode codepoint as the label.
    """
    # A set union collapses characters that occur in more than one place.
    alphabet = set(KEY).union(*KEY.values())
    table = pynini.SymbolTable()
    for glyph in sorted(alphabet):
        table.add_symbol(glyph, ord(glyph))
    table.write_text(SYMBOL_TABLE)
def main():
    """Validates the transcription, then writes every output file.

    Runs, in order: the plaintext FSTs, the ciphertext FSTs, the key FSTs, the
    channel FSTs, and the symbol table.

    Raises:
        AssertionError: If PLAINTEXT and CIPHERTEXT do not line up under KEY.
    """
    # Raised explicitly rather than via an `assert` statement: `python -O`
    # strips asserts, which would let misaligned data through unchecked.
    if not _check_alignment():
        raise AssertionError(
            "PLAINTEXT and CIPHERTEXT are not aligned under KEY")
    _make_plaintext()
    _make_ciphertext()
    _make_key()
    _make_channel()
    _make_symbol_table()


if __name__ == "__main__":
    main()
@drewstaylor
Copy link

@kylebgorman What's the equivalent of epsilon_machine() in the current pynini? Trying to test this out.

@kylebgorman
Copy link
Author

kylebgorman commented May 25, 2021

maybe this, not tested:

f = pynini.Fst()
s = f.add_state()
f.set_start(s)
f.set_final(s)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment