Skip to content

Instantly share code, notes, and snippets.

@btawney
Last active December 15, 2015 06:28
Show Gist options
  • Save btawney/5216150 to your computer and use it in GitHub Desktop.
Save btawney/5216150 to your computer and use it in GitHub Desktop.
Short ciphers are relatively immune to a frequency analysis attack, but if they are long enough they can be solved with this approach, which checks all possible solutions given a lexicon of possible words and the pattern of repeated characters in the cipher. This approach would be vastly improved by adding some likelihood checks that would cause…
// ShortCipherSolver.cpp
#include <stdio.h>
#include <string.h>
void SolutionFound(char *solution, char *cipher);
void TrimBuffer(char *buffer);
void Solve(char *cipher);
// Solve ciphers provided as arguments to the command
int main(int argc, char *argv[]) {
int i;
for (i = 1; i < argc; ++i) {
if (i > 1) {
fprintf(stdout, "\n");
}
fprintf(stdout, "%s\n", argv[i]);
Solve(argv[i]);
}
return 0;
}
// This is called when a solution is found
void SolutionFound(char *solution, char *cipher) {
int c;
char *buffer = new char[strlen(cipher) + 1];
for (c = 0; cipher[c] != 0; ++c) {
buffer[c] = solution[cipher[c]];
}
fprintf(stdout, "%s\n", buffer);
delete[] buffer;
}
// The lexical tree is basically a finite state transducer that
// can produce all words in the lexicon. The root state represents
// the beginning of a word, the succeeding states are possible first
// letters. Each subsequent state is a possible second letter, etc.
class LexicalTreeElement {
private:
// Each state contains a letter
char _letter;
// This is true if the letter can be the last letter of a word
bool _canBeTerminal;
// The _next element is the next sibling of the current letter.
LexicalTreeElement *_next;
// The _successor is the first letter that could follow this letter
// in a valid word
LexicalTreeElement *_successor;
public:
LexicalTreeElement(char letter) {
this->_letter = letter;
this->_canBeTerminal = false;
this->_next = NULL;
this->_successor = NULL;
}
~LexicalTreeElement() {
if (this->_successor != NULL) {
delete this->_successor;
}
if (this->_next != NULL) {
delete this->_next;
}
}
// Add a word to the lexicon
void AddWord (char *word) {
if (word[0] == 0) {
return;
} else if (word[0] == this->_letter) {
if (word[1] == 0) {
this->_canBeTerminal = true;
} else {
if (this->_successor == NULL) {
this->_successor = new LexicalTreeElement(word[1]);
}
this->_successor->AddWord(&word[1]);
}
} else {
if (this->_next == NULL) {
this->_next = new LexicalTreeElement(word[0]);
}
this->_next->AddWord(word);
}
}
// Recursively solve a cipher
void Solve
(
char *solution,
char *cipher,
LexicalTreeElement *top,
char *topCipher,
int words,
int wordsLimit,
char *solutionBuffer,
int solutionBufferPointer
) {
int c;
bool notSolvedFor = true;
// If we reach the end of the cipher, and we are
// at the start of a word, then this is a valid solution
if (cipher[0] == 0) {
if (this == top) {
solutionBuffer[solutionBufferPointer] = 0;
fprintf(stdout, "%s\n", solutionBuffer);
}
return;
}
// This is a simple form of likelihood check: Fail
// if we have more words already than we think is likely.
// (I.e., avoid a solution composed of many small words)
if (words > wordsLimit) {
return;
}
// If this cipher character does not have a tentative
// solution yet, then try the possibilities
if (solution[cipher[0]] == 0) {
for (c = 0; c <= 0xff && notSolvedFor; ++c) {
if (solution[c] == this->_letter) {
notSolvedFor = false;
}
}
if (notSolvedFor) {
solution[cipher[0]] = this->_letter;
solutionBuffer[solutionBufferPointer] = this->_letter;
// Try the next letter of the cipher
if (this->_successor != NULL) {
this->_successor->Solve
(
solution,
&cipher[1],
top,
topCipher,
words,
wordsLimit,
solutionBuffer,
solutionBufferPointer + 1
);
}
// If this letter can be terminal, try going back to the top
if (this->_canBeTerminal) {
solutionBuffer[solutionBufferPointer + 1] = ' ';
top->Solve
(
solution,
&cipher[1],
top,
topCipher,
words + 1,
wordsLimit,
solutionBuffer,
solutionBufferPointer + 2
);
}
solution[cipher[0]] = 0;
}
// Try the next letter of the lexicon
if (this->_next != NULL) {
this->_next->Solve
(
solution,
cipher,
top,
topCipher,
words,
wordsLimit,
solutionBuffer,
solutionBufferPointer
);
}
} else if (solution[cipher[0]] == this->_letter) {
solutionBuffer[solutionBufferPointer] = this->_letter;
// Try the next letter of the cipher
if (this->_successor != NULL) {
this->_successor->Solve
(
solution,
&cipher[1],
top,
topCipher,
words,
wordsLimit,
solutionBuffer,
solutionBufferPointer + 1
);
}
// If this letter can be terminal, try going back to the top
if (this->_canBeTerminal) {
solutionBuffer[solutionBufferPointer + 1] = ' ';
top->Solve
(
solution,
&cipher[1],
top,
topCipher,
words + 1,
wordsLimit,
solutionBuffer,
solutionBufferPointer + 2
);
}
} else {
// Try the next letter of the lexicon
if (this->_next != NULL) {
this->_next->Solve
(
solution,
cipher,
top,
topCipher,
words,
wordsLimit,
solutionBuffer,
solutionBufferPointer
);
}
}
}
// This can be used to prove that the lexicon will produce all
// possible valid words. It's really just for debugging
void PrintAllWords (char *buffer, int offset) {
buffer[offset] = this->_letter;
if (this->_canBeTerminal) {
buffer[offset + 1] = 0;
fprintf(stdout, "%s\n", buffer);
}
if (this->_successor != NULL) {
this->_successor->PrintAllWords(buffer, offset + 1);
}
if (this->_next != NULL) {
this->_next->PrintAllWords(buffer, offset);
}
}
};
// Convert a buffer value to lower case and remove non-alpha characters
void TrimBuffer(char *buffer) {
int i = 0;
int j = 0;
for (i = 0; buffer[i] != 0; ++i) {
if (buffer[i] >= 'A' && buffer[i] <= 'Z') {
buffer[i] += 'a' - 'A';
}
if (buffer[i] >= 'a' && buffer[i] <= 'z') {
buffer[j] = buffer[i];
++j;
}
}
buffer[j] = 0;
}
// Solve a cipher
void Solve(char *cipher) {
FILE *f = fopen("./all_english_words.txt", "r");
char buffer[128];
char *read;
char solution[0xff];
int c;
LexicalTreeElement *root = NULL;
int cipherLength;
int expectedWords;
char *solutionBuffer = new char[strlen(cipher) * 2];
// Abandon any solution that produces more than length / 4 words.
// Many possible nonsense solutions involve small words.
// Really, we need some linguistically-based likelihood checks.
cipherLength = strlen(cipher);
expectedWords = cipherLength <= 4 ? 1 : cipherLength / 4;
for (c = 0; c <= 0xff; ++c) {
solution[c] = 0;
}
solution[' '] = ' ';
solution['\''] = '\'';
if (f == NULL || ferror(f)) {
fprintf(stderr, "Unable to open lexicon file\n");
return;
}
while (! feof(f)) {
read = fgets(buffer, 128, f);
if (read != NULL) {
TrimBuffer(buffer);
if (buffer[0] != 0) {
if (root == NULL) {
root = new LexicalTreeElement(buffer[0]);
}
root->AddWord(buffer);
}
}
}
root->Solve
(
solution,
cipher,
root,
cipher,
0,
expectedWords,
solutionBuffer,
0
);
delete[] solutionBuffer;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment