Last active
December 15, 2015 06:28
-
-
Save btawney/5216150 to your computer and use it in GitHub Desktop.
Short ciphers are relatively immune to a frequency analysis attack, but if they are long enough they can be solved with this approach, which checks all possible solutions given a lexicon of possible words and the pattern of repeated characters in the cipher. This approach would be vastly improved by adding some likelihood checks that would cause…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ShortCipherSolver.cpp | |
#include <stdio.h> | |
#include <string.h> | |
void SolutionFound(char *solution, char *cipher); | |
void TrimBuffer(char *buffer); | |
void Solve(char *cipher); | |
// Solve ciphers provided as arguments to the command | |
int main(int argc, char *argv[]) { | |
int i; | |
for (i = 1; i < argc; ++i) { | |
if (i > 1) { | |
fprintf(stdout, "\n"); | |
} | |
fprintf(stdout, "%s\n", argv[i]); | |
Solve(argv[i]); | |
} | |
return 0; | |
} | |
// This is called when a solution is found | |
void SolutionFound(char *solution, char *cipher) { | |
int c; | |
char *buffer = new char[strlen(cipher) + 1]; | |
for (c = 0; cipher[c] != 0; ++c) { | |
buffer[c] = solution[cipher[c]]; | |
} | |
fprintf(stdout, "%s\n", buffer); | |
delete[] buffer; | |
} | |
// The lexical tree is basically a finite state transducer that | |
// can produce all words in the lexicon. The root state represents | |
// the beginning of a word, the succeeding states are possible first | |
// letters. Each subsequent state is a possible second letter, etc. | |
class LexicalTreeElement { | |
private: | |
// Each state contains a letter | |
char _letter; | |
// This is true if the letter can be the last letter of a word | |
bool _canBeTerminal; | |
// The _next element is the next sibling of the current letter. | |
LexicalTreeElement *_next; | |
// The _successor is the first letter that could follow this letter | |
// in a valid word | |
LexicalTreeElement *_successor; | |
public: | |
LexicalTreeElement(char letter) { | |
this->_letter = letter; | |
this->_canBeTerminal = false; | |
this->_next = NULL; | |
this->_successor = NULL; | |
} | |
~LexicalTreeElement() { | |
if (this->_successor != NULL) { | |
delete this->_successor; | |
} | |
if (this->_next != NULL) { | |
delete this->_next; | |
} | |
} | |
// Add a word to the lexicon | |
void AddWord (char *word) { | |
if (word[0] == 0) { | |
return; | |
} else if (word[0] == this->_letter) { | |
if (word[1] == 0) { | |
this->_canBeTerminal = true; | |
} else { | |
if (this->_successor == NULL) { | |
this->_successor = new LexicalTreeElement(word[1]); | |
} | |
this->_successor->AddWord(&word[1]); | |
} | |
} else { | |
if (this->_next == NULL) { | |
this->_next = new LexicalTreeElement(word[0]); | |
} | |
this->_next->AddWord(word); | |
} | |
} | |
// Recursively solve a cipher | |
void Solve | |
( | |
char *solution, | |
char *cipher, | |
LexicalTreeElement *top, | |
char *topCipher, | |
int words, | |
int wordsLimit, | |
char *solutionBuffer, | |
int solutionBufferPointer | |
) { | |
int c; | |
bool notSolvedFor = true; | |
// If we reach the end of the cipher, and we are | |
// at the start of a word, then this is a valid solution | |
if (cipher[0] == 0) { | |
if (this == top) { | |
solutionBuffer[solutionBufferPointer] = 0; | |
fprintf(stdout, "%s\n", solutionBuffer); | |
} | |
return; | |
} | |
// This is a simple form of likelihood check: Fail | |
// if we have more words already than we think is likely. | |
// (I.e., avoid a solution composed of many small words) | |
if (words > wordsLimit) { | |
return; | |
} | |
// If this cipher character does not have a tentative | |
// solution yet, then try the possibilities | |
if (solution[cipher[0]] == 0) { | |
for (c = 0; c <= 0xff && notSolvedFor; ++c) { | |
if (solution[c] == this->_letter) { | |
notSolvedFor = false; | |
} | |
} | |
if (notSolvedFor) { | |
solution[cipher[0]] = this->_letter; | |
solutionBuffer[solutionBufferPointer] = this->_letter; | |
// Try the next letter of the cipher | |
if (this->_successor != NULL) { | |
this->_successor->Solve | |
( | |
solution, | |
&cipher[1], | |
top, | |
topCipher, | |
words, | |
wordsLimit, | |
solutionBuffer, | |
solutionBufferPointer + 1 | |
); | |
} | |
// If this letter can be terminal, try going back to the top | |
if (this->_canBeTerminal) { | |
solutionBuffer[solutionBufferPointer + 1] = ' '; | |
top->Solve | |
( | |
solution, | |
&cipher[1], | |
top, | |
topCipher, | |
words + 1, | |
wordsLimit, | |
solutionBuffer, | |
solutionBufferPointer + 2 | |
); | |
} | |
solution[cipher[0]] = 0; | |
} | |
// Try the next letter of the lexicon | |
if (this->_next != NULL) { | |
this->_next->Solve | |
( | |
solution, | |
cipher, | |
top, | |
topCipher, | |
words, | |
wordsLimit, | |
solutionBuffer, | |
solutionBufferPointer | |
); | |
} | |
} else if (solution[cipher[0]] == this->_letter) { | |
solutionBuffer[solutionBufferPointer] = this->_letter; | |
// Try the next letter of the cipher | |
if (this->_successor != NULL) { | |
this->_successor->Solve | |
( | |
solution, | |
&cipher[1], | |
top, | |
topCipher, | |
words, | |
wordsLimit, | |
solutionBuffer, | |
solutionBufferPointer + 1 | |
); | |
} | |
// If this letter can be terminal, try going back to the top | |
if (this->_canBeTerminal) { | |
solutionBuffer[solutionBufferPointer + 1] = ' '; | |
top->Solve | |
( | |
solution, | |
&cipher[1], | |
top, | |
topCipher, | |
words + 1, | |
wordsLimit, | |
solutionBuffer, | |
solutionBufferPointer + 2 | |
); | |
} | |
} else { | |
// Try the next letter of the lexicon | |
if (this->_next != NULL) { | |
this->_next->Solve | |
( | |
solution, | |
cipher, | |
top, | |
topCipher, | |
words, | |
wordsLimit, | |
solutionBuffer, | |
solutionBufferPointer | |
); | |
} | |
} | |
} | |
// This can be used to prove that the lexicon will produce all | |
// possible valid words. It's really just for debugging | |
void PrintAllWords (char *buffer, int offset) { | |
buffer[offset] = this->_letter; | |
if (this->_canBeTerminal) { | |
buffer[offset + 1] = 0; | |
fprintf(stdout, "%s\n", buffer); | |
} | |
if (this->_successor != NULL) { | |
this->_successor->PrintAllWords(buffer, offset + 1); | |
} | |
if (this->_next != NULL) { | |
this->_next->PrintAllWords(buffer, offset); | |
} | |
} | |
}; | |
// Convert a buffer value to lower case and remove non-alpha characters | |
void TrimBuffer(char *buffer) { | |
int i = 0; | |
int j = 0; | |
for (i = 0; buffer[i] != 0; ++i) { | |
if (buffer[i] >= 'A' && buffer[i] <= 'Z') { | |
buffer[i] += 'a' - 'A'; | |
} | |
if (buffer[i] >= 'a' && buffer[i] <= 'z') { | |
buffer[j] = buffer[i]; | |
++j; | |
} | |
} | |
buffer[j] = 0; | |
} | |
// Solve a cipher | |
void Solve(char *cipher) { | |
FILE *f = fopen("./all_english_words.txt", "r"); | |
char buffer[128]; | |
char *read; | |
char solution[0xff]; | |
int c; | |
LexicalTreeElement *root = NULL; | |
int cipherLength; | |
int expectedWords; | |
char *solutionBuffer = new char[strlen(cipher) * 2]; | |
// Abandon any solution that produces more than length / 4 words. | |
// Many possible nonsense solutions involve small words. | |
// Really, we need some linguistically-based likelihood checks. | |
cipherLength = strlen(cipher); | |
expectedWords = cipherLength <= 4 ? 1 : cipherLength / 4; | |
for (c = 0; c <= 0xff; ++c) { | |
solution[c] = 0; | |
} | |
solution[' '] = ' '; | |
solution['\''] = '\''; | |
if (f == NULL || ferror(f)) { | |
fprintf(stderr, "Unable to open lexicon file\n"); | |
return; | |
} | |
while (! feof(f)) { | |
read = fgets(buffer, 128, f); | |
if (read != NULL) { | |
TrimBuffer(buffer); | |
if (buffer[0] != 0) { | |
if (root == NULL) { | |
root = new LexicalTreeElement(buffer[0]); | |
} | |
root->AddWord(buffer); | |
} | |
} | |
} | |
root->Solve | |
( | |
solution, | |
cipher, | |
root, | |
cipher, | |
0, | |
expectedWords, | |
solutionBuffer, | |
0 | |
); | |
delete[] solutionBuffer; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment