btawney/ShortCipherSolver.cpp

## ShortCipherSolver.cpp
// ShortCipherSolver.cpp

#include <stdio.h>
#include <string.h>

void SolutionFound(char *solution, char *cipher);
void TrimBuffer(char *buffer);
void Solve(char *cipher);

// Solve ciphers provided as arguments to the command
int main(int argc, char *argv[]) {
  int i;
  for (i = 1; i < argc; ++i) {
    if (i > 1) {
      fprintf(stdout, "\n");
    }
    fprintf(stdout, "%s\n", argv[i]);
    Solve(argv[i]);
  }
  return 0;
}

// This is called when a solution is found
void SolutionFound(char *solution, char *cipher) {
  int c;
  char *buffer = new char[strlen(cipher) + 1];

  for (c = 0; cipher[c] != 0; ++c) {
    buffer[c] = solution[cipher[c]];
  }
  fprintf(stdout, "%s\n", buffer);

  delete[] buffer;
}

// The lexical tree is basically a finite state transducer that
// can produce all words in the lexicon.  The root state represents
// the beginning of a word, the succeeding states are possible first
// letters.  Each subsequent state is a possible second letter, etc.
class LexicalTreeElement {
private:
  // Each state contains a letter
  char _letter;

  // This is true if the letter can be the last letter of a word
  bool _canBeTerminal;

  // The _next element is the next sibling of the current letter.
  LexicalTreeElement *_next;

  // The _successor is the first letter that could follow this letter
  // in a valid word
  LexicalTreeElement *_successor;
public:
  LexicalTreeElement(char letter) {
    this->_letter = letter;
    this->_canBeTerminal = false;
    this->_next = NULL;
    this->_successor = NULL;
  }
  ~LexicalTreeElement() {
    if (this->_successor != NULL) {
      delete this->_successor;
    }
    if (this->_next != NULL) {
      delete this->_next;
    }
  }

  // Add a word to the lexicon
  void AddWord (char *word) {
    if (word[0] == 0) {
      return;
    } else if (word[0] == this->_letter) {
      if (word[1] == 0) {
        this->_canBeTerminal = true;
      } else {
        if (this->_successor == NULL) {
          this->_successor = new LexicalTreeElement(word[1]);
        }
        this->_successor->AddWord(&word[1]);
      }
    } else {
      if (this->_next == NULL) {
        this->_next = new LexicalTreeElement(word[0]);
      }
      this->_next->AddWord(word);
    }
  }

  // Recursively solve a cipher
  void Solve
    (
      char *solution,
      char *cipher,
      LexicalTreeElement *top,
      char *topCipher,
      int words,
      int wordsLimit,
      char *solutionBuffer,
      int solutionBufferPointer
    ) {
    int c;
    bool notSolvedFor = true;

    // If we reach the end of the cipher, and we are
    // at the start of a word, then this is a valid solution
    if (cipher[0] == 0) {
      if (this == top) {
        solutionBuffer[solutionBufferPointer] = 0;
        fprintf(stdout, "%s\n", solutionBuffer);
      }
      return;
    }

    // This is a simple form of likelihood check:  Fail
    // if we have more words already than we think is likely.
    // (I.e., avoid a solution composed of many small words)
    if (words > wordsLimit) {
      return;
    }

    // If this cipher character does not have a tentative
    // solution yet, then try the possibilities
    if (solution[cipher[0]] == 0) {
      for (c = 0; c <= 0xff && notSolvedFor; ++c) {
        if (solution[c] == this->_letter) {
          notSolvedFor = false;
        }
      }

      if (notSolvedFor) {
        solution[cipher[0]] = this->_letter;
        solutionBuffer[solutionBufferPointer] = this->_letter;

        // Try the next letter of the cipher
        if (this->_successor != NULL) {
          this->_successor->Solve
            (
            solution,
            &cipher[1],
            top,
            topCipher,
            words,
            wordsLimit,
            solutionBuffer,
            solutionBufferPointer + 1
            );
        }

        // If this letter can be terminal, try going back to the top
        if (this->_canBeTerminal) {
          solutionBuffer[solutionBufferPointer + 1] = ' ';
          top->Solve
            (
            solution,
            &cipher[1],
            top,
            topCipher,
            words + 1,
            wordsLimit,
            solutionBuffer,
            solutionBufferPointer + 2
            );
        }

        solution[cipher[0]] = 0;
      }

      // Try the next letter of the lexicon
      if (this->_next != NULL) {
        this->_next->Solve
          (
          solution,
          cipher,
          top,
          topCipher,
          words,
          wordsLimit,
          solutionBuffer,
          solutionBufferPointer
          );
      }
    } else if (solution[cipher[0]] == this->_letter) {
      solutionBuffer[solutionBufferPointer] = this->_letter;

      // Try the next letter of the cipher
      if (this->_successor != NULL) {
        this->_successor->Solve
          (
          solution,
          &cipher[1],
          top,
          topCipher,
          words,
          wordsLimit,
          solutionBuffer,
          solutionBufferPointer + 1
          );
      }

      // If this letter can be terminal, try going back to the top
      if (this->_canBeTerminal) {
        solutionBuffer[solutionBufferPointer + 1] = ' ';
        top->Solve
          (
          solution,
          &cipher[1],
          top,
          topCipher,
          words + 1,
          wordsLimit,
          solutionBuffer,
          solutionBufferPointer + 2
          );
      }
    } else {
      // Try the next letter of the lexicon
      if (this->_next != NULL) {
        this->_next->Solve
          (
          solution,
          cipher,
          top,
          topCipher,
          words,
          wordsLimit,
          solutionBuffer,
          solutionBufferPointer
          );
      }
    }
  }

  // This can be used to prove that the lexicon will produce all
  // possible valid words.  It's really just for debugging
  void PrintAllWords (char *buffer, int offset) {
    buffer[offset] = this->_letter;

    if (this->_canBeTerminal) {
      buffer[offset + 1] = 0;
      fprintf(stdout, "%s\n", buffer);
    }
    if (this->_successor != NULL) {
      this->_successor->PrintAllWords(buffer, offset + 1);
    }
    if (this->_next != NULL) {
      this->_next->PrintAllWords(buffer, offset);
    }
  }
};

// Convert a buffer value to lower case and remove non-alpha characters
void TrimBuffer(char *buffer) {
  int i = 0;
  int j = 0;

  for (i = 0; buffer[i] != 0; ++i) {
    if (buffer[i] >= 'A' && buffer[i] <= 'Z') {
      buffer[i] += 'a' - 'A';
    }
    if (buffer[i] >= 'a' && buffer[i] <= 'z') {
      buffer[j] = buffer[i];
      ++j;
    }
  }
  buffer[j] = 0;
}

// Solve a cipher
void Solve(char *cipher) {
  FILE *f = fopen("./all_english_words.txt", "r");
  char buffer[128];
  char *read;
  char solution[0xff];
  int c;
  LexicalTreeElement *root = NULL;
  int cipherLength;
  int expectedWords;
  char *solutionBuffer = new char[strlen(cipher) * 2];

  // Abandon any solution that produces more than length / 4 words.
  // Many possible nonsense solutions involve small words.
  // Really, we need some linguistically-based likelihood checks.
  cipherLength = strlen(cipher);
  expectedWords = cipherLength <= 4 ? 1 : cipherLength / 4;

  for (c = 0; c <= 0xff; ++c) {
    solution[c] = 0;
  }

  solution[' '] = ' ';
  solution['\''] = '\'';

  if (f == NULL || ferror(f)) {
    fprintf(stderr, "Unable to open lexicon file\n");
    return;
  }
  while (! feof(f)) {
    read = fgets(buffer, 128, f);

    if (read != NULL) {
      TrimBuffer(buffer);

      if (buffer[0] != 0) {
        if (root == NULL) {
          root = new LexicalTreeElement(buffer[0]);
        }

        root->AddWord(buffer);
      }
    }
  }

  root->Solve
    (
    solution,
    cipher,
    root,
    cipher,
    0,
    expectedWords,
    solutionBuffer,
    0
    );

  delete[] solutionBuffer;
}
	// ShortCipherSolver.cpp

	#include <stdio.h>
	#include <string.h>

	void SolutionFound(char solution, char cipher);
	void TrimBuffer(char *buffer);
	void Solve(char *cipher);

	// Solve ciphers provided as arguments to the command
	int main(int argc, char *argv[]) {
	int i;
	for (i = 1; i < argc; ++i) {
	if (i > 1) {
	fprintf(stdout, "\n");
	}
	fprintf(stdout, "%s\n", argv[i]);
	Solve(argv[i]);
	}
	return 0;
	}

	// This is called when a solution is found
	void SolutionFound(char solution, char cipher) {
	int c;
	char *buffer = new char[strlen(cipher) + 1];

	for (c = 0; cipher[c] != 0; ++c) {
	buffer[c] = solution[cipher[c]];
	}
	fprintf(stdout, "%s\n", buffer);

	delete[] buffer;
	}

	// The lexical tree is basically a finite state transducer that
	// can produce all words in the lexicon. The root state represents
	// the beginning of a word, the succeeding states are possible first
	// letters. Each subsequent state is a possible second letter, etc.
	class LexicalTreeElement {
	private:
	// Each state contains a letter
	char _letter;

	// This is true if the letter can be the last letter of a word
	bool _canBeTerminal;

	// The _next element is the next sibling of the current letter.
	LexicalTreeElement *_next;

	// The _successor is the first letter that could follow this letter
	// in a valid word
	LexicalTreeElement *_successor;
	public:
	LexicalTreeElement(char letter) {
	this->_letter = letter;
	this->_canBeTerminal = false;
	this->_next = NULL;
	this->_successor = NULL;
	}
	~LexicalTreeElement() {
	if (this->_successor != NULL) {
	delete this->_successor;
	}
	if (this->_next != NULL) {
	delete this->_next;
	}
	}

	// Add a word to the lexicon
	void AddWord (char *word) {
	if (word[0] == 0) {
	return;
	} else if (word[0] == this->_letter) {
	if (word[1] == 0) {
	this->_canBeTerminal = true;
	} else {
	if (this->_successor == NULL) {
	this->_successor = new LexicalTreeElement(word[1]);
	}
	this->_successor->AddWord(&word[1]);
	}
	} else {
	if (this->_next == NULL) {
	this->_next = new LexicalTreeElement(word[0]);
	}
	this->_next->AddWord(word);
	}
	}

	// Recursively solve a cipher
	void Solve
	(
	char *solution,
	char *cipher,
	LexicalTreeElement *top,
	char *topCipher,
	int words,
	int wordsLimit,
	char *solutionBuffer,
	int solutionBufferPointer
	) {
	int c;
	bool notSolvedFor = true;

	// If we reach the end of the cipher, and we are
	// at the start of a word, then this is a valid solution
	if (cipher[0] == 0) {
	if (this == top) {
	solutionBuffer[solutionBufferPointer] = 0;
	fprintf(stdout, "%s\n", solutionBuffer);
	}
	return;
	}

	// This is a simple form of likelihood check: Fail
	// if we have more words already than we think is likely.
	// (I.e., avoid a solution composed of many small words)
	if (words > wordsLimit) {
	return;
	}

	// If this cipher character does not have a tentative
	// solution yet, then try the possibilities
	if (solution[cipher[0]] == 0) {
	for (c = 0; c <= 0xff && notSolvedFor; ++c) {
	if (solution[c] == this->_letter) {
	notSolvedFor = false;
	}
	}

	if (notSolvedFor) {
	solution[cipher[0]] = this->_letter;
	solutionBuffer[solutionBufferPointer] = this->_letter;

	// Try the next letter of the cipher
	if (this->_successor != NULL) {
	this->_successor->Solve
	(
	solution,
	&cipher[1],
	top,
	topCipher,
	words,
	wordsLimit,
	solutionBuffer,
	solutionBufferPointer + 1
	);
	}

	// If this letter can be terminal, try going back to the top
	if (this->_canBeTerminal) {
	solutionBuffer[solutionBufferPointer + 1] = ' ';
	top->Solve
	(
	solution,
	&cipher[1],
	top,
	topCipher,
	words + 1,
	wordsLimit,
	solutionBuffer,
	solutionBufferPointer + 2
	);
	}

	solution[cipher[0]] = 0;
	}

	// Try the next letter of the lexicon
	if (this->_next != NULL) {
	this->_next->Solve
	(
	solution,
	cipher,
	top,
	topCipher,
	words,
	wordsLimit,
	solutionBuffer,
	solutionBufferPointer
	);
	}
	} else if (solution[cipher[0]] == this->_letter) {
	solutionBuffer[solutionBufferPointer] = this->_letter;

	// Try the next letter of the cipher
	if (this->_successor != NULL) {
	this->_successor->Solve
	(
	solution,
	&cipher[1],
	top,
	topCipher,
	words,
	wordsLimit,
	solutionBuffer,
	solutionBufferPointer + 1
	);
	}

	// If this letter can be terminal, try going back to the top
	if (this->_canBeTerminal) {
	solutionBuffer[solutionBufferPointer + 1] = ' ';
	top->Solve
	(
	solution,
	&cipher[1],
	top,
	topCipher,
	words + 1,
	wordsLimit,
	solutionBuffer,
	solutionBufferPointer + 2
	);
	}
	} else {
	// Try the next letter of the lexicon
	if (this->_next != NULL) {
	this->_next->Solve
	(
	solution,
	cipher,
	top,
	topCipher,
	words,
	wordsLimit,
	solutionBuffer,
	solutionBufferPointer
	);
	}
	}
	}

	// This can be used to prove that the lexicon will produce all
	// possible valid words. It's really just for debugging
	void PrintAllWords (char *buffer, int offset) {
	buffer[offset] = this->_letter;

	if (this->_canBeTerminal) {
	buffer[offset + 1] = 0;
	fprintf(stdout, "%s\n", buffer);
	}
	if (this->_successor != NULL) {
	this->_successor->PrintAllWords(buffer, offset + 1);
	}
	if (this->_next != NULL) {
	this->_next->PrintAllWords(buffer, offset);
	}
	}
	};

	// Convert a buffer value to lower case and remove non-alpha characters
	void TrimBuffer(char *buffer) {
	int i = 0;
	int j = 0;

	for (i = 0; buffer[i] != 0; ++i) {
	if (buffer[i] >= 'A' && buffer[i] <= 'Z') {
	buffer[i] += 'a' - 'A';
	}
	if (buffer[i] >= 'a' && buffer[i] <= 'z') {
	buffer[j] = buffer[i];
	++j;
	}
	}
	buffer[j] = 0;
	}

	// Solve a cipher
	void Solve(char *cipher) {
	FILE *f = fopen("./all_english_words.txt", "r");
	char buffer[128];
	char *read;
	char solution[0xff];
	int c;
	LexicalTreeElement *root = NULL;
	int cipherLength;
	int expectedWords;
	char solutionBuffer = new char[strlen(cipher) 2];

	// Abandon any solution that produces more than length / 4 words.
	// Many possible nonsense solutions involve small words.
	// Really, we need some linguistically-based likelihood checks.
	cipherLength = strlen(cipher);
	expectedWords = cipherLength <= 4 ? 1 : cipherLength / 4;

	for (c = 0; c <= 0xff; ++c) {
	solution[c] = 0;
	}

	solution[' '] = ' ';
	solution['\''] = '\'';

	if (f == NULL \|\| ferror(f)) {
	fprintf(stderr, "Unable to open lexicon file\n");
	return;
	}
	while (! feof(f)) {
	read = fgets(buffer, 128, f);

	if (read != NULL) {
	TrimBuffer(buffer);

	if (buffer[0] != 0) {
	if (root == NULL) {
	root = new LexicalTreeElement(buffer[0]);
	}

	root->AddWord(buffer);
	}
	}
	}

	root->Solve
	(
	solution,
	cipher,
	root,
	cipher,
	0,
	expectedWords,
	solutionBuffer,
	0
	);

	delete[] solutionBuffer;
	}