Skip to content

Instantly share code, notes, and snippets.

@sorrge
Created April 7, 2018 19:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sorrge/07972dcbc4e804a6979bbc3d260c7073 to your computer and use it in GitHub Desktop.
Save sorrge/07972dcbc4e804a6979bbc3d260c7073 to your computer and use it in GitHub Desktop.
Decipher a double substitution with a shift
#include <iostream>
#include <fstream>
#include <codecvt>
#include <string>
#include <sstream>
#include <unordered_set>
#include <unordered_map>
#include <set>
#include <vector>
#include <algorithm>
using namespace std;
wstring read_file(const string& fn) {
wifstream file(fn);
std::wstringstream wss;
wss << file.rdbuf();
return wss.str();
}
typedef vector<int> str;
class alpha {
unordered_map<wchar_t, int> charmap;
wstring alphabet;
public:
alpha(wstring& alphabet) : alphabet(alphabet) {
for(int i = 0; i < (int)alphabet.size(); ++i)
charmap[alphabet[i]] = i;
}
str to_num(wstring& s) {
str res(s.size());
for(int i = 0; i < (int)s.size(); ++i)
res[i] = charmap[s[i]];
return res;
}
wstring to_str(str& v) {
wstring res(v.size(), ' ');
for(int i = 0; i < (int)v.size(); ++i)
res[i] = alphabet[v[i]];
return res;
}
int size() {
return (int)alphabet.size();
}
};
class bigram_model {
vector<str> counts;
public:
bigram_model(str& corpus, int size) : counts(size) {
for(int i = 0; i < size; ++i)
counts[i].resize(size, 0);
for(int i = 0; i < (int)corpus.size() - 1; ++i)
++counts[corpus[i]][corpus[i + 1]];
}
int score(str& s) {
int c = 0;
for(int i = 0; i < (int)s.size() - 1; ++i)
c += counts[s[i]][s[i + 1]];
return c;
}
};
class trigram_model {
vector<vector<str>> counts;
public:
trigram_model(str& corpus, int size) : counts(size) {
for(int i = 0; i < size; ++i) {
counts[i].resize(size);
for(int j = 0; j < size; ++j)
counts[i][j].resize(size, 0);
}
for(int i = 0; i < (int)corpus.size() - 2; ++i)
++counts[corpus[i]][corpus[i + 1]][corpus[i + 2]];
}
int score(str& s) {
int c = 0;
for(int i = 0; i < (int)s.size() - 2; ++i)
c += counts[s[i]][s[i + 1]][s[i + 2]];
return c;
}
};
void decode_shifts(str& s, int shift, int size) {
for(int i = 0; i < (int)s.size(); ++i)
s[i] = ((s[i] - shift * i) % size + size) % size;
}
void substitute(str& s, str& subs) {
for(int& c : s)
c = subs[c];
}
void decode_1(str& s, str& subs, int shift, int size) {
substitute(s, subs);
decode_shifts(s, shift, size);
}
void decode_2(str& s, pair<str, str>& subs2, int shift, int size) {
substitute(s, subs2.first);
decode_shifts(s, shift, size);
substitute(s, subs2.second);
}
void mutate_subs(str& subs) {
int n = rand() % 2 + 1;
for(int i = 0; i < n; ++i) {
int idx1 = rand() % subs.size();
int idx2 = (idx1 + 1 + rand() % (subs.size() - 1)) % subs.size();
swap(subs[idx1], subs[idx2]);
}
}
str used;
void crossover_subs(str& child, str& parent1, str parent2) {
int size = (int)parent1.size();
used.resize(size);
fill_n(used.begin(), size, 0);
for(int i = 0; i < size; ++i) {
int chosen = rand() % 2 == 0 ? parent1[i] : parent2[i];
++used[chosen];
child[i] = chosen;
}
auto unused = find(used.begin(), used.end(), 0);
for(int i = 0; i < size && unused != used.end(); ++i)
if (used[child[i]] > 1) {
--used[child[i]];
int c = (int)distance(used.begin(), unused);
child[i] = c;
++used[c];
unused = find(unused + 1, used.end(), 0);
}
}
int main(int, char *[]) {
std::locale::global (std::locale (""));
wstring cipher = read_file("../cipher");
wcout << cipher << endl;
wcout << "cipher size: " << cipher.size() << endl;
set<wchar_t> used_chars(cipher.begin(), cipher.end());
wstring alphabet(used_chars.begin(), used_chars.end());
wcout << "alphabet: " << alphabet << "\t" << alphabet.size() << " chars" << endl;
wstring wp = read_file("../wp_pp.txt");
wcout << "corpus: " << wp.size() << endl;
alpha al(alphabet);
auto ciph = al.to_num(cipher);
auto corp = al.to_num(wp);
bigram_model bg(corp, al.size());
trigram_model tg(corp, al.size());
int best_score = bg.score(ciph);
wcout << "Init score: " << best_score << endl;
int pop_size = 10000, elite = 500, select = 2000;
typedef pair<str, str> org;
org seed = make_pair(al.to_num(alphabet), al.to_num(alphabet));
vector<org> population(pop_size, seed), new_population(population);
str scores(pop_size);
str order(pop_size);
for(int i = 0; i < pop_size; ++i) {
mutate_subs(population[i].first);
mutate_subs(population[i].second);
order[i] = i;
}
int key2 = 8 * 3;
for(int gen = 0;; ++gen) {
#pragma omp parallel for
for(int i = 0; i < pop_size; ++i) {
str cc = ciph;
decode_2(cc, population[i], key2, al.size());
scores[i] = bg.score(cc);
}
sort(order.begin(), order.end(), [&scores](int i1, int i2) { return scores[i1] > scores[i2]; });
if (scores[order[0]] > best_score) {
best_score = scores[order[0]];
wcout << endl;
wcout << "New best: " << best_score << endl;
str cc = ciph;
decode_2(cc, population[order[0]], key2, al.size());
wcout << al.to_str(cc) << endl << endl << endl;
}
for(int i = 0; i < elite; ++i)
new_population[i] = population[order[i]];
for(int i = elite; i < pop_size / 2; ++i) {
new_population[i] = population[order[rand() % select]];
mutate_subs(new_population[i].first);
mutate_subs(new_population[i].second);
}
for(int i = pop_size / 2; i < pop_size; ++i) {
int p1 = order[rand() % select], p2 = order[rand() % select];
crossover_subs(new_population[i].first, population[p1].first, population[p2].first);
crossover_subs(new_population[i].second, population[p1].second, population[p2].second);
}
population.swap(new_population);
wcout << ".";
if (gen % 100 == 0)
wcout << gen;
wcout.flush();
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment