Created
April 7, 2018 19:36
-
-
Save sorrge/07972dcbc4e804a6979bbc3d260c7073 to your computer and use it in GitHub Desktop.
Decipher a double substitution with a shift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <codecvt> | |
#include <string> | |
#include <sstream> | |
#include <unordered_set> | |
#include <unordered_map> | |
#include <set> | |
#include <vector> | |
#include <algorithm> | |
using namespace std; | |
wstring read_file(const string& fn) { | |
wifstream file(fn); | |
std::wstringstream wss; | |
wss << file.rdbuf(); | |
return wss.str(); | |
} | |
typedef vector<int> str; | |
class alpha { | |
unordered_map<wchar_t, int> charmap; | |
wstring alphabet; | |
public: | |
alpha(wstring& alphabet) : alphabet(alphabet) { | |
for(int i = 0; i < (int)alphabet.size(); ++i) | |
charmap[alphabet[i]] = i; | |
} | |
str to_num(wstring& s) { | |
str res(s.size()); | |
for(int i = 0; i < (int)s.size(); ++i) | |
res[i] = charmap[s[i]]; | |
return res; | |
} | |
wstring to_str(str& v) { | |
wstring res(v.size(), ' '); | |
for(int i = 0; i < (int)v.size(); ++i) | |
res[i] = alphabet[v[i]]; | |
return res; | |
} | |
int size() { | |
return (int)alphabet.size(); | |
} | |
}; | |
class bigram_model { | |
vector<str> counts; | |
public: | |
bigram_model(str& corpus, int size) : counts(size) { | |
for(int i = 0; i < size; ++i) | |
counts[i].resize(size, 0); | |
for(int i = 0; i < (int)corpus.size() - 1; ++i) | |
++counts[corpus[i]][corpus[i + 1]]; | |
} | |
int score(str& s) { | |
int c = 0; | |
for(int i = 0; i < (int)s.size() - 1; ++i) | |
c += counts[s[i]][s[i + 1]]; | |
return c; | |
} | |
}; | |
class trigram_model { | |
vector<vector<str>> counts; | |
public: | |
trigram_model(str& corpus, int size) : counts(size) { | |
for(int i = 0; i < size; ++i) { | |
counts[i].resize(size); | |
for(int j = 0; j < size; ++j) | |
counts[i][j].resize(size, 0); | |
} | |
for(int i = 0; i < (int)corpus.size() - 2; ++i) | |
++counts[corpus[i]][corpus[i + 1]][corpus[i + 2]]; | |
} | |
int score(str& s) { | |
int c = 0; | |
for(int i = 0; i < (int)s.size() - 2; ++i) | |
c += counts[s[i]][s[i + 1]][s[i + 2]]; | |
return c; | |
} | |
}; | |
void decode_shifts(str& s, int shift, int size) { | |
for(int i = 0; i < (int)s.size(); ++i) | |
s[i] = ((s[i] - shift * i) % size + size) % size; | |
} | |
void substitute(str& s, str& subs) { | |
for(int& c : s) | |
c = subs[c]; | |
} | |
void decode_1(str& s, str& subs, int shift, int size) { | |
substitute(s, subs); | |
decode_shifts(s, shift, size); | |
} | |
void decode_2(str& s, pair<str, str>& subs2, int shift, int size) { | |
substitute(s, subs2.first); | |
decode_shifts(s, shift, size); | |
substitute(s, subs2.second); | |
} | |
void mutate_subs(str& subs) { | |
int n = rand() % 2 + 1; | |
for(int i = 0; i < n; ++i) { | |
int idx1 = rand() % subs.size(); | |
int idx2 = (idx1 + 1 + rand() % (subs.size() - 1)) % subs.size(); | |
swap(subs[idx1], subs[idx2]); | |
} | |
} | |
str used; | |
void crossover_subs(str& child, str& parent1, str parent2) { | |
int size = (int)parent1.size(); | |
used.resize(size); | |
fill_n(used.begin(), size, 0); | |
for(int i = 0; i < size; ++i) { | |
int chosen = rand() % 2 == 0 ? parent1[i] : parent2[i]; | |
++used[chosen]; | |
child[i] = chosen; | |
} | |
auto unused = find(used.begin(), used.end(), 0); | |
for(int i = 0; i < size && unused != used.end(); ++i) | |
if (used[child[i]] > 1) { | |
--used[child[i]]; | |
int c = (int)distance(used.begin(), unused); | |
child[i] = c; | |
++used[c]; | |
unused = find(unused + 1, used.end(), 0); | |
} | |
} | |
int main(int, char *[]) { | |
std::locale::global (std::locale ("")); | |
wstring cipher = read_file("../cipher"); | |
wcout << cipher << endl; | |
wcout << "cipher size: " << cipher.size() << endl; | |
set<wchar_t> used_chars(cipher.begin(), cipher.end()); | |
wstring alphabet(used_chars.begin(), used_chars.end()); | |
wcout << "alphabet: " << alphabet << "\t" << alphabet.size() << " chars" << endl; | |
wstring wp = read_file("../wp_pp.txt"); | |
wcout << "corpus: " << wp.size() << endl; | |
alpha al(alphabet); | |
auto ciph = al.to_num(cipher); | |
auto corp = al.to_num(wp); | |
bigram_model bg(corp, al.size()); | |
trigram_model tg(corp, al.size()); | |
int best_score = bg.score(ciph); | |
wcout << "Init score: " << best_score << endl; | |
int pop_size = 10000, elite = 500, select = 2000; | |
typedef pair<str, str> org; | |
org seed = make_pair(al.to_num(alphabet), al.to_num(alphabet)); | |
vector<org> population(pop_size, seed), new_population(population); | |
str scores(pop_size); | |
str order(pop_size); | |
for(int i = 0; i < pop_size; ++i) { | |
mutate_subs(population[i].first); | |
mutate_subs(population[i].second); | |
order[i] = i; | |
} | |
int key2 = 8 * 3; | |
for(int gen = 0;; ++gen) { | |
#pragma omp parallel for | |
for(int i = 0; i < pop_size; ++i) { | |
str cc = ciph; | |
decode_2(cc, population[i], key2, al.size()); | |
scores[i] = bg.score(cc); | |
} | |
sort(order.begin(), order.end(), [&scores](int i1, int i2) { return scores[i1] > scores[i2]; }); | |
if (scores[order[0]] > best_score) { | |
best_score = scores[order[0]]; | |
wcout << endl; | |
wcout << "New best: " << best_score << endl; | |
str cc = ciph; | |
decode_2(cc, population[order[0]], key2, al.size()); | |
wcout << al.to_str(cc) << endl << endl << endl; | |
} | |
for(int i = 0; i < elite; ++i) | |
new_population[i] = population[order[i]]; | |
for(int i = elite; i < pop_size / 2; ++i) { | |
new_population[i] = population[order[rand() % select]]; | |
mutate_subs(new_population[i].first); | |
mutate_subs(new_population[i].second); | |
} | |
for(int i = pop_size / 2; i < pop_size; ++i) { | |
int p1 = order[rand() % select], p2 = order[rand() % select]; | |
crossover_subs(new_population[i].first, population[p1].first, population[p2].first); | |
crossover_subs(new_population[i].second, population[p1].second, population[p2].second); | |
} | |
population.swap(new_population); | |
wcout << "."; | |
if (gen % 100 == 0) | |
wcout << gen; | |
wcout.flush(); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment