Skip to content

Instantly share code, notes, and snippets.

@fluffy-critter
Created April 2, 2016 23:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fluffy-critter/0fe944d41d182d134dd6e2d2f3b7d3b6 to your computer and use it in GitHub Desktop.
Save fluffy-critter/0fe944d41d182d134dd6e2d2f3b7d3b6 to your computer and use it in GitHub Desktop.
A really dumb genetic algorithm for optimizing hash coefficients
#include <iostream>
#include <map>
#include <vector>
#include <string>
#include <algorithm>
#include <random>
#include <chrono>
#include <thread>
#include <mutex>
namespace {
auto rng = std::default_random_engine(std::chrono::system_clock::now().time_since_epoch().count());
}
int h(const char* w, const char* c, int s) {unsigned t = 0, p = 0; while (*w) { t = c[p++ % s] * t + *w++; } return t % 16777213;}
struct Algo {
std::string coef;
Algo() {}
Algo(std::string c): coef(c) {}
bool operator<(const Algo& rhs) const {
return coef.size() < rhs.coef.size()
|| (coef.size() == rhs.coef.size() && coef < rhs.coef);
}
};
int score(const std::vector<std::string>& words, const Algo& alg)
{
std::map<int, int> shared;
for (auto w : words) {
shared[h(w.c_str(), alg.coef.c_str(), alg.coef.size())]++;
}
int count = 0;
for (auto c : shared) {
if ((c.first & 0xFFFFFF) != c.first) { std::cerr << "invalid hash: " << c.first << std::endl; }
if (c.second > 1) { count += c.second; }
}
return count;
}
Algo breed(const Algo& a1, const Algo& a2)
{
std::string coef;
if ((rng() & 3) == 1) {
// concat
coef = a1.coef + a2.coef;
size_t len = std::min(coef.size(), size_t(56));
if (len < coef.size()) {
coef = coef.substr(rng() % (coef.size() - len + 1), len);
}
} else {
// splice
const std::string& s1 = a1.coef, &s2 = a2.coef;
size_t sz = std::min(s1.size(), s2.size());
// Find the codon substring
std::string c1, c2;
c1 = s1.substr(rng() % (s1.length() - sz + 1), sz);
c2 = s2.substr(rng() % (s2.length() - sz + 1), sz);
// Breed 'em
for (size_t k = 0; k < sz; k++) {
coef += ((rng() & 127) ? c1 : c2)[k];
}
}
// Randomly swap some codons
for (int n = rng() % coef.size(); n >= 0; n--) {
std::swap(coef[rng() % coef.size()], coef[rng() % coef.size()]);
}
// Randomly mutate some codons
for (int n = rng() % coef.size(); n >= 0; n--) {
coef[rng() % coef.size()] ^= 1 << (rng() & 7);
}
// Ensure printability
for (char& c : coef) {
while (!isprint(c)) {
c ^= 1 << (rng() & 7);
}
}
return Algo(coef);
}
int main(void)
{
std::map<int, int> shared;
std::vector<std::pair<int, Algo>> scores;
std::vector<std::string> words;
std::mutex scoreLock;
{
std::string s;
while (std::cin >> s) {
words.push_back(s);
}
std::cout << "Loaded " << words.size() << " words" << std::endl;
}
{
std::string seeds[] = {"%)+/15;=CGIOSYaegkmqy", // primes
"*",
"5euy",
"abcdefghijklmnopqrstuvwxyz",
"12489301940139041904190841",
"jnmrhfsxdluyinhsiobxgvpftcywtkjeLzemvpoaqgqbuaw",
"9YN",
"~kgkloK",
"Y9_",
"5/5;=CGI",
"*nt",
};
for (auto s : seeds) {
Algo a(s);
int sc = score(words, a);
std::cout << s << " -> " << sc << std::endl;
scores.emplace_back(sc, a);
}
std::sort(scores.begin(), scores.end());
}
std::cout << "Starting best score: '" << scores.front().second.coef << "' -> " << scores.front().first << std::endl;
std::vector<std::thread> threads;
for (int i = 0; i < std::thread::hardware_concurrency(); i++) {
threads.emplace_back([&]() {
for (;;) {
Algo a1, a2;
{
std::lock_guard<std::mutex> lock(scoreLock);
size_t ofs = rng() % scores.size();
a1 = scores[ofs].second;
a2 = scores[rand() % (ofs + 1)].second;
}
Algo a = breed(a1, a2);
if (a.coef.size()) {
int ns = score(words, a);
std::cout << ns << ' ' << a.coef.substr(0, 8) << '(' << a.coef.size() << ") \r" << std::flush;
{
std::lock_guard<std::mutex> lock(scoreLock);
scores.emplace_back(ns, a);
if (ns < scores.front().first
|| (ns == scores.front().first && a.coef.size() < scores.front().second.coef.size())) {
std::cout << "*** New record: \"" << a.coef << "\" -> " << ns << std::endl;
}
std::sort(scores.begin(), scores.end());
scores.resize(std::min(size_t(65535), scores.size()));
}
}
}
});
}
for (;;) {
std::this_thread::sleep_for(std::chrono::seconds(5));
{
std::lock_guard<std::mutex> lock(scoreLock);
std::cout << "Current record: \"" << scores.front().second.coef << "\" -> " << scores.front().first << std::endl;
}
}
for (auto& t : threads) {
t.join();
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment