Created
January 25, 2021 14:18
-
-
Save Mivik/15fd4b903007fc25a9cd866e27337ca3 to your computer and use it in GitHub Desktop.
C++ program that computes sum of subword complexity of all binary strings of length n.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Requires: libgmp-dev | |
// Recommended compiler options: -Ofast -lgmp -lgmpxx | |
// Mivik 2021.1.25 | |
#include <cstdint> | |
#include <iostream> | |
#include <vector> | |
#include <gmpxx.h> | |
typedef mpq_class num; | |
const int N = 60; // maximum length of the string | |
const int C = 2; // size of the alphabet | |
namespace gen { // enumeration of autocorrelations | |
int n; | |
struct bit_vector { // a simple bit vector containing up to 64 bits | |
int v[2]; | |
inline bool test(int i) const { return v[i >> 5] & (1 << (i & 31)); } | |
inline void set(int i) { v[i >> 5] |= 1 << (i & 31); } | |
inline void reset(int i) { v[i >> 5] &= ~(1 << (i & 31)); } | |
inline void reset() { v[0] = v[1] = 0; } | |
inline bool contains(const bit_vector &t) const { return (v[0] & t.v[0]) == t.v[0] && (v[1] & t.v[1]) == t.v[1]; } | |
} vec; | |
struct info { | |
bit_vector vec; | |
int length; | |
int nfc; // Number of Free Characters | |
uint64_t pop; // population of this autocorrelation. | |
// it's easily seen that when n <= 60, the population will not exceed (2^64). | |
info(bit_vector vec, int length, int nfc, uint64_t pop): | |
vec(std::move(vec)), length(length), nfc(nfc), pop(pop) {} | |
}; | |
std::vector<info> rec; // records all the possible autocorrelations with a length <= N | |
int req[N + 1]; // if (req[i] != 0), we require `i` to be a period. | |
inline int gcd(int x, int y) { while (x %= y) std::swap(x, y); return y; } | |
inline uint64_t qpow(uint64_t x, int p) { | |
uint64_t ret = 1; | |
for (; p; p >>= 1, x *= x) if (p & 1) ret *= x; | |
return ret; | |
} | |
void dfs( | |
int o, // offset | |
int lst, // last small period. if the last period is not small then it's 0 | |
int cnt // number of free characters. computed dynamically. | |
) { | |
const int n = gen::n - o; | |
if (!n) { | |
uint64_t pop = qpow(C, cnt); | |
for (size_t i = rec.size() - 1; ~i; --i) { | |
const auto &info = rec[i]; | |
if (info.vec.contains(vec)) pop -= info.pop; | |
} | |
rec.emplace_back(vec, gen::n, cnt, pop); | |
return; | |
} | |
int *req = gen::req + o; // ATTENTION!!! name shadowing for convenience | |
vec.set(o); | |
for (int p = 1; p < n; ++p) { // enumerate the minimum non-zero period | |
if (p >= lst || p > (n - lst) + gcd(lst, p)) { | |
if (p * 2 <= n) { // a small period | |
const int r = p * (n / p - 1); | |
bool ok = true; | |
for (int i = 1; i < r; ++i) // check whether it satisfies previous requirements | |
if (req[i] && i % p) { ok = false; break; } | |
if (ok) { | |
for (int i = 1; i < r; ++i) vec.reset(o + i); | |
for (int i = p; i < r; i += p) vec.set(o + i); | |
if (r + p != n) ++req[r + p]; // requirements might overlap so we use int instead of bool | |
dfs(o + r, p, cnt); | |
if (r + p != n) --req[r + p]; | |
} | |
} else { | |
vec.set(o + p); | |
dfs(o + p, 0, cnt + (p << 1) - n); | |
} | |
} | |
if (req[p]) return; | |
vec.reset(o + p); | |
} | |
// no period exists | |
dfs(o + n, 0, cnt + n); | |
} | |
inline const std::vector<info>& find_for(int n) { | |
gen::n = n; rec.clear(); | |
dfs(0, 0, 0); return rec; | |
} | |
} // namespace gen | |
struct poly { // polynomial with single indeterminate | |
constexpr static int L = N + 1; | |
num v[L]; | |
inline num &operator[](int i) { return v[i]; } | |
inline num operator[](int i) const { return v[i]; } | |
inline void reset() { for (int i = 0; i < L; ++i) v[i] = 0; } | |
// Calculate the product of polynomials `a` and `b` and put it in `this` | |
// Modulo (x ^ L) | |
inline void from_mul(const poly &a, const poly &b) { | |
reset(); | |
for (int i = 0; i < L; ++i) { | |
const int lim = L - i; | |
for (int j = 0; j < lim; ++j) v[i + j] += a[i] * b[j]; | |
} | |
} | |
// Calculate the multiplicative inverse of `a` and put it in `this` | |
// Modulo (x ^ L) | |
inline void from_inv(const poly &a) { | |
static poly t, d; | |
reset(); | |
v[0] = num(a[0].get_den(), a[0].get_num()); | |
const int lim = L * 2; | |
for (int l = 2; l < lim; l <<= 1) { | |
t.from_mul(*this, a); | |
for (int i = 0; i < L; ++i) t[i] = -t[i]; | |
t[0] += 2; | |
d.from_mul(*this, t); | |
std::copy(d.v, d.v + std::min(l, L), v); | |
} | |
} | |
// Shift the content in this polynomial by `offset`, equivalent to multiplying (x ^ offset) | |
// Modulo (x ^ L) | |
inline void shift(int offset) { | |
for (int i = L - 1; i >= offset; --i) v[i] = v[i - offset]; | |
for (int i = 0; i < offset; ++i) v[i] = 0; | |
} | |
}; | |
// Calculate the OGF of words containing a word with length `len` and autocorrelation `cor` | |
// See https://en.wikipedia.org/wiki/Autocorrelation_(words)#Ordinary_generating_functions | |
inline const poly& containing(const poly &cor, int len) { | |
static poly ret, tmp, fin; | |
ret = { 1, -C }; | |
tmp.from_mul(ret, cor); | |
++tmp[len]; | |
fin.from_mul(ret, tmp); | |
ret.from_inv(fin); | |
ret.shift(len); | |
return ret; | |
} | |
num ans[N + 1]; | |
int main() { | |
poly cor; | |
for (int i = 1; i <= N; ++i) { | |
std::cerr << "Computing for i = " << i << std::endl; | |
for (const auto &info : gen::find_for(i)) { | |
cor.reset(); | |
const int n = info.length; | |
for (int i = 0; i < n; ++i) | |
if (info.vec.test(i)) cor[i] = 1; | |
auto &tmp = containing(cor, n); | |
for (int i = n; i <= N; ++i) | |
ans[i] += tmp[i] * info.pop; | |
} | |
} | |
for (int i = 0; i <= N; ++i) | |
std::cout << i << ' ' << ans[i] << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment