krofna/hash.cpp

## hash.cpp
#include <bits/stdc++.h>
#define _ << ' ' <<
using namespace std;
using ll = long long;

const int mod = 1e9 + 9;
int add(int a, int b) { return (a += b) < mod? a : a - mod; }
int sub(int a, int b) { return (a -= b) >=  0? a : a + mod; }
int mul(int a, int b) { return 1LL * a * b % mod; }
int pwr(int a, int p) {
    if (p == 0) return 1;
    if (p & 1) return mul(a, pwr(a, p - 1));
    return pwr(mul(a, a), p / 2);
}
int inv(int a) { return pwr(a, mod - 2); }

const int maxn = 10000;
int invp[maxn], ppow[maxn], pref[maxn + 1];

void pre_hash(const string& s)
{
    for (int i = 0; i < s.size(); ++i)
        pref[i + 1] = add(pref[i], mul(s[i] - 'a' + 1, ppow[i]));
}

int substr_hash(int i, int j)
{
    return mul(invp[i], sub(pref[j + 1], pref[i]));
}

// +1 radimo da nam "a", "aa", "aaa", ... nebi imali isti hash
int hash_f(const string& s)
{
    ll p = 1, h = 0;
    for (int i = 0; i < s.size(); ++i)
        h = add(h, mul(s[i] - 'a' + 1, ppow[i]));
    return h;
}

vector<vector<int>> group_identical_strings(vector<string> const& s)
{
    int n = s.size();
    vector<pair<ll, int>> hashes(n);
    for (int i = 0; i < n; i++)
        hashes[i] = {hash_f(s[i]), i};

    sort(hashes.begin(), hashes.end());

    vector<vector<int>> groups;
    for (int i = 0; i < n; i++) {
        if (i == 0 || hashes[i].first != hashes[i-1].first)
            groups.emplace_back();
        groups.back().push_back(hashes[i].second);
    }
    return groups;
}

int count_unique_substrings(string const& s)
{
    int n = s.size(), cnt = 0;
    pre_hash(s);
    for (int l = 0; l < n; ++l)
    {
        set<int> hs;
        for (int i = 0; i < n - l; ++i)
            hs.insert(substr_hash(i, i + l));
        cnt += hs.size();
    }
    return cnt;
}

vector<int> rabin_karp(string const& s, string const& t)
{
    pre_hash(t);
    ll h_s = hash_f(s);
    vector<int> occurences;
    for (int i = 0; i + s.size() - 1 < t.size(); i++)
        if (substr_hash(i, i + s.size() - 1) == h_s)
            occurences.push_back(i);
    return occurences;
}

int LCP(const string& s, int x0, int y0, int x1, int y1)
{
    if (s[x0] != s[x1])
        return 0;

    int lo = 1, hi = min(y0 - x0 + 1, y1 - x1 + 1);
    while (lo < hi)
    {
        int mid = (lo + hi + 1) / 2;
        if (substr_hash(x0, x0 + mid - 1) == substr_hash(x1, x1 + mid - 1))
            lo = mid;
        else
            hi = mid-1;
    }
    return lo;
}

bool cmp(const string& s, int x0, int y0, int x1, int y1)
{
    int L = LCP(s, x0, y0, x1, y1);
    if (L == y0 - x0 + 1) return true;
    if (L == y1 - x1 + 1) return false;
    return s[x0 + L] < s[x1 + L];
}

bool palindrome(const string& s, int i, int j)
{
    int n = s.size();
    string z = s;
    reverse(z.begin(), z.end());
    // pre_hash bi trebao biti unaprijed izracunat inace nema smisla
    pre_hash(s);
    int hs = substr_hash(i, j);
    pre_hash(z);
    int hz = substr_hash(n - j - 1, n - i - 1);
    return hs == hz;
}

int period(const string& s)
{
    int m = s.size();
    vector<int> divs;
    for (int i = 1; i <= m; ++i)
        if (m % i == 0)
            divs.push_back(i);

    pre_hash(s);
    for (int x : divs)
    {
        bool bad = false;
        for (int i = x; i < m && !bad; i += x)
            if (substr_hash(i, i + x - 1) != substr_hash(0, x - 1))
                bad = true;

        if (!bad)
            return x;
    }
    return m;
}

// ako radimo puno usporedbi napravimo dva hasha da smanjimo kolizije
int main()
{
    invp[0] = ppow[0] = 1;
    for (int i = 1; i < maxn; ++i)
    {
        // 31 je dobar jer je otprilike toliko slova u engleskoj abecedi
        ppow[i] = mul(ppow[i - 1], 31);
        invp[i] = inv(ppow[i]);
    }
    string s = "mislav";
    string z = "isla";
    pre_hash(s);
    cout << hash_f(z) _ substr_hash(1, 4) << endl;
    cout << count_unique_substrings("misl") << endl;
    cout << count_unique_substrings("lala") << endl;
    for (int x : rabin_karp("la", "lahhlllallarla"))
        cout << x << ' ';
    cout << endl;
    cout << palindrome("afoofza", 1, 4) << ' ' << palindrome("foof", 1, 3) << endl;
    cout << period("fodfodfodfod");
}
	#include <bits/stdc++.h>
	#define _ << ' ' <<
	using namespace std;
	using ll = long long;

	const int mod = 1e9 + 9;
	int add(int a, int b) { return (a += b) < mod? a : a - mod; }
	int sub(int a, int b) { return (a -= b) >= 0? a : a + mod; }
	int mul(int a, int b) { return 1LL * a * b % mod; }
	int pwr(int a, int p) {
	if (p == 0) return 1;
	if (p & 1) return mul(a, pwr(a, p - 1));
	return pwr(mul(a, a), p / 2);
	}
	int inv(int a) { return pwr(a, mod - 2); }

	const int maxn = 10000;
	int invp[maxn], ppow[maxn], pref[maxn + 1];

	void pre_hash(const string& s)
	{
	for (int i = 0; i < s.size(); ++i)
	pref[i + 1] = add(pref[i], mul(s[i] - 'a' + 1, ppow[i]));
	}

	int substr_hash(int i, int j)
	{
	return mul(invp[i], sub(pref[j + 1], pref[i]));
	}

	// +1 radimo da nam "a", "aa", "aaa", ... nebi imali isti hash
	int hash_f(const string& s)
	{
	ll p = 1, h = 0;
	for (int i = 0; i < s.size(); ++i)
	h = add(h, mul(s[i] - 'a' + 1, ppow[i]));
	return h;
	}

	vector<vector<int>> group_identical_strings(vector<string> const& s)
	{
	int n = s.size();
	vector<pair<ll, int>> hashes(n);
	for (int i = 0; i < n; i++)
	hashes[i] = {hash_f(s[i]), i};

	sort(hashes.begin(), hashes.end());

	vector<vector<int>> groups;
	for (int i = 0; i < n; i++) {
	if (i == 0 \|\| hashes[i].first != hashes[i-1].first)
	groups.emplace_back();
	groups.back().push_back(hashes[i].second);
	}
	return groups;
	}

	int count_unique_substrings(string const& s)
	{
	int n = s.size(), cnt = 0;
	pre_hash(s);
	for (int l = 0; l < n; ++l)
	{
	set<int> hs;
	for (int i = 0; i < n - l; ++i)
	hs.insert(substr_hash(i, i + l));
	cnt += hs.size();
	}
	return cnt;
	}

	vector<int> rabin_karp(string const& s, string const& t)
	{
	pre_hash(t);
	ll h_s = hash_f(s);
	vector<int> occurences;
	for (int i = 0; i + s.size() - 1 < t.size(); i++)
	if (substr_hash(i, i + s.size() - 1) == h_s)
	occurences.push_back(i);
	return occurences;
	}

	int LCP(const string& s, int x0, int y0, int x1, int y1)
	{
	if (s[x0] != s[x1])
	return 0;

	int lo = 1, hi = min(y0 - x0 + 1, y1 - x1 + 1);
	while (lo < hi)
	{
	int mid = (lo + hi + 1) / 2;
	if (substr_hash(x0, x0 + mid - 1) == substr_hash(x1, x1 + mid - 1))
	lo = mid;
	else
	hi = mid-1;
	}
	return lo;
	}

	bool cmp(const string& s, int x0, int y0, int x1, int y1)
	{
	int L = LCP(s, x0, y0, x1, y1);
	if (L == y0 - x0 + 1) return true;
	if (L == y1 - x1 + 1) return false;
	return s[x0 + L] < s[x1 + L];
	}

	bool palindrome(const string& s, int i, int j)
	{
	int n = s.size();
	string z = s;
	reverse(z.begin(), z.end());
	// pre_hash bi trebao biti unaprijed izracunat inace nema smisla
	pre_hash(s);
	int hs = substr_hash(i, j);
	pre_hash(z);
	int hz = substr_hash(n - j - 1, n - i - 1);
	return hs == hz;
	}

	int period(const string& s)
	{
	int m = s.size();
	vector<int> divs;
	for (int i = 1; i <= m; ++i)
	if (m % i == 0)
	divs.push_back(i);

	pre_hash(s);
	for (int x : divs)
	{
	bool bad = false;
	for (int i = x; i < m && !bad; i += x)
	if (substr_hash(i, i + x - 1) != substr_hash(0, x - 1))
	bad = true;

	if (!bad)
	return x;
	}
	return m;
	}

	// ako radimo puno usporedbi napravimo dva hasha da smanjimo kolizije
	int main()
	{
	invp[0] = ppow[0] = 1;
	for (int i = 1; i < maxn; ++i)
	{
	// 31 je dobar jer je otprilike toliko slova u engleskoj abecedi
	ppow[i] = mul(ppow[i - 1], 31);
	invp[i] = inv(ppow[i]);
	}
	string s = "mislav";
	string z = "isla";
	pre_hash(s);
	cout << hash_f(z) _ substr_hash(1, 4) << endl;
	cout << count_unique_substrings("misl") << endl;
	cout << count_unique_substrings("lala") << endl;
	for (int x : rabin_karp("la", "lahhlllallarla"))
	cout << x << ' ';
	cout << endl;
	cout << palindrome("afoofza", 1, 4) << ' ' << palindrome("foof", 1, 3) << endl;
	cout << period("fodfodfodfod");
	}