Skip to content

Instantly share code, notes, and snippets.

@Chillee
Last active May 6, 2019 00:30
Show Gist options
  • Save Chillee/d9f6c5d52ef2d3664dcf0eeef464ef95 to your computer and use it in GitHub Desktop.
Save Chillee/d9f6c5d52ef2d3664dcf0eeef464ef95 to your computer and use it in GitHub Desktop.
Rolling String Hash
struct H {
ull x;
H(ull x = 0) : x(x) {}
ull get() const { return x; }
const static ull M = (1ull << 61) - 1;
H operator+(H o) {
o.x += x + 1;
o.x = (o.x & M) + (o.x >> 61);
return o.x - 1;
}
H operator*(H o) {
ull l1 = x & -1u, h1 = x >> 32, l2 = o.x & -1u, h2 = o.x >> 32;
ull l = l1 * l2, m = l1 * h2 + l2 * h1, h = h1 * h2;
ull ret = (l & M) + (l >> 61) + (h << 3) + (m >> 29) + (m << 35 >> 3) + 1;
ret = (ret & M) + (ret >> 61);
ret = (ret & M) + (ret >> 61);
return ret - 1;
}
H operator-(H o) { return *this + ~o.x; }
bool operator==(H o) const { return get() == o.get(); }
bool operator<(H o) const { return get() < o.get(); }
};
struct H {
ull x;
H(ull x = 0) : x(x) {}
ull get() const { return x + !~x; }
H operator+(H o) { return x + o.x + (x + o.x < x); }
H operator*(H o) {
ull r = x;
asm("mul %1\n addq %%rdx, %0\n adcq $0,%0" : "+a"(r) : "r"(o.x) : "rdx");
return r;
}
H operator-(H o) { return *this + ~o.x; }
bool operator==(H o) const { return get() == o.get(); }
bool operator<(H o) const { return get() < o.get(); }
};
template <class h> struct H : array<h, 2> {
H g() { return *this; }
H() { *this = {0, 0}; }
H(h a) { *this = {a, a}; }
H(h a, h b) { (*this)[0] = a, (*this)[1] = b; }
#define op(o) \
H operator o(H a) { return {g()[0] o a[0], g()[1] o a[1]}; }
op(+) op(-) op(*);
H operator+(int a) { return g() + H(a); }
};
struct RollingHash {
H p = H(29);
vector<H> ha;
vector<H> pw;
RollingHash(string s) : ha(s.size() + 1), pw(ha) {
pw[0] = H(1);
for (int i = 0; i < s.size(); i++)
ha[i + 1] = ha[i] * p + s[i], pw[i + 1] = pw[i] * p;
}
H interval(int a, int b) { // hash [a, b)
return ha[b] - ha[a] * pw[b - a];
}
};
@Chillee
Copy link
Author

Chillee commented Apr 6, 2019

Mod 2^64 -1 is about twice as fast as using doubled 1e9+7, and 3x as fast as using 2^61-1. To change to the extensible hash, replace the H struct and the resulting errors with initializing H with 1 element.

TL;DR: Use Rolling hash with H=2^64-1 if on 64 bit server, H=2^61-1 if on CF/32 bit server, and H=ull if lazy and malicious test data is not a concern.

Use extensible hash if you need more than 2^64 space for w.e. reason

Benchmark: https://ideone.com/i9ABXT

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment