Skip to content

Instantly share code, notes, and snippets.

@purplesyringa
Last active February 17, 2018 09:39
Show Gist options
  • Save purplesyringa/1a5808258849480cd5cec55db8351143 to your computer and use it in GitHub Desktop.
Save purplesyringa/1a5808258849480cd5cec55db8351143 to your computer and use it in GitHub Desktop.
Suffix array
#include <iostream>
#include <cstdint>
#include <vector>
#include <string>
#include <fstream>
using namespace std;
int main() {
ifstream in("array.in");
ofstream out("array.out");
string s;
in >> s;
char cell = 'a' - 1;
s += string(1, cell);
int64_t k = 'z' - cell + 1;
int64_t n = s.size();
// Calc color
vector<int64_t> cnt(max(n, k)), col(n);
for(int64_t i = 0; i < n; i++) {
col[i] = s[i] - cell;
cnt[col[i]]++;
}
// Have references to beggining of each block
vector<int64_t> head(max(n, k));
head[0] = 0;
for(int64_t i = 1; i < k; i++) {
head[i] = head[i - 1] + cnt[i - 1];
}
// Count sort
vector<int64_t> a(n);
for(int64_t i = 0; i < n; i++) {
a[head[col[i]]++] = i;
}
// Fix references to beggining of each block
head[0] = 0;
for(int64_t i = 1; i < k; i++) {
head[i] = head[i - 1] + cnt[i - 1];
}
for(int64_t l = 1; l < n; l *= 2) {
// e.g. a = |10|11|2 |4 |6 |9 |0 |7 |3 |6|1 |8 |
// Resort based on the fact that B is sorted
vector<int64_t> a_(n);
for(int64_t i = 0; i < n; i++) {
int64_t j = (a[i] + 2 * n - l) % n;
a_[head[col[j]]++] = j;
}
// Recalc colors
vector<int64_t> col_(n);
k = 0;
int64_t k = 1;
for(int64_t i = 0; i < n; i++) {
a[i] = a_[i];
if(i == 0 || col[a[i]] != col[a[i - 1]] || col[(a[i] + l) % n] != col[(a[i - 1] + l) % n]) {
if(k >= head.size()) {
head.resize(k + 1);
}
head[k++] = i;
}
col_[a[i]] = k - 1;
}
col = col_;
}
// Output suffix array
for(int64_t i = 1; i < n; i++) {
out << a[i] + 1 << " ";
}
out << endl;
// Kosai
vector<int64_t> ord(n), lcp(n);
for(int64_t i = 0; i < n; i++) {
ord[a[i]] = i;
}
int64_t x = 0;
for(int64_t i = 0; i < n; i++) {
x = max(x - 1, 0LL);
if(ord[i] == n - 1) {
x = 0;
} else {
int64_t j = a[ord[i] + 1];
while(s[i + x] == s[j + x]) {
x++;
}
lcp[ord[i]] = x;
}
}
// Output LCP
for(int64_t i = 1; i < n - 1; i++) {
out << lcp[i] << " ";
}
return 0;
}
int64_t n = s.size();
char cell = 'a' - 1;
s[n++] = cell;
int64_t k = 'z' - cell + 1;
// Calc color
vector<int64_t> cnt(n), col(n);
for(int64_t i = 0; i < n; i++) {
col[i] = s[i] - cell;
cnt[col[i]]++;
}
// Have references to beggining of each block
vector<int64_t> head(k);
head[0] = 0;
for(int64_t i = 1; i < k; i++) {
head[i] = head[i - 1] + cnt[i - 1];
}
// Count sort
vector<int64_t> a(n);
for(int64_t i = 0; i < n; i++) {
a[head[col[i]]++] = i;
}
// Fix references to beggining of each block
head[0] = 0;
for(int64_t i = 1; i < k; i++) {
head[i] = head[i - 1] + cnt[i - 1];
}
// Main
vector<int64_t> A, B;
// L L
// __________
// |_|#|_|#|__|
// Ai Bi
for(int64_t l = 1; l < n; l *= 2) {
// e.g. a = |10|11|2 |4 |6 |9 |0 |7 |3 |6|1 |8 |
// Resort based on the fact that B is sorted
vector<int64_t> a_(n);
for(int64_t i = 0; i < n; i++) {
int64_t j = (a[i] + n - l) % n;
a_[head[col[j]]++] = j;
}
// Recalc colors
vector<int64_t> col_(n);
int64_t k = 0;
for(int64_t i = 0; i < n; i++) {
a[i] = a_[i];
if(i == 0 || col[a[i]] == col[a[i - 1]] || col[(a[i] + l) % n] != col[(a[i - 1] + l) % n]) {
head[k++] = i;
}
col_[a[i]] = k - 1;
}
col.swap(col_);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment