Skip to content

Instantly share code, notes, and snippets.

@gasin
Last active April 8, 2020 12:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gasin/16d6f64adea167376cf39264be62585f to your computer and use it in GitHub Desktop.
Save gasin/16d6f64adea167376cf39264be62585f to your computer and use it in GitHub Desktop.
#include <bits/stdc++.h>
#define rep(i,n) for(int i = 0; i < n; i++)
using namespace std;
//1-indexed, var..maxnum
vector<int> SA_IS(vector<int> str, int var) {
if(str.size() == 1) {
vector<int> ret(1,0);
return ret;
}
str.push_back(0);
int si = str.size();
vector<int> st(var, 0), en(var, 0);
vector<int> SL(si, 0); //s..0, l..1
vector<int> SA(si, -1);
vector<int> LMS;
vector<int> is_LMS(si, -1);
rep(i,str.size()) en[str[i]]++;
for(int i = 1; i < var; i++) en[i] += en[i-1];
for(int i = 1; i < var; i++) st[i] = en[i-1];
SL[str.size()-1] = 0;
for(int i = str.size()-2; i >= 0; i--) {
if(str[i] == str[i+1]) {
SL[i] = SL[i+1];
continue;
}
if(str[i] > str[i+1]) SL[i] = 1;
else SL[i] = 0;
}
for(int i = 1; i < str.size(); i++) {
if(SL[i] == 0 && SL[i-1] == 1) {
SA[--en[str[i]]] = i;
LMS.push_back(i);
is_LMS[i] = 1;
}
}
rep(i,var-1) en[i] = st[i+1];
en[var-1] = str.size();
rep(i,str.size()) if(SA[i] > 0 && SL[SA[i]-1] == 1) { SA[st[str[SA[i]-1]]++] = SA[i]-1; }
st[0] = 0;
for(int i = 1; i < var; i++) st[i] = en[i-1];
for(int i = 1; i < str.size(); i++) if(SA[i] != -1 && SL[SA[i]] == 0) { SA[i] = -1; }
for(int i = str.size()-1; i >= 1; i--) if(SA[i] > 0 && SL[SA[i]-1] == 0) { SA[--en[str[SA[i]-1]]] = SA[i]-1; }
rep(i,var-1) en[i] = st[i+1];
en[var-1] = str.size();
int counter = 0;
vector<int> pre_sa, new_sa;
rep(i,SA.size()) if(is_LMS[SA[i]] != -1) {
is_LMS[SA[i]] = ++counter;
new_sa.clear();
for(int j = SA[i]; j < SA.size(); j++) {
new_sa.push_back(str[j]);
if(j != SA[i] && is_LMS[j] != -1) {
break;
}
}
if(pre_sa == new_sa) {
is_LMS[SA[i]] = --counter;
}
pre_sa = new_sa;
}
vector<int> new_str;
vector<int> rev((int)LMS.size()+1, 0);
counter = 0;
rep(i,is_LMS.size()) {
if(is_LMS[i] != -1) {
new_str.push_back(is_LMS[i]);
rev[counter++] = i;
}
}
vector<int> rec = SA_IS(new_str, new_str.size()+1);
rep(i,SA.size()) SA[i] = -1;
for(int i = rec.size()-1; i >= 0; i--) { SA[--en[str[rev[rec[i]]]]] = rev[rec[i]]; }
rep(i,var-1) en[i] = st[i+1];
en[var-1] = str.size();
rep(i,str.size()) if(SA[i] > 0 && SL[SA[i]-1] == 1) { SA[st[str[SA[i]-1]]++] = SA[i]-1; }
for(int i = 1; i < str.size(); i++) if(SA[i] != -1 && SL[SA[i]] == 0) { SA[i] = -1; }
for(int i = str.size()-1; i >= 1; i--) if(SA[i] > 0 && SL[SA[i]-1] == 0) { SA[--en[str[SA[i]-1]]] = SA[i]-1; }
SA.erase(SA.begin());
return SA;
}
int main() {
/*
string str;
cin >> str;
vector<int> num;
rep(i,str.size()) num.push_back(str[i]-'a'+1);
vector<int> v = SA_IS(num, 30);
rep(i,v.size()) {
cout << str.substr(v[i]) << endl;
}
*/
random_device rnd;
mt19937 mt(rnd());
int elapsed_time = 0;
rep(i,10) {
vector<int> num;
string str;
rep(i,10000000) num.push_back(mt()%26+1);
rep(i,10000000) str.push_back('a'+num[i]-1);
auto start = chrono::system_clock::now();
vector<int> v = SA_IS(num, 30);
auto end = chrono::system_clock::now();
elapsed_time += chrono::duration_cast<chrono::milliseconds>(end-start).count();
cout << v.size() << endl;
bool checker = false;
for(int j = 1; j < v.size(); j++) {
if(str.substr(v[j-1]) > str.substr(v[j])) {
cout << "failed" << endl;
break;
}
}
}
elapsed_time /= 10;
cout << "elapsed_time : " << elapsed_time << endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment