Last active
April 8, 2020 12:07
-
-
Save gasin/16d6f64adea167376cf39264be62585f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <bits/stdc++.h> | |
#define rep(i,n) for(int i = 0; i < n; i++) | |
using namespace std; | |
//1-indexed, var..maxnum | |
vector<int> SA_IS(vector<int> str, int var) { | |
if(str.size() == 1) { | |
vector<int> ret(1,0); | |
return ret; | |
} | |
str.push_back(0); | |
int si = str.size(); | |
vector<int> st(var, 0), en(var, 0); | |
vector<int> SL(si, 0); //s..0, l..1 | |
vector<int> SA(si, -1); | |
vector<int> LMS; | |
vector<int> is_LMS(si, -1); | |
rep(i,str.size()) en[str[i]]++; | |
for(int i = 1; i < var; i++) en[i] += en[i-1]; | |
for(int i = 1; i < var; i++) st[i] = en[i-1]; | |
SL[str.size()-1] = 0; | |
for(int i = str.size()-2; i >= 0; i--) { | |
if(str[i] == str[i+1]) { | |
SL[i] = SL[i+1]; | |
continue; | |
} | |
if(str[i] > str[i+1]) SL[i] = 1; | |
else SL[i] = 0; | |
} | |
for(int i = 1; i < str.size(); i++) { | |
if(SL[i] == 0 && SL[i-1] == 1) { | |
SA[--en[str[i]]] = i; | |
LMS.push_back(i); | |
is_LMS[i] = 1; | |
} | |
} | |
rep(i,var-1) en[i] = st[i+1]; | |
en[var-1] = str.size(); | |
rep(i,str.size()) if(SA[i] > 0 && SL[SA[i]-1] == 1) { SA[st[str[SA[i]-1]]++] = SA[i]-1; } | |
st[0] = 0; | |
for(int i = 1; i < var; i++) st[i] = en[i-1]; | |
for(int i = 1; i < str.size(); i++) if(SA[i] != -1 && SL[SA[i]] == 0) { SA[i] = -1; } | |
for(int i = str.size()-1; i >= 1; i--) if(SA[i] > 0 && SL[SA[i]-1] == 0) { SA[--en[str[SA[i]-1]]] = SA[i]-1; } | |
rep(i,var-1) en[i] = st[i+1]; | |
en[var-1] = str.size(); | |
int counter = 0; | |
vector<int> pre_sa, new_sa; | |
rep(i,SA.size()) if(is_LMS[SA[i]] != -1) { | |
is_LMS[SA[i]] = ++counter; | |
new_sa.clear(); | |
for(int j = SA[i]; j < SA.size(); j++) { | |
new_sa.push_back(str[j]); | |
if(j != SA[i] && is_LMS[j] != -1) { | |
break; | |
} | |
} | |
if(pre_sa == new_sa) { | |
is_LMS[SA[i]] = --counter; | |
} | |
pre_sa = new_sa; | |
} | |
vector<int> new_str; | |
vector<int> rev((int)LMS.size()+1, 0); | |
counter = 0; | |
rep(i,is_LMS.size()) { | |
if(is_LMS[i] != -1) { | |
new_str.push_back(is_LMS[i]); | |
rev[counter++] = i; | |
} | |
} | |
vector<int> rec = SA_IS(new_str, new_str.size()+1); | |
rep(i,SA.size()) SA[i] = -1; | |
for(int i = rec.size()-1; i >= 0; i--) { SA[--en[str[rev[rec[i]]]]] = rev[rec[i]]; } | |
rep(i,var-1) en[i] = st[i+1]; | |
en[var-1] = str.size(); | |
rep(i,str.size()) if(SA[i] > 0 && SL[SA[i]-1] == 1) { SA[st[str[SA[i]-1]]++] = SA[i]-1; } | |
for(int i = 1; i < str.size(); i++) if(SA[i] != -1 && SL[SA[i]] == 0) { SA[i] = -1; } | |
for(int i = str.size()-1; i >= 1; i--) if(SA[i] > 0 && SL[SA[i]-1] == 0) { SA[--en[str[SA[i]-1]]] = SA[i]-1; } | |
SA.erase(SA.begin()); | |
return SA; | |
} | |
int main() { | |
/* | |
string str; | |
cin >> str; | |
vector<int> num; | |
rep(i,str.size()) num.push_back(str[i]-'a'+1); | |
vector<int> v = SA_IS(num, 30); | |
rep(i,v.size()) { | |
cout << str.substr(v[i]) << endl; | |
} | |
*/ | |
random_device rnd; | |
mt19937 mt(rnd()); | |
int elapsed_time = 0; | |
rep(i,10) { | |
vector<int> num; | |
string str; | |
rep(i,10000000) num.push_back(mt()%26+1); | |
rep(i,10000000) str.push_back('a'+num[i]-1); | |
auto start = chrono::system_clock::now(); | |
vector<int> v = SA_IS(num, 30); | |
auto end = chrono::system_clock::now(); | |
elapsed_time += chrono::duration_cast<chrono::milliseconds>(end-start).count(); | |
cout << v.size() << endl; | |
bool checker = false; | |
for(int j = 1; j < v.size(); j++) { | |
if(str.substr(v[j-1]) > str.substr(v[j])) { | |
cout << "failed" << endl; | |
break; | |
} | |
} | |
} | |
elapsed_time /= 10; | |
cout << "elapsed_time : " << elapsed_time << endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment