-
-
Save buyoh/f48efa29dc9ce4eee57c367300b53f39 to your computer and use it in GitHub Desktop.
SuffixArray-InducedSorting (未完成)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<bits/stdc++.h> | |
using namespace std; | |
typedef unsigned int uint; | |
typedef long long int ll; | |
typedef unsigned long long int ull; | |
#define debugv(v) printf("L%d %s => ",__LINE__,#v);for(auto e:v){cout<<e<<" ";}cout<<endl; | |
#define debugm(m) printf("L%d %s is..\n",__LINE__,#m);for(auto v:m){for(auto e:v){cout<<e<<" ";}cout<<endl;} | |
#define debuga(m,w) printf("L%d %s is => ",__LINE__,#m);for(int x=0;x<(w);x++){cout<<(m)[x]<<" ";}cout<<endl; | |
#define debugaa(m,w,h) printf("L%d %s is..\n",__LINE__,#m);for(int y=0;y<(h);y++){for(int x=0;x<(w);x++){cout<<(m)[x][y]<<" ";}cout<<endl;} | |
#define ALL(v) (v).begin(),(v).end() | |
#define BIGINT 0x7FFFFFFF | |
#define E107 1000000007 | |
#define TIME chrono::system_clock::now | |
#define MILLISEC(t) (chrono::duration_cast<chrono::milliseconds>(t).count()) | |
template<typename T1,typename T2> | |
ostream& operator <<(ostream &o,const pair<T1,T2> p){o<<"("<<p.first<<":"<<p.second<<")";return o;} | |
// strcmp 改造 | |
// abcd と abcを同一とみなす。 | |
// int strcmpcut(const char *s1, const char *s2){ | |
// register const unsigned char *ss1, *ss2; | |
// for (ss1 = (const unsigned char*)s1, ss2 = (const unsigned char*)s2; | |
// *ss1 == *ss2 && *ss1 != '\0'; | |
// ss1++, ss2++); | |
// return (*ss1 != '\0' && *ss2 != '\0') * (*ss1 - *ss2); | |
// } | |
template<typename ARRAY,typename T> | |
int bsearch_low(ARRAY head,int low,int high,const T target,auto compare){ | |
register int l,h,m; | |
l = low;h = high; | |
while (l < h){ | |
m = (l+h)/2; | |
if (0<compare(head[m],target)){ | |
l = m+1; | |
}else{ | |
h = m; | |
} | |
} | |
return compare(head[l],target)==0 ? l : -1; | |
} | |
template<typename ARRAY,typename T> | |
int bsearch_high(ARRAY head,int low,int high,const T target,auto compare){ | |
register int l,h,m; | |
l = low;h = high; | |
while (l < h){ | |
m = (l+h+1)/2; | |
if (0<=compare(head[m],target)){ | |
l = m; | |
}else{ | |
h = m-1; | |
} | |
} | |
return compare(head[l],target)==0 ? l : -1; | |
} | |
// uninduced Sorting(´・ω・`) | |
struct SuffixArray{ | |
const char* data; | |
int size; | |
vector<const char*> sa; | |
SuffixArray(){} | |
SuffixArray(const string &d){data = d.c_str();size = d.size();} | |
void generate(){ | |
int i,j; | |
sa.resize(size+1); // '/0'の為、+1する | |
// debug用 | |
for (i=0;i<=size;i++){ | |
if (sa[i]==0) sa[i]=data+size; | |
} | |
// bucket | |
int bucket[257]; | |
fill(bucket,bucket+257,0); | |
for (i=0;i<=size;i++){ | |
bucket[(int)(unsigned char)data[i]+1]++; | |
} | |
for (i=1;i<256;i++){ | |
bucket[i]+=bucket[i-1]; | |
} | |
// s,l classify | |
vector<bool> slc(size); | |
slc[size] = true; // TRUE => S , FALSE => L | |
for (i=size-1;0<=i;i--){ | |
if (data[i]<data[i+1]){ | |
slc[i]=true; | |
}else if (data[i]>data[i+1]){ | |
slc[i]=false; | |
}else{ | |
slc[i]=slc[i+1]; | |
} | |
} | |
cout<<1<<endl; | |
// two stage | |
// TODO:S-typeは気合でソートされる。 sais. | |
int bucket_s[256]; | |
fill(bucket_s,bucket_s+256,0); | |
for (i=0;i<=size;i++){ | |
if (slc[i]){ | |
sa[ bucket[data[i]+1]-(++bucket_s[data[i]]) ] = data + i; | |
} | |
} | |
for (i=0;i<255;i++){ | |
if (bucket_s[i]){ | |
printf("%c %d %d\n",i,bucket[i+1]-bucket_s[i],bucket[i+1]); | |
sort(sa.begin()+bucket[i+1]-bucket_s[i],sa.begin()+bucket[i+1],[](const char* l,const char* r){return 0>strcmp(l,r);}); | |
} | |
} | |
debugv(sa); | |
for (i=1;i<=size;i++){ | |
if (sa[i] != data+size){ | |
j = sa[i]-data-1; | |
cout<<j<<endl; | |
if (0<=j && !slc[j]) | |
sa[bucket[data[j]]++] = data + j; | |
} | |
} | |
// lms | |
vector<int> lms; | |
for (i=1;i<=size;i++){ | |
if (!slc[i-1] && slc[i]){ | |
lms.push_back(i); | |
} | |
} | |
sort(ALL(lms),[this](const int &l,const int &r){return 0>strcmp(data+l,data+r);}); // TODO:sais | |
for (i=0;i<=size;i++){ | |
cout << (slc[i] ? 'S' : 'L'); | |
}cout << endl; | |
debugv(lms); | |
debugv(sa); | |
for (i=0;i<=size;i++){ | |
sa[i] = data + i; | |
} | |
sort(ALL(sa),[](const char* l,const char* r){return 0>strcmp(l,r);}); // TODO: sais | |
debugv(sa); | |
// TODO:LCP | |
} | |
void find(string &keyword,int &out_low,int &out_high){ | |
int low,high,l,h,i; | |
auto k_length = keyword.size(); | |
const char* c_keyword = keyword.c_str(); | |
low = 0; | |
high = sa.size(); | |
for (i = 0;i < k_length;i++){ | |
l = bsearch_low (sa,low,high,c_keyword+i,[&i](auto p,auto t){return *t-*(p+i);}); | |
h = bsearch_high(sa,low,high,c_keyword+i,[&i](auto p,auto t){return *t-*(p+i);}); | |
if (l==-1||h==-1||h<l){ | |
out_low = -1;out_high = -2; | |
return; | |
} | |
low = l; | |
high = h; | |
} | |
out_low = low; | |
out_high = high; | |
return ; | |
} | |
int foundToIdx(int found){ | |
return sa[found]-data; | |
} | |
}; | |
int m,n; | |
int main(){ | |
int numtestcase; | |
cin >> numtestcase; | |
for (int count=1;count<=numtestcase;count++){ | |
cout << "# case " << count << endl; | |
string source; | |
int numquery; | |
cin >> source; | |
cout << source << endl; | |
SuffixArray sais(source); | |
sais.generate(); | |
cin >>numquery; | |
for (int i=0;i<numquery;i++){ | |
string query; | |
cin >> query; | |
int low,high; | |
sais.find(query,low,high); | |
string result(source.size(),' '); | |
for(;low<=high;low++){ | |
result[sais.foundToIdx(low)] = '^'; | |
} | |
cout << result << "| " << query << endl; | |
} | |
} | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment