Skip to content

Instantly share code, notes, and snippets.

@buyoh
Created September 26, 2016 02:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save buyoh/f48efa29dc9ce4eee57c367300b53f39 to your computer and use it in GitHub Desktop.
Save buyoh/f48efa29dc9ce4eee57c367300b53f39 to your computer and use it in GitHub Desktop.
SuffixArray-InducedSorting (未完成)
#include<bits/stdc++.h>
using namespace std;
typedef unsigned int uint;
typedef long long int ll;
typedef unsigned long long int ull;
#define debugv(v) printf("L%d %s => ",__LINE__,#v);for(auto e:v){cout<<e<<" ";}cout<<endl;
#define debugm(m) printf("L%d %s is..\n",__LINE__,#m);for(auto v:m){for(auto e:v){cout<<e<<" ";}cout<<endl;}
#define debuga(m,w) printf("L%d %s is => ",__LINE__,#m);for(int x=0;x<(w);x++){cout<<(m)[x]<<" ";}cout<<endl;
#define debugaa(m,w,h) printf("L%d %s is..\n",__LINE__,#m);for(int y=0;y<(h);y++){for(int x=0;x<(w);x++){cout<<(m)[x][y]<<" ";}cout<<endl;}
#define ALL(v) (v).begin(),(v).end()
#define BIGINT 0x7FFFFFFF
#define E107 1000000007
#define TIME chrono::system_clock::now
#define MILLISEC(t) (chrono::duration_cast<chrono::milliseconds>(t).count())
template<typename T1,typename T2>
ostream& operator <<(ostream &o,const pair<T1,T2> p){o<<"("<<p.first<<":"<<p.second<<")";return o;}
// strcmp 改造
// abcd と abcを同一とみなす。
// int strcmpcut(const char *s1, const char *s2){
// register const unsigned char *ss1, *ss2;
// for (ss1 = (const unsigned char*)s1, ss2 = (const unsigned char*)s2;
// *ss1 == *ss2 && *ss1 != '\0';
// ss1++, ss2++);
// return (*ss1 != '\0' && *ss2 != '\0') * (*ss1 - *ss2);
// }
template<typename ARRAY,typename T>
int bsearch_low(ARRAY head,int low,int high,const T target,auto compare){
register int l,h,m;
l = low;h = high;
while (l < h){
m = (l+h)/2;
if (0<compare(head[m],target)){
l = m+1;
}else{
h = m;
}
}
return compare(head[l],target)==0 ? l : -1;
}
template<typename ARRAY,typename T>
int bsearch_high(ARRAY head,int low,int high,const T target,auto compare){
register int l,h,m;
l = low;h = high;
while (l < h){
m = (l+h+1)/2;
if (0<=compare(head[m],target)){
l = m;
}else{
h = m-1;
}
}
return compare(head[l],target)==0 ? l : -1;
}
// uninduced Sorting(´・ω・`)
struct SuffixArray{
const char* data;
int size;
vector<const char*> sa;
SuffixArray(){}
SuffixArray(const string &d){data = d.c_str();size = d.size();}
void generate(){
int i,j;
sa.resize(size+1); // '/0'の為、+1する
// debug用
for (i=0;i<=size;i++){
if (sa[i]==0) sa[i]=data+size;
}
// bucket
int bucket[257];
fill(bucket,bucket+257,0);
for (i=0;i<=size;i++){
bucket[(int)(unsigned char)data[i]+1]++;
}
for (i=1;i<256;i++){
bucket[i]+=bucket[i-1];
}
// s,l classify
vector<bool> slc(size);
slc[size] = true; // TRUE => S , FALSE => L
for (i=size-1;0<=i;i--){
if (data[i]<data[i+1]){
slc[i]=true;
}else if (data[i]>data[i+1]){
slc[i]=false;
}else{
slc[i]=slc[i+1];
}
}
cout<<1<<endl;
// two stage
// TODO:S-typeは気合でソートされる。 sais.
int bucket_s[256];
fill(bucket_s,bucket_s+256,0);
for (i=0;i<=size;i++){
if (slc[i]){
sa[ bucket[data[i]+1]-(++bucket_s[data[i]]) ] = data + i;
}
}
for (i=0;i<255;i++){
if (bucket_s[i]){
printf("%c %d %d\n",i,bucket[i+1]-bucket_s[i],bucket[i+1]);
sort(sa.begin()+bucket[i+1]-bucket_s[i],sa.begin()+bucket[i+1],[](const char* l,const char* r){return 0>strcmp(l,r);});
}
}
debugv(sa);
for (i=1;i<=size;i++){
if (sa[i] != data+size){
j = sa[i]-data-1;
cout<<j<<endl;
if (0<=j && !slc[j])
sa[bucket[data[j]]++] = data + j;
}
}
// lms
vector<int> lms;
for (i=1;i<=size;i++){
if (!slc[i-1] && slc[i]){
lms.push_back(i);
}
}
sort(ALL(lms),[this](const int &l,const int &r){return 0>strcmp(data+l,data+r);}); // TODO:sais
for (i=0;i<=size;i++){
cout << (slc[i] ? 'S' : 'L');
}cout << endl;
debugv(lms);
debugv(sa);
for (i=0;i<=size;i++){
sa[i] = data + i;
}
sort(ALL(sa),[](const char* l,const char* r){return 0>strcmp(l,r);}); // TODO: sais
debugv(sa);
// TODO:LCP
}
void find(string &keyword,int &out_low,int &out_high){
int low,high,l,h,i;
auto k_length = keyword.size();
const char* c_keyword = keyword.c_str();
low = 0;
high = sa.size();
for (i = 0;i < k_length;i++){
l = bsearch_low (sa,low,high,c_keyword+i,[&i](auto p,auto t){return *t-*(p+i);});
h = bsearch_high(sa,low,high,c_keyword+i,[&i](auto p,auto t){return *t-*(p+i);});
if (l==-1||h==-1||h<l){
out_low = -1;out_high = -2;
return;
}
low = l;
high = h;
}
out_low = low;
out_high = high;
return ;
}
int foundToIdx(int found){
return sa[found]-data;
}
};
int m,n;
int main(){
int numtestcase;
cin >> numtestcase;
for (int count=1;count<=numtestcase;count++){
cout << "# case " << count << endl;
string source;
int numquery;
cin >> source;
cout << source << endl;
SuffixArray sais(source);
sais.generate();
cin >>numquery;
for (int i=0;i<numquery;i++){
string query;
cin >> query;
int low,high;
sais.find(query,low,high);
string result(source.size(),' ');
for(;low<=high;low++){
result[sais.foundToIdx(low)] = '^';
}
cout << result << "| " << query << endl;
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment