Skip to content

Instantly share code, notes, and snippets.

@jacky860226
Last active December 27, 2018 12:07
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jacky860226/1d33adad858eef71bfe18120d8d69e6d to your computer and use it in GitHub Desktop.
Save jacky860226/1d33adad858eef71bfe18120d8d69e6d to your computer and use it in GitHub Desktop.
Suffix Array + Longest Common Prefix
#define F(x) (x)/3+((x)%3==1?0:tb)
#define G(x) (x)<tb?(x)*3+1:((x)-tb)*3+2
int wa[3*maxn+5],wb[3*maxn+5],wv[3*maxn+5],c[maxn+5];
inline bool c0(int *s,int a,int b){
return s[a]==s[b]&&s[a+1]==s[b+1]&&s[a+2]==s[b+2];
}
inline bool c12(int k,int *s,int a,int b){
if(k==2)return s[a]<s[b]||s[a]==s[b]&&c12(1,s,a+1,b+1);
else return s[a]<s[b]||s[a]==s[b]&&wv[a+1]<wv[b+1];
}
inline void radix_sort(int *s,int *a,int *b,int len,int A){
static int i;
for(i=0;i<len;++i)wv[i]=s[a[i]];
for(i=0;i<A;++i)c[i]=0;
for(i=0;i<len;++i)++c[wv[i]];
for(i=1;i<A;++i)c[i]+=c[i-1];
for(i=len-1;i>=0;--i)b[--c[wv[i]]]=a[i];
}
void dc3(int *s,int *sa,int len,int A){
int i,j,*san=sa+len,ta=0,tb=(len+1)/3,tbc=0,p;
int *rn=s+len;
s[len]=s[len+1]=0;
for(i=0;i<len;++i){
if(i%3!=0)wa[tbc++]=i;
}
radix_sort(s+2,wa,wb,tbc,A);
radix_sort(s+1,wb,wa,tbc,A);
radix_sort(s,wa,wb,tbc,A);
for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;++i){
rn[F(wb[i])]=c0(s,wb[i-1],wb[i])?p-1:p++;
}
if(p<tbc)dc3(rn,san,tbc,p);
else for(i=0;i<tbc;i++)san[rn[i]]=i;
for(i=0;i<tbc;++i){
if(san[i]<tb)wb[ta++]=san[i]*3;
}
if(len%3==1)wb[ta++]=len-1;
radix_sort(s,wb,wa,ta,A);
for(i=0;i<tbc;++i)wv[wb[i]=G(san[i])]=i;
for(i=0,j=0,p=0;i<ta&&j<tbc;++p){
sa[p]=c12(wb[j]%3,s,wa[i],wb[j])?wa[i++]:wb[j++];
}
for(;i<ta;++p)sa[p]=wa[i++];
for(;j<tbc;++p)sa[p]=wb[j++];
}
#undef F
#undef G
/*
DC3的輸入陣列s必須能儲存0~len的數值,所以設為int
s及sa陣列的大小必須大於3*maxn
*/
#include<algorithm>
#define radix_sort(x,y){\
for(i=0;i<A;++i)c[i]=0;\
for(i=0;i<len;++i)c[x[y[i]]]++;\
for(i=1;i<A;++i)c[i]+=c[i-1];\
for(i=len-1;i>=0;--i)sa[--c[x[y[i]]]]=y[i];\
}
#define sac(r,a,b) r[a]!=r[b]||a+k>=len||r[a+k]!=r[b+k]
inline void suffix_array(const char *s,int len,int *sa,int *rank,int *tmp,int *c){
int A='z'+1,i,k,id=0;
for(i=0;i<len;++i)rank[tmp[i]=i]=s[i];
radix_sort(rank,tmp);
for(k=1;id<len-1;k<<=1){
for(id=0,i=len-k;i<len;++i)tmp[id++]=i;
for(i=0;i<len;++i)if(sa[i]>=k)tmp[id++]=sa[i]-k;
radix_sort(rank,tmp);
std::swap(rank,tmp);
for(rank[sa[0]]=id=0,i=1;i<len;++i)
rank[sa[i]]=id+=sac(tmp,sa[i-1],sa[i]);
A=id+1;
}
}
#undef radix_sort
#undef sac
#include<algorithm>
struct CMP{
int len,k,*rank,a,b;
inline bool operator()(int i,int j){
if(rank[i]!=rank[j])return rank[i]<rank[j];
a=(i+=k)<len?rank[i]:-1;
b=(j+=k)<len?rank[j]:-1;
return a<b;
}
};
inline void suffix_array(const char *s,int len,int *sa,int *rank,int *tmp){
for(int i=0;i<len;++i){
sa[i]=i;
rank[i]=s[i];
}
CMP cmp={len,1};
for(;;cmp.k<<=1){
cmp.rank=rank;
std::sort(sa,sa+len,cmp);
tmp[sa[0]]=0;
for(int i=1;i<len;++i)tmp[sa[i]]=tmp[sa[i-1]]+cmp(sa[i-1],sa[i]);
if(tmp[sa[len-1]]==len-1)break;
std::swap(rank,tmp);
}
}
/*
h:高度數組
sa:後綴數組
rank:排名
*/
inline void suffix_array_lcp(const char *s,int len,int *h,int *sa,int *rank){
for(int i=0;i<len;++i)rank[sa[i]]=i;
for(int i=0,k=0;i<len;++i){
if(rank[i]==0)continue;
if(k)--k;
while(s[i+k]==s[sa[rank[i]-1]+k])++k;
h[rank[i]]=k;
}
h[0]=0;
}
#include<string.h>
#define MAGIC(XD){\
memset(sa,0,sizeof(int)*n);\
memcpy(x,c,sizeof(int)*z);\
XD\
memcpy(x+1,c,sizeof(int)*(z-1));\
for(i=0;i<n;++i){\
if(sa[i]&&!t[sa[i]-1])sa[x[s[sa[i]-1]]++]=sa[i]-1;\
}\
memcpy(x,c,sizeof(int)*z);\
for(i=n-1;i>=0;--i){\
if(sa[i]&&t[sa[i]-1])sa[--x[s[sa[i]-1]]]=sa[i]-1;\
}\
}
void sais(int *s,int *sa,int *p,bool *t,int *c,int n,int z){
bool neq=t[n-1]=1;
int nn=0,nmxz=-1,*q=sa+n,*ns=s+n,*x=c+z,lst=-1,i,j;
memset(c,0,sizeof(int)*z);
for(i=0;i<n;++i)++c[s[i]];
for(i=0;i<z-1;++i)c[i+1]+=c[i];
for(i=n-2;i>=0;i--)t[i]=(s[i]==s[i+1]?t[i+1]:s[i]<s[i+1]);
MAGIC(
for(i=1;i<n;++i){
if(t[i]&&!t[i-1])sa[--x[s[i]]]=p[q[i]=nn++]=i;
}
);
for(i=0;i<n;++i)if((j=sa[i])&&t[j]&&!t[j-1]){
neq=lst<0||memcmp(s+j,s+lst,(p[q[j]+1]-j)*sizeof(int));
ns[q[lst=j]]=nmxz+=neq;
}
if(nmxz==nn-1)for(i=0;i<nn;++i)q[ns[i]]=i;
else sais(ns,q,p+nn,t+n,c+z,nn,nmxz+1);
MAGIC(
for(i=nn-1;i>=0;--i)sa[--x[s[p[q[i]]]]]=p[q[i]];
);
}
#undef MAGIC
/*****************這些是需要用到的陣列大小**************/
static const int MXN=10000000;
int s[MXN*2+5],sa[MXN*2+5],p[MXN+5],c[MXN*2+5];
bool t[MXN*2+5];
/*SA-IS Algorithm*/
const unsigned char mask[]={0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01};
#define tget(i) ( (t[(i)/8]&mask[(i)%8])?1:0)
#define tset(i, b) t[(i)/8]=(b)?(mask[(i)%8]|t[(i)/8]):((~mask[(i)%8])&t[(i)/8])
#define chr(i) (cs==sizeof(int)?((int*)s)[i]:((unsigned char *)s)[i])
#define isLMS(i) (i>0&&tget(i)&&!tget(i-1))
// find the start or end of each bucket
inline void getBuckets(unsigned char *s,int *bkt,int n,int K,int cs,bool end){
int i,sum=0;
// clear all buckets
for(i=0;i<=K;++i)bkt[i]=0;
// compute the size of each bucket
for(i=0;i<n;++i)++bkt[chr(i)];
for(i=0;i<=K;++i){
sum+=bkt[i];
bkt[i]=end?sum:sum-bkt[i];
}
}
// compute SAl
inline void induceSAl(unsigned char *t,int *SA,unsigned char *s,int *bkt,int n,int K,int cs,bool end){
int i,j;
// find starts of buckets
getBuckets(s,bkt,n,K,cs,end);
for(i=0;i<n;++i){
j=SA[i]-1;
if(j>=0&&!tget(j))SA[bkt[chr(j)]++]=j;
}
}
// compute SAs
inline void induceSAs(unsigned char *t,int *SA,unsigned char *s,int *bkt,int n,int K,int cs,bool end){
int i,j;
// find ends of buckets
getBuckets(s,bkt,n,K,cs,end);
for(i=n-1;i>=0;--i){
j=SA[i]-1;
if(j>=0&&tget(j))SA[--bkt[chr(j)]]=j;
}
}
// find the suffix array SA of s[0..n-1] in {1..K}?n
// require s[n-1]=0 (the sentinel!), n>=2
// use a working space (excluding s and SA) of
// at most 2.25n+O(1) for a constant alphabet
void SA_IS(unsigned char *s,int *SA,int n,int K,short cs){
// LS-type array in bits
unsigned char *t=(unsigned char *)malloc(n/8+1);
int i,j;
// classify the type of each character
// the sentinel must be in s1, important!!!
tset(n-2,0);tset(n-1,1);
for(i=n-3;i>=0;--i)
tset(i,(chr(i)<chr(i+1)||(chr(i)==chr(i+1)&&tget(i+1)==1))?1:0);
// stage 1: reduce the problem by at least 1/2
// sort all the S-substrings
// bucket array
int *bkt=(int *)malloc(sizeof(int)*(K+1));
// find ends of buckets
getBuckets(s,bkt,n,K,cs,true);
for(i=0;i<n;++i)SA[i]=-1;
for(i=1;i<n;++i)if(isLMS(i))SA[--bkt[chr(i)]]=i;
induceSAl(t,SA,s,bkt,n,K,cs,false);
induceSAs(t,SA,s,bkt,n,K,cs,true);
free(bkt);
// compact all the sorted substrings into
// the first n1 items of SA
// 2*n1 must be not larger than n (proveable)
int n1=0;
for(i=0;i<n;++i)if(isLMS(SA[i]))SA[n1++]=SA[i];
// find the lexicographic names of substrings
// init the name array buffer
for(i=n1;i<n;++i)SA[i]=-1;
int name=0,prev=-1;
for(i=0;i<n1;++i){
int pos=SA[i];bool diff=false;
for(int d=0;d<n;++d)
if(prev==-1||chr(pos+d)!=chr(prev+d)||tget(pos+d)!=tget(prev+d)){
diff=true;break;
}else if(d>0&&(isLMS(pos+d)||isLMS(prev+d)))break;
if(diff){
++name;prev=pos;
}
pos=(pos%2==0)?pos/2:(pos-1)/2;
SA[n1+pos]=name-1;
}
for(i=n-1,j=n-1;i>=n1;--i)if(SA[i]>=0)SA[j--]=SA[i];
// stage 2: solve the reduced problem
// recurse if names are not yet unique
int *SA1=SA,*s1=SA+n-n1;
if(name<n1)SA_IS((unsigned char*)s1,SA1,n1,name-1,sizeof(int));
else // generate the suffix array of s1 directly
for(i=0;i<n1;++i)SA1[s1[i]]=i;
// stage 3: induce the result for
// the original problem
// bucket array
bkt=(int *)malloc(sizeof(int)*(K+1));
// put all the LMS characters into their buckets
// find ends of buckets
getBuckets(s,bkt,n,K,cs,true);
for(i=1,j=0;i<n;++i)if(isLMS(i))s1[j++]=i; // get p1
// get index in s
for(i=0;i<n1;++i)SA1[i]=s1[SA1[i]];
// init SA[n1..n-1]
for(i=n1;i<n;++i)SA[i]=-1;
for(i=n1-1;i>=0;--i){
j=SA[i];SA[i]=-1;
SA[--bkt[chr(j)]]=j;
}
induceSAl(t,SA,s,bkt,n,K,cs,false);
induceSAs(t,SA,s,bkt,n,K,cs,true);
free(bkt);free(t);
}
#include <time.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef UCHAR_SIZE
# define UCHAR_SIZE 256
#endif
#ifndef MINBUCKETSIZE
# define MINBUCKETSIZE 256
#endif
#define sais_index_type int
#define sais_bool_type int
#define SAIS_LMSSORT2_LIMIT 0x3fffffff
#define SAIS_MYMALLOC(_num, _type) ((_type *)malloc((_num) * sizeof(_type)))
#define chr(_a) (cs == sizeof(sais_index_type) ? ((sais_index_type *)T)[(_a)] : ((unsigned char *)T)[(_a)])
/* find the start or end of each bucket */
inline void getCounts(const void *T,sais_index_type *C,sais_index_type n,sais_index_type k,int cs){
sais_index_type i;
for(i=0;i<k;++i)C[i]=0;
for(i=0;i<n;++i)++C[chr(i)];
}
inline void getBuckets(const sais_index_type *C,sais_index_type *B,sais_index_type k,sais_bool_type end){
sais_index_type i,sum=0;
if(end)for(i=0;i<k;++i){
sum+=C[i];
B[i]=sum;
}else for(i=0;i<k;++i){
sum+=C[i];
B[i]=sum-C[i];
}
}
/* sort all type LMS suffixes */
inline void LMSsort1(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type n,sais_index_type k,int cs){
sais_index_type *b,i,j;
sais_index_type c0,c1;
/* compute SAl */
if(C==B)getCounts(T,C,n,k,cs);
getBuckets(C,B,k,0); /* find starts of buckets */
j=n-1;
b=SA+B[c1=chr(j)];
--j;
*b++=(chr(j)<c1)?~j:j;
for(i=0;i<n;++i){
if(0<(j=SA[i])){
if((c0=chr(j))!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
--j;
*b++=(chr(j)<c1)?~j:j;
SA[i]=0;
}else if(j<0)SA[i]=~j;
}
/* compute SAs */
if(C==B)getCounts(T,C,n,k,cs);
getBuckets(C,B,k,1); /* find ends of buckets */
for(i=n-1,b=SA+B[c1=0];0<=i;--i){
if(0<(j=SA[i])){
if((c0=chr(j))!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
--j;
*--b=(chr(j)>c1)?~(j+1):j;
SA[i]=0;
}
}
}
inline sais_index_type LMSpostproc1(const void *T,sais_index_type *SA,sais_index_type n,sais_index_type m,int cs){
sais_index_type i,j,p,q,plen,qlen,name;
sais_index_type c0,c1;
sais_bool_type diff;
/* compact all the sorted substrings into the first m items of SA
2*m must be not larger than n (proveable) */
for(i=0;(p=SA[i])<0;++i)SA[i]=~p;
if(i<m){
for(j=i,++i;;++i){
if((p=SA[i])<0){
SA[j++]=~p;SA[i]=0;
if(j==m)break;
}
}
}
/* store the length of all substrings */
i=n-1;j=n-1;c0=chr(n-1);
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))>=c1));
for(;0<=i;){
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))<=c1));
if(0<=i){
SA[m+((i+1)>>1)]=j-i;
j=i+1;
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))>=c1));
}
}
/* find the lexicographic names of all substrings */
for(i=0,name=0,q=n,qlen=0;i<m;++i){
p=SA[i],plen=SA[m+(p>>1)],diff=1;
if((plen==qlen)&&((q+plen)<n)){
for(j=0;(j<plen)&&(chr(p+j)==chr(q+j));++j);
if(j==plen)diff=0;
}
if(diff!=0)++name,q=p,qlen=plen;
SA[m+(p>>1)]=name;
}
return name;
}
inline void LMSsort2(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type *D,sais_index_type n,sais_index_type k,int cs){
sais_index_type *b,i,j,t,d;
sais_index_type c0,c1;
/* compute SAl */
getBuckets(C,B,k,0); /* find starts of buckets */
j=n-1;
b=SA+B[c1=chr(j)];
--j;
t=(chr(j)<c1);
j+=n;
*b++=(t&1)?~j:j;
for(i=0,d=0;i<n;++i){
if(0<(j=SA[i])){
if(n<=j)d+=1,j-=n;
if((c0=chr(j))!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
--j;
t=c0;t=(t<<1)|(chr(j)<c1);
if(D[t]!=d)j+=n,D[t]=d;
*b++=(t&1)?~j:j;
SA[i]=0;
}else if(j<0)SA[i]=~j;
}
for(i=n-1;0<=i;--i){
if(0<SA[i]){
if(SA[i]<n){
SA[i]+=n;
for(j=i-1;SA[j]<n;--j);
SA[j]-=n;
i=j;
}
}
}
/* compute SAs */
getBuckets(C,B,k,1); /* find ends of buckets */
for(i=n-1,d+=1,b=SA+B[c1=0];0<=i;--i){
if(0<(j=SA[i])){
if(n<=j)d+=1,j-=n;
if((c0=chr(j))!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
--j;
t=c0;t=(t<<1)|(chr(j)>c1);
if(D[t]!=d)j+=n,D[t]=d;
*--b=(t&1)?~(j+1):j;
SA[i]=0;
}
}
}
inline sais_index_type LMSpostproc2(sais_index_type *SA,sais_index_type n,sais_index_type m){
sais_index_type i,j,d,name;
/* compact all the sorted LMS substrings into the first m items of SA */
for(i=0,name=0;(j=SA[i])<0;++i){
j=~j;
if(n<=j)name+=1;
SA[i]=j;
}
if(i<m){
for(d=i,++i;;++i){
if((j=SA[i])<0){
j=~j;
if(n<=j)name+=1;
SA[d++]=j;SA[i]=0;
if(d==m)break;
}
}
}
if(name<m){
/* store the lexicographic names */
for(i=m-1,d=name+1;0<=i;--i){
if(n<=(j=SA[i]))j-=n,--d;
SA[m+(j>>1)]=d;
}
}else{
/* unset flags */
for(i=0;i<m;++i){
if(n<=(j=SA[i]))j-=n,SA[i]=j;
}
}
return name;
}
/* compute SA and BWT */
inline void induceSA(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type n,sais_index_type k,int cs){
sais_index_type *b,i,j;
sais_index_type c0,c1;
/* compute SAl */
if(C==B)getCounts(T, C, n, k, cs);
getBuckets(C,B,k,0); /* find starts of buckets */
j =n-1;
b=SA+B[c1=chr(j)];
*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
for(i=0;i<n;++i){
j=SA[i],SA[i]=~j;
if(0<j){
--j;
if((c0=chr(j))!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
}
}
/* compute SAs */
if(C==B)getCounts(T,C,n,k,cs);
getBuckets(C,B,k,1); /* find ends of buckets */
for(i=n-1,b=SA+B[c1=0];0<=i;--i){
if(0<(j=SA[i])){
--j;
if((c0=chr(j))!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
*--b=((j==0)||(chr(j-1)>c1))?~j:j;
}else SA[i]=~j;
}
}
inline sais_index_type computeBWT(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type n,sais_index_type k,int cs){
sais_index_type *b,i,j,pidx=-1;
sais_index_type c0,c1;
/* compute SAl */
if(C==B)getCounts(T, C, n, k, cs);
getBuckets(C,B,k,0); /* find starts of buckets */
j=n-1;
b=SA+B[c1=chr(j)];
*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
for(i=0;i<n;++i){
if(0<(j=SA[i])){
--j;
SA[i]=~((sais_index_type)(c0 = chr(j)));
if(c0!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
}else if(j!=0)SA[i]=~j;
}
/* compute SAs */
if(C==B)getCounts(T,C,n,k,cs);
getBuckets(C,B,k,1); /* find ends of buckets */
for(i=n-1,b=SA+B[c1=0];0<=i;--i){
if(0<(j=SA[i])){
--j;
SA[i]=(c0=chr(j));
if(c0!=c1){
B[c1]=b-SA;
b=SA+B[c1=c0];
}
*--b=((0<j)&&(chr(j-1)>c1))?~((sais_index_type)chr(j-1)):j;
}else if(j!=0)SA[i]=~j;
else pidx=i;
}
return pidx;
}
/* find the suffix array SA of T[0..n-1] in {0..255}^n */
sais_index_type sais_main(const void *T,sais_index_type *SA,sais_index_type fs,sais_index_type n,sais_index_type k,int cs,sais_bool_type isbwt){
sais_index_type *C,*B,*D,*RA,*b;
sais_index_type i,j,m,p,q,t,name,pidx=0,newfs;
sais_index_type c0,c1;
unsigned int flags;
if(k<=MINBUCKETSIZE){
if((C=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
if(k<=fs){
B=SA+(n+fs-k);
flags=1;
}else{
if((B=SAIS_MYMALLOC(k,sais_index_type))==NULL){
free(C);
return -2;
}
flags=3;
}
}else if(k<=fs){
C=SA+(n+fs-k);
if(k<=(fs-k)){
B=C-k;
flags=0;
}else if(k<=(MINBUCKETSIZE*4)){
if((B=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
flags=2;
}else{
B=C;
flags=8;
}
}else{
if((C=B=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
flags=4|8;
}
if((n<=SAIS_LMSSORT2_LIMIT)&&(2<=(n/k))){
if(flags&1)flags|=((k*2)<=(fs-k))?32:16;
else if((flags==0)&&((k*2)<=(fs-k*2)))flags|=32;
}
/* stage 1: reduce the problem by at least 1/2
sort all the LMS-substrings */
getCounts(T,C,n,k,cs);getBuckets(C,B,k,1); /* find ends of buckets */
for(i=0;i<n;++i)SA[i]=0;
b=&t;i=n-1;j=n;m=0;c0=chr(n-1);
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))>=c1));
for(;0<=i;){
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))<=c1));
if(0<=i){
*b=j;b=SA+--B[c1];j=i;++m;
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))>=c1));
}
}
if(1<m){
if(flags&(16|32)){
if(flags&16){
if((D=SAIS_MYMALLOC(k*2,sais_index_type))==NULL){
if(flags&(1|4))free(C);
if(flags&2)free(B);
return -2;
}
}else D=B-k*2;
++B[chr(j+1)];
for(i=0,j=0;i<k;++i){
j+=C[i];
if(B[i]!=j)SA[B[i]]+=n;
D[i]=D[i+k]=0;
}
LMSsort2(T,SA,C,B,D,n,k,cs);
name=LMSpostproc2(SA,n,m);
if(flags&16)free(D);
}else{
LMSsort1(T,SA,C,B,n,k,cs);
name=LMSpostproc1(T,SA,n,m,cs);
}
}else if(m==1){
*b=j+1;
name=1;
}else name = 0;
/* stage 2: solve the reduced problem
recurse if names are not yet unique */
if(name<m){
if(flags&4)free(C);
if(flags&2)free(B);
newfs=(n+fs)-(m*2);
if((flags&(1|4|8))==0){
if((k+name)<=newfs)newfs-=k;
else flags|=8;
}
RA=SA+m+newfs;
for(i=m+(n>>1)-1,j=m-1;m<=i;--i){
if(SA[i]!=0)RA[j--]=SA[i]-1;
}
if(sais_main(RA,SA,newfs,m,name,sizeof(sais_index_type),0)!=0){
if(flags&1)free(C);
return -2;
}
i=n-1;j=m-1;c0=chr(n-1);
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))>=c1));
for(;0<=i;){
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))<=c1));
if(0<=i){
RA[j--]=i+1;
do{
c1=c0;
}while((0<=--i)&&((c0=chr(i))>=c1));
}
}
for(i=0;i<m;++i)SA[i]=RA[SA[i]];
if(flags&4){
if((C=B=SAIS_MYMALLOC(k,int))==NULL)return -2;
}
if(flags&2){
if((B=SAIS_MYMALLOC(k,int))==NULL){
if(flags&1)free(C);
return -2;
}
}
}
/* stage 3: induce the result for the original problem */
if(flags&8)getCounts(T,C,n,k,cs);
/* put all left-most S characters into their buckets */
if(1<m){
getBuckets(C,B,k,1); /* find ends of buckets */
i=m-1,j=n,p=SA[m-1],c1=chr(p);
do{
q=B[c0=c1];
while(q<j)SA[--j]=0;
do{
SA[--j]=p;
if(--i<0)break;
p=SA[i];
}while((c1=chr(p))==c0);
}while(0<=i);
while(0<j)SA[--j]=0;
}
if(isbwt==0)induceSA(T,SA,C,B,n,k,cs);
else pidx=computeBWT(T,SA,C,B,n,k,cs);
if(flags&(1|4))free(C);
if(flags&2)free(B);
return pidx;
}
/*---------------------------------------------------------------------------*/
inline int sais(const unsigned char *T,int *SA,int n){
if((T==NULL)||(SA==NULL)||(n<0))return -1;
if(n<=1){
if(n==1)SA[0]=0;
return 0;
}
return sais_main(T,SA,0,n,UCHAR_SIZE,sizeof(unsigned char),0);
}
inline int sais_int(const int *T,int *SA,int n,int k){
if((T==NULL)||(SA==NULL)||(n<0)||(k<=0))return -1;
if(n<=1){
if(n==1)SA[0]=0;
return 0;
}
return sais_main(T,SA,0,n,k,sizeof(int),0);
}
inline int sais_bwt(const unsigned char *T,unsigned char *U,int *A,int n){
int i,pidx;
if((T==NULL)||(U==NULL)||(A==NULL)||(n<0))return -1;
if(n<=1){
if(n==1)U[0]=T[0];
return n;
}
pidx=sais_main(T,A,0,n,UCHAR_SIZE,sizeof(unsigned char),1);
if(pidx<0)return pidx;
U[0]=T[n-1];
for(i=0;i<pidx;++i)U[i+1]=(unsigned char)A[i];
for(i+=1;i<n;++i)U[i]=(unsigned char)A[i];
pidx+=1;
return pidx;
}
inline int sais_int_bwt(const int *T,int *U,int *A,int n,int k){
int i,pidx;
if((T==NULL)||(U==NULL)||(A==NULL)||(n<0)||(k<=0))return -1;
if(n<=1){
if(n==1)U[0]=T[0];
return n;
}
pidx=sais_main(T,A,0,n,k,sizeof(int),1);
if(pidx<0)return pidx;
U[0]=T[n-1];
for(i=0;i<pidx;++i)U[i+1]=A[i];
for(i+=1;i<n;++i)U[i]=A[i];
pidx+=1;
return pidx;
}
char s[10000005];
int sa[10000005],len;
int main(){
freopen("in.txt","r",stdin);
gets(s);
int st=clock();
sais((unsigned char*)s,sa,len=strlen(s));
printf("%f\n",(double)(clock()-st)/1000);
//for(int i=0;i<len;++i)printf("%d\n",sa[i]);
return 0;
}
#define pushS(x) sa[--bkt[s[x]]] = x
#define pushL(x) sa[bkt[s[x]]++] = x
#define induce_sort(v){\
fill_n(sa,n,0);\
copy_n(_bkt,A,bkt);\
for(i=n1-1;~i;--i)pushS(v[i]);\
copy_n(_bkt,A-1,bkt+1);\
for(i=0;i<n;++i)if(sa[i]&&!t[sa[i]-1])pushL(sa[i]-1);\
copy_n(_bkt,A,bkt);\
for(i=n-1;~i;--i)if(sa[i]&&t[sa[i]-1])pushS(sa[i]-1);\
}
template<typename T>
void sais(const T s,int n,int *sa,int *_bkt,int *p,bool *t,int A){
int *rnk=p+n,*s1=p+n/2,*bkt=_bkt+A;
int n1=0,i,j,x=t[n-1]=1,y=rnk[0]=-1,cnt=-1;
for(i=n-2;~i;--i)t[i]=(s[i]==s[i+1]?t[i+1]:s[i]<s[i+1]);
for(i=1;i<n;++i)rnk[i]=t[i]&&!t[i-1]?(p[n1]=i,n1++):-1;
fill_n(_bkt,A,0);
for(i=0;i<n;++i)++_bkt[s[i]];
for(i=1;i<A;++i)_bkt[i]+=_bkt[i-1];
induce_sort(p);
for(i=0;i<n;++i)if(~(x=rnk[sa[i]]))
j=y<0||memcmp(s+p[x],s+p[y],(p[x+1]-p[x])*sizeof(s[0]))
,s1[y=x]=cnt+=j;
if(cnt+1<n1)sais(s1,n1,sa,bkt,rnk,t+n,cnt+1);
else for(i=0;i<n1;++i)sa[s1[i]]=i;
for(i=0;i<n1;++i)s1[i]=p[sa[i]];
induce_sort(s1);
}
/*****************這些是需要用到的陣列大小**************/
const int MAXN=10000005,MAXA='z'+1;
int sa[MAXN],bkt[MAXN+MAXA],p[MAXN*2];
bool t[MAXN*2];
char s[MAXN];
@avis9ditiu
Copy link

In SA-IS very fast.cpp, i don't get var flags stand for, can you explain?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment