Skip to content

Instantly share code, notes, and snippets.

@anta0
Last active August 30, 2016 08:49
Show Gist options
  • Save anta0/6574318 to your computer and use it in GitHub Desktop.
Save anta0/6574318 to your computer and use it in GitHub Desktop.
1.1つ目のSA-ISの実装
//LICENSE: CC0 (Public Domain)
#include <vector>
#include <algorithm>
class SuffixArray {
public:
typedef int Index;
//Verified: SPOJ SARRAY (0.49 / 2.8M)
template<typename AlphaT>
void build(const AlphaT *str, Index n, int AlphaSize) {
suffixArray.resize(n+1);
sa_is<AlphaT>(str, n, AlphaSize, &suffixArray[0]);
//clear-and-minimize
std::vector<Index>().swap(alphabetCounts);
std::vector<Index>().swap(bucketOffsets);
}
template<typename AlphaT>
void build(const AlphaT *str, Index n) {
build(str, n, *std::max_element(str, str + n)+1);
}
private:
std::vector<Index> suffixArray;
std::vector<Index> alphabetCounts, bucketOffsets;
//strは[0,n)が有効で番兵は含まれない。saは[0,n]が有効
template<typename AlphaT>
void sa_is(const AlphaT *str, Index n, int AlphaSize, Index *sa) {
std::vector<bool> types(n+1);
types[n-1] = 0; types[n] = 1;
for(Index i = n-2; i >= 0; i --)
types[i] = str[i] < str[i+1] || (str[i] == str[i+1] && types[i+1]);
if((int)alphabetCounts.size() < AlphaSize) {
alphabetCounts.resize(AlphaSize);
bucketOffsets.resize(AlphaSize);
}
countingAlphabets(str, n, AlphaSize);
getBucketOffsets(true, AlphaSize);
std::fill(sa, sa + n + 1, -1);
for(Index i = 1; i < n; i ++)
if(types[i] && !types[i-1]) sa[-- bucketOffsets[str[i]]] = i;
sa[0] = n;
inducedSort(str, n, AlphaSize, types, sa);
Index n1 = 0;
for(Index i = 0; i <= n; i ++) {
Index j = sa[i];
if(j > 0 && types[j] && !types[j-1]) sa[n1 ++] = j;
}
//LMS substringsを番号付けする。sa[0..n1-1]にソートされている。
//メモリのためにsaの右半分をバッファに利用する。
//さらにそこでposの順序で整数ソートすることを同時に行う。
//ここでLMS substringが連続して現れないことやLMS substringの数がn/2以下であることを利用してなんとか1つの配列でやる
Index *buffer = sa + n1;
std::fill(buffer, sa + n + 1, -1);
Index uniqueLMSCount = 0, prevPos = -1;
assert(sa[0] == n);
buffer[sa[0] / 2] = uniqueLMSCount ++; //'$'
for(Index i = 1; i < n1; i ++) {
Index pos = sa[i]; bool diff = false;
if(prevPos == -1) diff = true;
else for(Index j = pos, k = prevPos; ; j ++, k ++) {
if(str[j] != str[k] || types[j] != types[k]) {
diff = true;
break;
}else if(j != pos && ((types[j] && !types[j-1]) || (types[k] && !types[k-1])))
break;
}
if(diff) {
uniqueLMSCount ++;
prevPos = pos;
}
buffer[pos / 2] = uniqueLMSCount - 1;
}
for(Index i = n, j = n; i >= n1; i --)
if(sa[i] >= 0) sa[j --] = sa[i];
Index *sa1 = sa, *s1 = sa + n + 1 - n1;
if(uniqueLMSCount == n1)
for(Index i = 0; i < n1; i ++) sa1[s1[i]] = i;
else
sa_is<Index>(s1, n1 - 1, uniqueLMSCount, sa1);
countingAlphabets(str, n, AlphaSize);
getBucketOffsets(true, AlphaSize);
for(Index i = 1, j = 0; i <= n; i ++)
if(types[i] && !types[i-1]) s1[j ++] = i;
for(Index i = 0; i < n1; i ++) sa1[i] = s1[sa1[i]];
std::fill(sa + n1, sa + n + 1, -1);
for(Index i = n1-1; i >= 1; i --) {
Index j = sa[i]; sa[i] = -1;
sa[-- bucketOffsets[str[j]]] = j;
}
inducedSort(str, n, AlphaSize, types, sa);
}
template<typename AlphaT>
void inducedSort(const AlphaT *str, Index n, int AlphaSize, const std::vector<bool> &types, Index *sa) {
getBucketOffsets(false, AlphaSize);
for(Index i = 0; i < n; i ++) {
Index j = sa[i] - 1;
if(j >= 0 && !types[j]) sa[bucketOffsets[str[j]] ++] = j;
}
getBucketOffsets(true, AlphaSize);
for(Index i = n; i >= 1; i --) {
Index j = sa[i] - 1;
if(j >= 0 && types[j]) sa[-- bucketOffsets[str[j]]] = j;
}
}
template<typename AlphaT>
void countingAlphabets(const AlphaT *str, Index n, int AlphaSize) {
std::fill(alphabetCounts.begin(), alphabetCounts.end(), 0);
for(Index i = 0; i < n; i ++)
alphabetCounts[str[i]] ++;
}
void getBucketOffsets(bool dir, int AlphaSize) {
Index cumsum = 1; //'$'の分
if(dir) for(int i = 0; i < AlphaSize; i ++) {
cumsum += alphabetCounts[i];
bucketOffsets[i] = cumsum;
}else for(int i = 0; i < AlphaSize; i ++) {
bucketOffsets[i] = cumsum;
cumsum += alphabetCounts[i];
}
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment