Instantly share code, notes, and snippets.

calmhandtitan/SARRAY_nlogn.cpp Created Dec 25, 2013

Suffix Array Construction in O(nlogn) using Manber and Myers algo and linear time LCP array construction
 #include "stdio.h" #include "stdlib.h" #include "string.h" #include "algorithm" using namespace std; // Begins Suffix Arrays implementation // O(n log n) - Manber and Myers algorithm // Refer to "Suffix arrays: A new method for on-line txting searches", // by Udi Manber and Gene Myers //Usage: // Fill txt with the characters of the txting. // Call SuffixSort(n), where n is the length of the txting stored in txt. // That's it! //Output: // SA = The suffix array. Contains the n suffixes of txt sorted in lexicographical order. // Each suffix is represented as a single integer (the SAition of txt where it starts). // iSA = The inverse of the suffix array. iSA[i] = the index of the suffix txt[i..n) // in the SA array. (In other words, SA[i] = k <==> iSA[k] = i) // With this array, you can compare two suffixes in O(1): Suffix txt[i..n) is smaller // than txt[j..n) if and only if iSA[i] < iSA[j] const int MAX = 100010; char txt[MAX]; //input int iSA[MAX], SA[MAX]; //output int cnt[MAX], next[MAX]; //internal bool bh[MAX], b2h[MAX]; // Compares two suffixes according to their first characters bool smaller_first_char(int a, int b){ return txt[a] < txt[b]; } void suffixSort(int n){ //sort suffixes according to their first characters for (int i=0; i= 0){ int head = iSA[s]; iSA[s] = head + cnt[head]++; b2h[iSA[s]] = true; } } for (int j = i; j < next[i]; ++j){ int s = SA[j] - h; if (s >= 0 && b2h[iSA[s]]){ for (int k = iSA[s]+1; !bh[k] && b2h[k]; k++) b2h[k] = false; } } } for (int i=0; i 0) { int j = SA[iSA[i]-1]; while (i + h < n && j + h < n && txt[i+h] == txt[j+h]) h++; lcp[iSA[i]] = h; if (h > 0) h--; } } } // End of longest common prefixes algorithm int main() { int len; gets(txt); len = strlen(txt); suffixSort(len); for (int i = 0; i < len; ++i) { printf("%d\n",SA[i] ); } return 0; }

pktiw commented Oct 15, 2016

 Please explain the SA part, specially the linear time sorting, because of which the code is nlgn.