Skip to content

Instantly share code, notes, and snippets.

@aaani
Created July 13, 2013 05:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aaani/5989526 to your computer and use it in GitHub Desktop.
Save aaani/5989526 to your computer and use it in GitHub Desktop.
Here is an implementation for Suffix array data structure. The method that I have used for creating the suffix array is known as Prefix-doubling which results in a worst time complexity of O(n*log(n)) where n is the length of the input string. Algorithm details and numerous applications could be found here: http://www.stanford.edu/class/cs97si/s…
// SuffixArray
//
// Created by Anirvana Mishra on 7/1/13.
// Copyright (c) 2013 Anirvana Mishra. All rights reserved.
// Code inspired by http://www.stanford.edu/class/cs97si/suffix-array.pdf
#include <algorithm>
#include <iostream>
#include <vector>
using namespace std;
struct entry {
pair<int, int> sortKey;
int position;
};
int comparator(entry a, entry b){
//bi-level sort!
return (a.sortKey.first == b.sortKey.first) ? (a.sortKey.second < b.sortKey.second ? 1 : 0) : (a.sortKey.first < b.sortKey.first ? 1 : 0);
}
vector<int> suffixArray(string A){
vector<vector<int>> sufArray(A.size(),vector<int>(A.size()));
vector<int> lastRowsufArray(A.size());
int step,N=0,i,prefixLen;
entry *L=new entry[A.size()];
for (N = (int)A.size(), i = 0; i < N; i ++) sufArray[0][i] = A[i]; //Assign a bucket to each character
for (step = 1, prefixLen = 1; prefixLen < N; step ++, prefixLen= prefixLen*2) { //prefix doubling
//Update sort keys for each entry based on previous iteration
for (i = 0; i < N; i++) {
L[i].sortKey.first = sufArray[step - 1][i];
L[i].sortKey.second = (i + prefixLen < N) ? sufArray[step - 1][i + prefixLen] : -1; //If this is the end assign the smallest possible key -1
L[i].position = i; //Save current position before sorting
}
//Count sort all the entries
sort(L, L + N, comparator);
//Reassign buckets to each entry based on sorting result
for (i = 0; i < N; i ++)
sufArray[step][L[i].position] = ((i > 0) && (L[i].sortKey.first == L[i - 1].sortKey.first) && (L[i].sortKey.second == L[i - 1].sortKey.second)) ? sufArray[step][L[i - 1].position] : i; //key => value where key is the prefix and value its index in sorted array
}
for(int i=0;i<A.size();i++) lastRowsufArray[i]=sufArray[step-1][i]; //We are actualy going to return only the last row of the 2d suffix array
return lastRowsufArray;
}
int main(void) { //Driver
string A="banana";
vector<int> sufArray(A.size());
sufArray=suffixArray("banana");
for(int k=0;k<sufArray.size();k++) cout<<sufArray[k]<<" ";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment