Created
July 13, 2013 05:27
-
-
Save aaani/5989526 to your computer and use it in GitHub Desktop.
Here is an implementation for Suffix array data structure. The method that I have used for creating the suffix array is known as Prefix-doubling which results in a worst time complexity of O(n*log(n)) where n is the length of the input string. Algorithm details and numerous applications could be found here: http://www.stanford.edu/class/cs97si/s…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SuffixArray | |
// | |
// Created by Anirvana Mishra on 7/1/13. | |
// Copyright (c) 2013 Anirvana Mishra. All rights reserved. | |
// Code inspired by http://www.stanford.edu/class/cs97si/suffix-array.pdf | |
#include <algorithm> | |
#include <iostream> | |
#include <vector> | |
using namespace std; | |
struct entry { | |
pair<int, int> sortKey; | |
int position; | |
}; | |
int comparator(entry a, entry b){ | |
//bi-level sort! | |
return (a.sortKey.first == b.sortKey.first) ? (a.sortKey.second < b.sortKey.second ? 1 : 0) : (a.sortKey.first < b.sortKey.first ? 1 : 0); | |
} | |
vector<int> suffixArray(string A){ | |
vector<vector<int>> sufArray(A.size(),vector<int>(A.size())); | |
vector<int> lastRowsufArray(A.size()); | |
int step,N=0,i,prefixLen; | |
entry *L=new entry[A.size()]; | |
for (N = (int)A.size(), i = 0; i < N; i ++) sufArray[0][i] = A[i]; //Assign a bucket to each character | |
for (step = 1, prefixLen = 1; prefixLen < N; step ++, prefixLen= prefixLen*2) { //prefix doubling | |
//Update sort keys for each entry based on previous iteration | |
for (i = 0; i < N; i++) { | |
L[i].sortKey.first = sufArray[step - 1][i]; | |
L[i].sortKey.second = (i + prefixLen < N) ? sufArray[step - 1][i + prefixLen] : -1; //If this is the end assign the smallest possible key -1 | |
L[i].position = i; //Save current position before sorting | |
} | |
//Count sort all the entries | |
sort(L, L + N, comparator); | |
//Reassign buckets to each entry based on sorting result | |
for (i = 0; i < N; i ++) | |
sufArray[step][L[i].position] = ((i > 0) && (L[i].sortKey.first == L[i - 1].sortKey.first) && (L[i].sortKey.second == L[i - 1].sortKey.second)) ? sufArray[step][L[i - 1].position] : i; //key => value where key is the prefix and value its index in sorted array | |
} | |
for(int i=0;i<A.size();i++) lastRowsufArray[i]=sufArray[step-1][i]; //We are actualy going to return only the last row of the 2d suffix array | |
return lastRowsufArray; | |
} | |
int main(void) { //Driver | |
string A="banana"; | |
vector<int> sufArray(A.size()); | |
sufArray=suffixArray("banana"); | |
for(int k=0;k<sufArray.size();k++) cout<<sufArray[k]<<" "; | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment