Skip to content

Instantly share code, notes, and snippets.

@bicepjai
Created August 18, 2012 10:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bicepjai/3385853 to your computer and use it in GitHub Desktop.
Save bicepjai/3385853 to your computer and use it in GitHub Desktop.
naive Suffix Array Implementation
import java.util.*;
public class simplesa {
private final String[] suffixes;
private final int N;
private int[] sa,lcp,nofsubs,nofusubs,nofusubs_acc;
public simplesa(String s) {
N = s.length();
suffixes = new String[N];
sa = new int[N];
lcp = new int[N];
nofsubs = new int[N];
nofusubs = new int[N];
nofusubs_acc = new int[N];
for (int i = 0; i < N; i++)
suffixes[i] = s.substring(i);
Arrays.sort(suffixes);
sa[0] = this.index(0);
lcp[0] = 0;
nofsubs[0] = N - sa[0];
nofusubs[0] = nofsubs[0];
nofusubs_acc[0] = nofsubs[0];
for (int i = 1; i < s.length(); i++) {
sa[i] = this.index(i);
lcp[i] = lcp(i);
nofsubs[i] = N - sa[i];
nofusubs[i] = nofsubs[i] - lcp[i];
nofusubs_acc[i] = nofusubs_acc[i-1] + nofusubs[i];
}
}
// index of ith sorted suffix
public int index(int i) { return N - suffixes[i].length(); }
// length of longest common prefix of s and t
private static int lcp(String s, String t) {
int N = Math.min(s.length(), t.length());
for (int i = 0; i < N; i++)
if (s.charAt(i) != t.charAt(i)) return i;
return N;
}
// longest common prefix of suffixes(i) and suffixes(i-1)
public int lcp(int i) {
return lcp(suffixes[i], suffixes[i-1]);
}
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int n = sc.nextInt();
sc.nextLine();
StringBuilder sb = new StringBuilder();
while(n-- > 0){
sb.append(sc.nextLine());
}
String generalized = sb.toString();
//String generalized = sc.nextLine();
simplesa suffix = new simplesa(generalized);
for (int i = 0; i < generalized.length(); i++) {
System.out.println(" "+i+" "+suffix.sa[i]+" "+suffix.lcp[i]
+" "+suffix.nofsubs[i]+" "+suffix.nofusubs[i]+" "+suffix.nofusubs_acc[i]
+" "+generalized.substring(suffix.sa[i]));
}
//binary search
int l =8;
int m=0, lo = 0, hi = generalized.length();
while(lo <= hi){
m = lo + (hi - lo)/2;
if (suffix.nofusubs_acc[m] > l){
hi = m - 1;
} else if (suffix.nofusubs_acc[m] < l){
lo = m + 1;
} else {
break;
}
}
System.out.println(lo);
if(lo -1 > 0){
System.out.println(generalized.substring(suffix.sa[lo],
suffix.sa[lo]+suffix.lcp[lo]+l-suffix.nofusubs_acc[lo-1]));
} else {
System.out.println(generalized.substring(suffix.sa[lo],
suffix.sa[lo]+suffix.lcp[lo]+l));
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment