Skip to content

Instantly share code, notes, and snippets.

@jkff
Created May 28, 2011 04:11
Show Gist options
  • Save jkff/996593 to your computer and use it in GitHub Desktop.
Save jkff/996593 to your computer and use it in GitHub Desktop.
String binary search
import java.util.*;
public class StringBinarySearch {
private static class ListDictionary implements Dictionary {
private List<String> ss;
public ListDictionary(List<String> ss) { this.ss = ss; }
public String getWordAt(int i) throws IndexOutOfBoundsException {
return i < 0 || i >= ss.size() ? null : ss.get(i);
}
}
public static void main(String[] args) {
// Random test: a g c t, 1-8 letters, 0-2048-sized array (exponential scale)
Random r = new Random();
for(int experiment = 0; experiment < 1000; ++experiment) {
int scale = 1 << r.nextInt(10);
int n = (int) (scale * (1 + r.nextDouble()));
List<String> haystack = new ArrayList<String>();
for(int i = 0; i < n; ++i) haystack.add(generate(r));
Collections.sort(haystack);
for(String needle : haystack) check(needle, haystack);
for(int needleNo = 0; needleNo < 1000; ++needleNo) check(generate(r), haystack);
}
}
private static void check(String needle, List<String> haystack) {
boolean expected = haystack.contains(needle);
boolean actual = find(needle, new ListDictionary(haystack));
if(expected && !actual) {
System.out.println("Element present but not found: " + needle + " in " + haystack);
} else if(actual && !expected) {
System.out.println("Element found but not present: " + needle + " in " + haystack);
}
}
private static String generate(Random r) {
int len = 1 + r.nextInt(7);
StringBuilder sb = new StringBuilder();
for(int j = 0; j < len; ++j) sb.append("agct".charAt(r.nextInt(4)));
return sb.toString();
}
public static boolean find(String needle, Dictionary haystack) {
int low = 0, high = 1;
int matchLen = 0;
while(haystack.getWordAt(high) != null) high *= 2;
high--;
while (low <= high) {
int mid = (low + high) >>> 1;
String midVal = haystack.getWordAt(mid);
if(midVal == null) {
high = mid - 1;
continue;
}
String lowWord = haystack.getWordAt(low);
String highWord = haystack.getWordAt(high);
if(highWord != null) {
while(matchLen < lowWord.length() &&
matchLen < highWord.length() &&
lowWord.charAt(matchLen) == highWord.charAt(matchLen))
{
if(matchLen >= needle.length()) return false;
if(lowWord.charAt(matchLen) != needle.charAt(matchLen)) return false;
matchLen++;
}
}
int cmp = compare(needle, midVal, matchLen);
if (cmp > 0)
low = mid + 1;
else if (cmp < 0)
high = mid - 1;
else
return true;
}
return false;
}
private static int compare(String a, String b, int matchLen) {
assert a.substring(0, matchLen).equals(b.substring(0, matchLen)) :
"Prefixes of length " + matchLen + " of " + a + " and " + b + " don't actually match!";
for(int i = matchLen; i < a.length() && i < b.length(); ++i) {
if(a.charAt(i) < b.charAt(i))
return -1;
if(a.charAt(i) > b.charAt(i))
return 1;
}
if(a.length() < b.length())
return -1;
if(a.length() > b.length())
return 1;
return 0;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment