Skip to content

Instantly share code, notes, and snippets.

@tsleyson
Created November 13, 2014 00:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tsleyson/f320666db6ba140e8585 to your computer and use it in GitHub Desktop.
Save tsleyson/f320666db6ba140e8585 to your computer and use it in GitHub Desktop.
/* KarpRabin.java
A Java implementation of the Karp-Rabin seminumerical string
matching algorithm.
See CLRS Section 32.2.
*/
import java.util.Map;
import java.util.HashMap;
import java.math.BigInteger;
public class KarpRabin {
final long q, b;
public static final int NONE = -1;
Map<Character, Integer> digitMap;
public KarpRabin(String alphabet) {
digitMap = new HashMap<>();
for (int i = 0; i < alphabet.length(); ++i) {
digitMap.put(alphabet.charAt(i), i);
}
q = 26900927; // A prime such that b*q can reasonably fit in
// a word for most values of b.
b = alphabet.length();
if (b >= q) {
throw new IllegalArgumentException("b cannot be larger than " + q);
}
}
public int match(String pattern, String text) {
// Returns location of first match of pattern in text.
if (pattern.length() > text.length()) {
throw new IllegalArgumentException("Pattern longer than text.");
}
int[] T = digitValue(text);
int[] P = digitValue(pattern);
long h, p, t;
p = t = 0;
h = expt(b, pattern.length()-1).mod(BigInteger.valueOf(q)).longValue(); // See note [7].
// Calculate fingerprint of pattern and of first
// pattern.length-length group in text.
for (int i = 0; i < pattern.length(); ++i) {
p = (b*p + P[i]) % q;
t = (b*t + T[i]) % q;
}
for (int s = 0; s < text.length() - pattern.length(); ++s) {
if (p == t) {
if (pattern.equals(text.substring(s, s+pattern.length()))) {
return s;
}
}
assert s < T.length && s + pattern.length() < T.length :
"s is " + s + " and s+pattern.length() is " +
(s + pattern.length());
t = (b * (t - T[s]*h) + T[s + pattern.length()]) % q;
// Handle Java's modulus behavior; see note[2].
t += q;
t %= q;
assert t >= 0 : "t is below 0, has value " + t;
}
// See note [4].
if (p == t) {
if (pattern.equals(text.substring(text.length() - pattern.length(), text.length())))
return text.length() - pattern.length();
}
return NONE;
}
int[] digitValue(String text) {
// Converts input string into individual digits in base-alphabet.length.
int[] digits = new int[text.length()];
for (int i = 0; i < text.length(); ++i) {
Integer retrieved = digitMap.get(text.charAt(i));
if (retrieved == null) {
throw new IllegalArgumentException("Invalid character " +
text.charAt(i) + " in text.");
}
digits[i] = retrieved;
}
assert text.length() == digits.length :
"Length of digit value != length of original text.";
return digits;
}
BigInteger expt(long base, long power) {
BigInteger val = BigInteger.ONE;
BigInteger b = BigInteger.valueOf(base);
for(; power > 0; --power) {
val = val.multiply(b);
}
return val;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment