Skip to content

Instantly share code, notes, and snippets.

@hanshsieh
Last active June 26, 2018 15:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hanshsieh/8336bf77474ba4055d3e304a54dfc3f4 to your computer and use it in GitHub Desktop.
Save hanshsieh/8336bf77474ba4055d3e304a54dfc3f4 to your computer and use it in GitHub Desktop.
String searching
/**
* Implementation of Knuth-Morris-Pratt (KMP) Algorithm.
* Time complexity:
* (initialization) O(N)
* (query) O(N + M)
*
* Sample: LeetCode 686
*/
public class KMPSearch {
private final String target;
private final int[] kmpNext;
private int currentTargetPos;
/**
* Construct a new instance for searching the given target string from some documents.
*
* @param target The target string.
*/
public KMPSearch(String target) {
this.target = target;
kmpNext = new int[target.length() + 1];
kmpNext[0] = -1;
for (int currLen = 0, prefixLen = -1; currLen < target.length();) {
while (prefixLen > -1 && target.charAt(prefixLen) != target.charAt(currLen)) {
prefixLen = kmpNext[prefixLen];
}
++prefixLen;
++currLen;
if (currLen >= target.length() || target.charAt(currLen) != target.charAt(prefixLen)) {
kmpNext[currLen] = prefixLen;
} else {
kmpNext[currLen] = kmpNext[prefixLen];
}
}
reset();
}
/**
* Feed the next character of the document to be searched.
* If a match is found at the tail of the current character stream,
* true is returned.
*
* @param ch Next character of the document.
* @return True if a match is found.
*/
public boolean feedChar(char ch) {
while (currentTargetPos > -1 && target.charAt(currentTargetPos) != ch) {
currentTargetPos = kmpNext[currentTargetPos];
}
++currentTargetPos;
if (currentTargetPos >= target.length()) {
currentTargetPos = kmpNext[currentTargetPos];
return true;
} else {
return false;
}
}
public void reset() {
currentTargetPos = 0;
}
}
/**
* Implementation of Rabin-Karp (Rolling Hash) Algorithm.
* Time complexity:
* (initialization) O(N)
* (query) O(N + M)
*/
public class RollingHashSearch {
private final String target;
private final long targetHash;
private final long factor;
private final long factorInverse;
private final long mod;
private final char[] window;
private int windowStartIdx;
private int windowLen;
private long windowHash;
private long windowPower;
/**
* Constructs a new instance for searching the given target string from some documents.
*
* @param target The target string.
*/
public RollingHashSearch(String target) {
this(target, 113, 1_000_000_007);
}
/**
* Construct a new instance for searching the given target string from some documents.
* The given {@code factor} and {@code mod} must be relatively prime.
* {@code mod} should be big enough so that the possibility of collision is low.
* See the following link for the illustration of the algorithm.
*
* @param target The target string.
* @param factor The factor used for generating hash.
* @param mod The modular number used for generating hash.
*/
public RollingHashSearch(String target, int factor, int mod) {
this.target = target;
this.factor = factor;
this.mod = mod;
long hash = 0;
long power = 1;
for (int i = 0; i < target.length(); ++i) {
long val = (power * target.charAt(i)) % mod;
hash = (hash + val) % mod;
power = (power * factor) % mod;
}
targetHash = hash;
factorInverse = BigInteger.valueOf(factor).modInverse(BigInteger.valueOf(mod)).intValue();
window = new char[target.length()];
reset();
}
/**
* Feed the next character of the document to be searched.
* If a match is found at the tail of the current character stream,
* true is returned.
*
* @param ch Next character of the document.
* @return True if a match is found.
*/
public boolean feedChar(char ch) {
if (windowLen >= window.length) {
windowHash = (windowHash - window[windowStartIdx]) % mod;
windowHash = (windowHash * factorInverse) % mod;
windowHash = (windowHash + mod) % mod;
windowStartIdx = (windowStartIdx + 1) % window.length;
} else {
++windowLen;
}
window[(windowStartIdx + windowLen - 1) % window.length] = ch;
long val = (windowPower * ch) % mod;
windowHash = (windowHash + val) % mod;
if (windowLen >= window.length) {
if (windowHash == targetHash) {
return compareWindowWithTarget();
}
} else {
windowPower = (windowPower * factor) % mod;
}
return false;
}
private boolean compareWindowWithTarget() {
for (int idx = 0; idx < target.length(); ++idx) {
char ch1 = window[(windowStartIdx + idx) % window.length];
char ch2 = target.charAt(idx);
if (ch1 != ch2) {
return false;
}
}
return true;
}
/**
* Resets and allows searching another document.
*/
public void reset() {
windowLen = 0;
windowHash = 0;
windowPower = 1;
windowStartIdx = 0;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment