Last active
June 26, 2018 15:42
-
-
Save hanshsieh/8336bf77474ba4055d3e304a54dfc3f4 to your computer and use it in GitHub Desktop.
String searching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Implementation of Knuth-Morris-Pratt (KMP) Algorithm. | |
* Time complexity: | |
* (initialization) O(N) | |
* (query) O(N + M) | |
* | |
* Sample: LeetCode 686 | |
*/ | |
public class KMPSearch { | |
private final String target; | |
private final int[] kmpNext; | |
private int currentTargetPos; | |
/** | |
* Construct a new instance for searching the given target string from some documents. | |
* | |
* @param target The target string. | |
*/ | |
public KMPSearch(String target) { | |
this.target = target; | |
kmpNext = new int[target.length() + 1]; | |
kmpNext[0] = -1; | |
for (int currLen = 0, prefixLen = -1; currLen < target.length();) { | |
while (prefixLen > -1 && target.charAt(prefixLen) != target.charAt(currLen)) { | |
prefixLen = kmpNext[prefixLen]; | |
} | |
++prefixLen; | |
++currLen; | |
if (currLen >= target.length() || target.charAt(currLen) != target.charAt(prefixLen)) { | |
kmpNext[currLen] = prefixLen; | |
} else { | |
kmpNext[currLen] = kmpNext[prefixLen]; | |
} | |
} | |
reset(); | |
} | |
/** | |
* Feed the next character of the document to be searched. | |
* If a match is found at the tail of the current character stream, | |
* true is returned. | |
* | |
* @param ch Next character of the document. | |
* @return True if a match is found. | |
*/ | |
public boolean feedChar(char ch) { | |
while (currentTargetPos > -1 && target.charAt(currentTargetPos) != ch) { | |
currentTargetPos = kmpNext[currentTargetPos]; | |
} | |
++currentTargetPos; | |
if (currentTargetPos >= target.length()) { | |
currentTargetPos = kmpNext[currentTargetPos]; | |
return true; | |
} else { | |
return false; | |
} | |
} | |
public void reset() { | |
currentTargetPos = 0; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Implementation of Rabin-Karp (Rolling Hash) Algorithm. | |
* Time complexity: | |
* (initialization) O(N) | |
* (query) O(N + M) | |
*/ | |
public class RollingHashSearch { | |
private final String target; | |
private final long targetHash; | |
private final long factor; | |
private final long factorInverse; | |
private final long mod; | |
private final char[] window; | |
private int windowStartIdx; | |
private int windowLen; | |
private long windowHash; | |
private long windowPower; | |
/** | |
* Constructs a new instance for searching the given target string from some documents. | |
* | |
* @param target The target string. | |
*/ | |
public RollingHashSearch(String target) { | |
this(target, 113, 1_000_000_007); | |
} | |
/** | |
* Construct a new instance for searching the given target string from some documents. | |
* The given {@code factor} and {@code mod} must be relatively prime. | |
* {@code mod} should be big enough so that the possibility of collision is low. | |
* See the following link for the illustration of the algorithm. | |
* | |
* @param target The target string. | |
* @param factor The factor used for generating hash. | |
* @param mod The modular number used for generating hash. | |
*/ | |
public RollingHashSearch(String target, int factor, int mod) { | |
this.target = target; | |
this.factor = factor; | |
this.mod = mod; | |
long hash = 0; | |
long power = 1; | |
for (int i = 0; i < target.length(); ++i) { | |
long val = (power * target.charAt(i)) % mod; | |
hash = (hash + val) % mod; | |
power = (power * factor) % mod; | |
} | |
targetHash = hash; | |
factorInverse = BigInteger.valueOf(factor).modInverse(BigInteger.valueOf(mod)).intValue(); | |
window = new char[target.length()]; | |
reset(); | |
} | |
/** | |
* Feed the next character of the document to be searched. | |
* If a match is found at the tail of the current character stream, | |
* true is returned. | |
* | |
* @param ch Next character of the document. | |
* @return True if a match is found. | |
*/ | |
public boolean feedChar(char ch) { | |
if (windowLen >= window.length) { | |
windowHash = (windowHash - window[windowStartIdx]) % mod; | |
windowHash = (windowHash * factorInverse) % mod; | |
windowHash = (windowHash + mod) % mod; | |
windowStartIdx = (windowStartIdx + 1) % window.length; | |
} else { | |
++windowLen; | |
} | |
window[(windowStartIdx + windowLen - 1) % window.length] = ch; | |
long val = (windowPower * ch) % mod; | |
windowHash = (windowHash + val) % mod; | |
if (windowLen >= window.length) { | |
if (windowHash == targetHash) { | |
return compareWindowWithTarget(); | |
} | |
} else { | |
windowPower = (windowPower * factor) % mod; | |
} | |
return false; | |
} | |
private boolean compareWindowWithTarget() { | |
for (int idx = 0; idx < target.length(); ++idx) { | |
char ch1 = window[(windowStartIdx + idx) % window.length]; | |
char ch2 = target.charAt(idx); | |
if (ch1 != ch2) { | |
return false; | |
} | |
} | |
return true; | |
} | |
/** | |
* Resets and allows searching another document. | |
*/ | |
public void reset() { | |
windowLen = 0; | |
windowHash = 0; | |
windowPower = 1; | |
windowStartIdx = 0; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment