Created
December 1, 2019 04:58
-
-
Save PavelZaytsev/1683f86250c8e996fc008ca3453e5503 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package strings; | |
import java.util.Arrays; | |
public class KMP { | |
static int [] buildKMPTable(String pattern){ | |
if(pattern.isEmpty()){ | |
return new int [0]; | |
} | |
else if (pattern.length() < 2){ | |
return new int [] {0}; | |
} | |
else{ | |
int [] table = new int [pattern.length()]; | |
int i = 0; | |
int j = 1; | |
table[i] = 0; | |
while(j < table.length){ | |
if(pattern.charAt(i) != pattern.charAt(j)){ | |
// If don't match, | |
// if i is at the beginning and nowhere to go, just set table[j] to 0, | |
// and advance j. | |
if(i == 0){ | |
table[j] = 0; | |
j += 1; | |
} | |
// if i is not at the beginning, drop i back to the index of the recent match. | |
else{ | |
i = table[i - 1]; | |
} | |
} | |
else{ | |
// If match, table[j] = i + 1, and advance both. | |
table[j] = i + 1; | |
i += 1; | |
j += 1; | |
} | |
} | |
return table; | |
} | |
} | |
static boolean kmp(String s, String pattern){ | |
if(pattern.isEmpty() || s.isEmpty()){ | |
return false; | |
} | |
else{ | |
int [] table = buildKMPTable(pattern); | |
int stringIndex = 0; | |
int patternIndex = 0; | |
while(stringIndex < s.length()){ | |
if(s.charAt(stringIndex) == pattern.charAt(patternIndex)){ | |
// We've got a match, increment both. | |
stringIndex += 1; | |
patternIndex += 1; | |
if(patternIndex == pattern.length()){ | |
return true; | |
} | |
} | |
else{ | |
// Pattern index > 0, use kmp table, to find the next best position. | |
if(patternIndex > 0){ | |
patternIndex = table[patternIndex - 1]; | |
} | |
// Can't match char at this string index, move forward. | |
else{ | |
stringIndex += 1; | |
} | |
} | |
} | |
return false; | |
} | |
} | |
public static void main(String[] args) { | |
System.out.println(Arrays.toString(buildKMPTable("abc"))); | |
System.out.println(Arrays.toString(buildKMPTable("aa"))); | |
System.out.println(Arrays.toString(buildKMPTable("dsgwadsgz"))); | |
System.out.println(Arrays.toString(buildKMPTable("dsgwadsgz"))); | |
System.out.println(kmp("adsgwadsxdsgwadsgz", "dsgwadsgz")); | |
System.out.println(kmp("adsgwadsxdsgwadsgz", "dsgwadsgm")); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package strings; | |
import java.util.Optional; | |
public class RabinKarp { | |
static long primeBase = 31L; | |
static long primeMod = 100007L; | |
static class Range{ | |
public int startIndex; | |
public int endIndex; | |
Range(int startIndex, int endIndex){ | |
this.startIndex = startIndex; | |
this.endIndex = endIndex; | |
} | |
} | |
static long initHash(String string){ | |
long hash = 0; | |
for(int i = 0; i < string.length(); i++){ | |
hash *= primeBase; | |
hash += string.charAt(i); | |
hash = hash % primeMod; | |
} | |
return hash; | |
} | |
static long updateHash(long oldHash, char newChar, char dropChar, long power){ | |
// Add a new char: | |
oldHash *= primeBase; | |
oldHash += newChar; | |
oldHash = oldHash % primeMod; | |
// Subtract a hash of a dropping char and mod it. | |
oldHash -= dropChar * power % primeMod; | |
// Add prime mod if negative. | |
if(oldHash < 0){ | |
oldHash += primeMod; | |
} | |
return oldHash; | |
} | |
static boolean compareWords(String haystack, String needle, int startInc, int endInc){ | |
for(int i = startInc; i <= endInc; i++){ | |
if(haystack.charAt(i) != needle.charAt(i - startInc)){ | |
return false; | |
} | |
} | |
return true; | |
} | |
static boolean compareHashesAndWords(long currentHash, long needleHash, String haystack, String needle, int startInc, int endInc){ | |
return currentHash == needleHash && compareWords(haystack, needle, startInc, endInc); | |
} | |
/** | |
* This function finds the beginning and the end indices (inclusive) of a needle in the haystack. | |
* | |
* @return Possible range if its present. | |
*/ | |
static Optional<Range> findRange(String needle, String haystack){ | |
// Initialize a target hash. | |
long needleHash = initHash(needle); | |
int needleSize = needle.length(); | |
long currentHash = 0; | |
long power = 1; | |
for(int i = 0; i < needleSize; i++){ | |
power = (power * primeBase) % primeMod; | |
} | |
StringBuilder window = new StringBuilder(); | |
for(int i = 0; i < haystack.length(); i++){ | |
if(i >= needleSize){ | |
char dropChar = haystack.charAt(i - needleSize); | |
char newChar = haystack.charAt(i); | |
currentHash = updateHash(currentHash, newChar, dropChar, power); | |
if(compareHashesAndWords(currentHash, needleHash, haystack, needle, i - needleSize + 1, i)){ | |
return Optional.of(new Range(i - needleSize + 1, i)); | |
} | |
} | |
else{ | |
window.append(haystack.charAt(i)); | |
currentHash = initHash(window.toString()); | |
if(compareHashesAndWords(currentHash, needleHash, haystack, needle, 0, i)){ | |
return Optional.of(new Range(0, i)); | |
} | |
} | |
} | |
return Optional.empty(); | |
} | |
public static void main(String[] args) { | |
String haystack = "You spin me round baby, right round, like a record baby ---23right round."; | |
findRange("---23right", haystack).ifPresent(r -> System.out.println(haystack.substring(r.startIndex, r.endIndex + 1))); | |
findRange("You", haystack).ifPresent(r -> System.out.println(haystack.substring(r.startIndex, r.endIndex + 1))); | |
findRange("lol", haystack).ifPresent(r -> System.out.println(haystack.substring(r.startIndex, r.endIndex + 1))); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment