Skip to content

Instantly share code, notes, and snippets.

@SOF3
Last active May 25, 2019 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save SOF3/9b5528642c4db74c81fc97378424d6cc to your computer and use it in GitHub Desktop.
Save SOF3/9b5528642c4db74c81fc97378424d6cc to your computer and use it in GitHub Desktop.
Java code to search haystack with a large amount of substrings efficiently
import java.util.HashMap;
import java.util.Map;
class NeedleTree{
private int codePoint;
private boolean selfSet = false;
private Map<Integer, NeedleTree> children = new HashMap<>();
public NeedleTree(int codePoint){
this.codePoint = codePoint;
}
public NeedleTree childOrNull(int codePoint){
return children.get(codePoint);
}
public NeedleTree child(int codePoint){
NeedleTree child = children.get(codePoint);
if(child == null){
children.put(codePoint, child = new NeedleTree(codePoint));
}
return child;
}
public boolean isSelfSet(){
return selfSet;
}
public void markSelfSet(){
selfSet = true;
}
}
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
class StringSearcher{
private NeedleTree needles = new NeedleTree(-1);
private boolean caseSensitive;
private List<Integer> lengths = new ArrayList<>();
private int maxLength;
public StringSearcher(List<String> inputs, boolean caseSensitive){
this.caseSensitive = caseSensitive;
for(String input : inputs){
if(!lengths.contains(input.length())){
lengths.add(input.length());
}
NeedleTree tree = needles;
for(int i = 0; i < input.length(); i++){
tree = tree.child(caseSensitive ? input.codePointAt(i) : Character.toLowerCase(input.codePointAt(i)));
}
tree.markSelfSet();
}
maxLength = Collections.max(lengths);
}
public boolean matches(String haystack){
if(!caseSensitive){
haystack = haystack.toLowerCase();
}
for(int i = 0; i < haystack.length(); i++){
String substring = haystack.substring(i, Math.min(i + maxLength, haystack.length())); // maybe we can even skip this and use from haystack directly?
NeedleTree tree = needles;
for(int j = 0; j < substring.length(); j++){
tree = tree.childOrNull(substring.codePointAt(j));
if(tree == null){
break;
}
if(tree.isSelfSet()){
return true;
}
}
}
return false;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment