Skip to content

Instantly share code, notes, and snippets.

@steghio
Last active July 20, 2021 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save steghio/64005dfa17cf7087f82f4a3591d79cd0 to your computer and use it in GitHub Desktop.
Save steghio/64005dfa17cf7087f82f4a3591d79cd0 to your computer and use it in GitHub Desktop.
Trie data structure for text input prediction
Trie data structure for text input prediction
package com.blogspot.groglogs.test.trie;
import com.blogspot.groglogs.trie.TrieUtils;
import org.junit.Test;
import java.util.HashSet;
import java.util.Set;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class FindWordsInPhoneNumberAsDigitsJTests {
Set<String> words, result, expected;
String phoneNumber;
@Test
public void nullOrempty() {
phoneNumber = null;
words = null;
assertTrue("null input expected empty output", TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words).isEmpty());
phoneNumber = "";
words = null;
assertTrue("null input expected empty output", TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words).isEmpty());
phoneNumber = null;
words = new HashSet<>();
assertTrue("null input expected empty output", TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words).isEmpty());
phoneNumber = "234";
words = new HashSet<>();
assertTrue("empty input expected empty output", TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words).isEmpty());
}
@Test(expected = IllegalArgumentException.class)
public void testPhoneNumberWithNonDigitThrowsException() {
phoneNumber = "c";
words = new HashSet<>();
words.add("t");
TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
}
@Test(expected = IllegalArgumentException.class)
public void testWordWithDigitThrowsException() {
phoneNumber = "123";
words = new HashSet<>();
words.add("t1");
TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
}
@Test
public void digitsThatAreNotCharacters() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
words.add("emo");
words.add("cap");
words.add("car");
words.add("cat");
phoneNumber = "11111";
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertTrue("all 1s expected empty output", result.isEmpty());
phoneNumber = "0";
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertTrue("all 0s expected empty output", result.isEmpty());
phoneNumber = "10";
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertTrue("all digits that are not characters expected empty output", result.isEmpty());
}
@Test
public void digitsThatAreNotWords() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
words.add("emo");
words.add("cap");
words.add("car");
words.add("cat");
phoneNumber = "2222";
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertTrue("digits that are not words expected empty output", result.isEmpty());
}
@Test
public void singleDigitThatMatches() {
words = new HashSet<>();
String match = "a";
words.add(match);
phoneNumber = "2";
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertEquals("single digit that matches, expected one match", 1, result.size());
assertTrue("single digit that matches, expected that match", result.contains(match));
}
@Test
public void multipleDigitsThatHaveOnlyOneMatch() {
words = new HashSet<>();
String match = "ac";
words.add(match);
phoneNumber = "22";
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertEquals("digits that only one match, expected one match", 1, result.size());
assertTrue("digits that only one match, expected that match", result.contains(match));
}
@Test
public void phoneNumberWithMultipleMatches() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
words.add("emo");
words.add("cap");
words.add("car");
words.add("cat");
phoneNumber = "3662277";
expected = new HashSet<>();
expected.add("emo");
expected.add("foo");
expected.add("foobar");
expected.add("bar");
expected.add("cap");
expected.add("car");
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertEquals("phone number with multiple matches expected all matches", expected.size(), result.size());
assertTrue("phone number with multiple matches expected all matches", expected.containsAll(result));
}
@Test
public void phoneNumberWithNonCharacterDigitsMultipleMatches() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
phoneNumber = "366122703610";
expected = new HashSet<>();
expected.add("foo");
expected.add("bar");
result = TrieUtils.findMatchingWordsInPhoneNumberAsDigits(phoneNumber, words);
assertEquals("phone number with multiple matches and non character digits expected all matches", expected.size(), result.size());
assertTrue("phone number with multiple matches and non character digits expected all matches", expected.containsAll(result));
}
}
package com.blogspot.groglogs.test.trie;
import com.blogspot.groglogs.trie.Trie;
import com.blogspot.groglogs.trie.TrieUtils;
import org.junit.Test;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
public class FindWordsInPhoneNumberJTests {
Set<String> words, result, expected;
String phoneNumber;
@Test
public void nullOrempty() {
phoneNumber = null;
words = null;
assertTrue("null input expected empty output", TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words).isEmpty());
phoneNumber = "";
words = null;
assertTrue("null input expected empty output", TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words).isEmpty());
phoneNumber = null;
words = new HashSet<>();
assertTrue("null input expected empty output", TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words).isEmpty());
phoneNumber = "234";
words = new HashSet<>();
assertTrue("empty input expected empty output", TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words).isEmpty());
}
@Test(expected = IllegalArgumentException.class)
public void testPhoneNumberWithNonDigitThrowsException() {
phoneNumber = "c";
words = new HashSet<>();
words.add("t");
TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
}
@Test
public void digitsThatAreNotCharacters() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
words.add("emo");
words.add("cap");
words.add("car");
words.add("cat");
phoneNumber = "11111";
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertTrue("all 1s expected empty output", result.isEmpty());
phoneNumber = "0";
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertTrue("all 0s expected empty output", result.isEmpty());
phoneNumber = "10";
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertTrue("all digits that are not characters expected empty output", result.isEmpty());
}
@Test
public void digitsThatAreNotWords() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
words.add("emo");
words.add("cap");
words.add("car");
words.add("cat");
phoneNumber = "2222";
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertTrue("digits that are not words expected empty output", result.isEmpty());
}
@Test
public void singleDigitThatMatches() {
words = new HashSet<>();
String match = "a";
words.add(match);
phoneNumber = "2";
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertEquals("single digit that matches, expected one match", 1, result.size());
assertTrue("single digit that matches, expected that match", result.contains(match));
}
@Test
public void multipleDigitsThatHaveOnlyOneMatch() {
words = new HashSet<>();
String match = "ac";
words.add(match);
phoneNumber = "22";
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertEquals("digits that only one match, expected one match", 1, result.size());
assertTrue("digits that only one match, expected that match", result.contains(match));
}
@Test
public void phoneNumberWithMultipleMatches() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
words.add("emo");
words.add("cap");
words.add("car");
words.add("cat");
phoneNumber = "3662277";
expected = new HashSet<>();
expected.add("emo");
expected.add("foo");
expected.add("foobar");
expected.add("bar");
expected.add("cap");
expected.add("car");
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertEquals("phone number with multiple matches expected all matches", expected.size(), result.size());
assertTrue("phone number with multiple matches expected all matches", expected.containsAll(result));
}
@Test
public void phoneNumberWithNonCharacterDigitsMultipleMatches() {
words = new HashSet<>();
words.add("foo");
words.add("bar");
words.add("baz");
words.add("foobar");
phoneNumber = "366122703610";
expected = new HashSet<>();
expected.add("foo");
expected.add("bar");
result = TrieUtils.findMatchingWordsInPhoneNumber(phoneNumber, words);
assertEquals("phone number with multiple matches and non character digits expected all matches", expected.size(), result.size());
assertTrue("phone number with multiple matches and non character digits expected all matches", expected.containsAll(result));
}
}
package com.blogspot.groglogs.trie;
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Queue;
import java.util.ArrayDeque;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
public class Trie {
private String word; //keep a full word in the node. Empty means this is a navigation node
private String path; //track path followed until this node, useful for debugging
private HashMap<Character, Trie> next; //for each character following the current one, add an entry
//Aho-Corasick
private Set<String> words; //maybe this node is a terminal for more words
private Trie failNode;
public Trie(){
this.word = null;
this.next = new HashMap<>();
this.words = new HashSet<>();
this.failNode = null;
this.path = "";
}
public Trie(String path, Character link){
this.word = null;
this.next = new HashMap<>();
this.words = new HashSet<>();
this.failNode = null;
this.path = path + link;
}
public Trie(String word){
this.word = word;
this.next = new HashMap<>();
this.words = new HashSet<>();
this.words.add(word);
this.failNode = null;
this.path = word;
}
public String getWord(){
return this.word;
}
public Set<String> getWords(){
return this.words;
}
public String getPath(){
return this.path;
}
public boolean hasWord(String word){
return this.words.contains(word);
}
public Trie getFailNode(){
return this.failNode;
}
public void setFailNode(Trie failNode){
this.failNode = failNode;
}
public boolean isRoot(){
return this.failNode == null;
}
public Map<Character, Trie> getNodeChildren(){
return this.next;
}
private void addWord(String word, String curr){
Character c = curr.charAt(0);
Trie t = this.next.get(c);
/*
if this is the last letter in the word
add the full word alongside this letter creating a new node if necessary
otherwise move to the next character creating a new node if necessary
*/
if(curr.length() == 1){
if(t == null) this.next.put(c, new Trie(word));//new node has a word
else {
t.word = word;
t.words.add(word);
}
}
else{
//create the node before continuing!
if(t == null){
t = new Trie(this.path, c);
this.next.put(c, t); //new node has no word
}
t.addWord(word, curr.substring(1)); //call add on the next character in the sequence
}
}
//to add a word we walk the tree and create nodes as necessary until we reach the end of the word
public void addWord(String word){
if(word == null || word.equals(""))return;
this.addWord(word, word);
}
public boolean containsWord(String word){
if(word == null || word.equals(""))return false;
Trie t = this.next.get(word.charAt(0));
/*
if we have no entry, the word is not there
else if we have an entry and we are at the end of the word, it is there
else keep searching for the next word portion
*/
if(t == null) return false;
else if(word.length() == 1) return true;
else return t.containsWord(word.substring(1));
}
public Trie findWord(String word){
if(word == null || word.equals(""))return null;
Trie t = this.next.get(word.charAt(0));
/*
if we have no entry, the word is not there, return Trie t (null)
else if we have an entry and we are at the end of the word, return Trie t (input match)
else keep searching for the next word portion
*/
if(t == null) return t;
else if(word.length() == 1) return t;
else return t.findWord(word.substring(1));
}
//NOTE: we are not keeping next as ordered list so the suggestions we get might not be the same everytime
//we will get the first suggestion we find, possibly the longest one
public String getSuggestion(String partial_word){
if(partial_word == null || partial_word.equals(""))return "";
//find the lowest subtree for this partial word, if any
Trie t = findWord(partial_word);
//if there is no subtree or it has no continuation, there is no suggestion
if(t == null || t.next.keySet().isEmpty()) return "";
//otherwise go DFS and pick a path and follow until you reach a complete word and return it as suggestion
Character c;
while(t.next != null && ! t.next.keySet().isEmpty()) {
c = (Character) t.next.keySet().toArray()[0];
t = t.next.get(c);
if(t.word != null) return t.word;
}
return "";
}
//we will get the first suggestion we find, guaranteed to be the shortest one
public String getShortestSuggestion(String partial_word){
if(partial_word == null || partial_word.equals(""))return "";
//find the lowest subtree for this partial word, if any
Trie t = findWord(partial_word);
//if there is no subtree or it has no continuation, there is no suggestion
if(t == null || t.next.keySet().isEmpty()) return "";
//otherwise start a BFS and search for the first node with a full word. That is the first shortest word we can suggest
Queue<Trie> q = new ArrayDeque<>();
q.addAll(t.next.values());
while (!q.isEmpty()) {
t = q.remove();
if(t.word != null) return t.word;
if(!t.next.isEmpty()) q.addAll(t.next.values());
}
return "";
}
public List<String> getSuggestions(String partial_word){
if(partial_word == null || partial_word.equals(""))return null;
//find the lowest subtree for this partial word, if any
Trie t = findWord(partial_word);
//if there is no subtree or it has no continuation, there is no suggestion
if(t == null || t.next.keySet().isEmpty()) return null;
//otherwise search for all suggestions and store them in the List before returning it
List<String> suggestions = new ArrayList<>();
Queue<Trie> q = new ArrayDeque<>();
q.addAll(t.next.values());
while (!q.isEmpty()) {
t = q.remove();
if(t.word != null) suggestions.add(t.word);
if(!t.next.isEmpty()) q.addAll(t.next.values());
}
return suggestions;
}
public List<String> getSuggestions(String partial_word, int maxLength) {
if (partial_word == null || partial_word.equals("")) return new ArrayList<>();
//find the lowest subtree for this partial word, if any
Trie t = findWord(partial_word);
//if there is no subtree or it has no continuation, there is no suggestion
if (t == null || t.next.keySet().isEmpty()) return new ArrayList<>();
//otherwise search for all suggestions and store them in the List before returning it
List<String> suggestions = new ArrayList<>();
Queue<Trie> q = new ArrayDeque<>();
q.addAll(t.next.values());
while (!q.isEmpty()) {
t = q.remove();
if (t.word != null && t.word.length() <= maxLength) suggestions.add(t.word);
if (!t.next.isEmpty()) {
q.addAll(t.next.values());
}
}
return suggestions;
}
public void printTrie(){
if(this.word != null) System.out.println("Got word: " + this.word);
if(!this.next.isEmpty()){
for(Character c : this.next.keySet()){
this.next.get(c).printTrie();
}
}
}
public void printAhoCorasickTrie(){
Queue<Map.Entry<String, Trie>> queue = new ArrayDeque<>();
queue.add(new AbstractMap.SimpleEntry<String, Trie>("", this));
System.out.println("===AHO-CORASICK_PRINT_START");
while(!queue.isEmpty()){
Map.Entry<String, Trie> curr = queue.poll();
System.out.println("Node link was: " + curr.getKey());
System.out.println("Node is root: " + curr.getValue().isRoot());
System.out.println("Node is word: " + curr.getValue().getWord());
System.out.println("Node is path: " + curr.getValue().getPath());
if(!curr.getValue().isRoot()){
System.out.println("Node fail node is: (" + curr.getValue().getFailNode().getPath() + ")");
}
for(String word : curr.getValue().getWords()){
System.out.println("Node has word: " + word);
}
for(Map.Entry<Character, Trie> e : curr.getValue().getNodeChildren().entrySet()){
System.out.println("Node has children: " + e.getKey());
queue.add(new AbstractMap.SimpleEntry<String, Trie>(curr.getKey()+e.getKey(), e.getValue()));
}
System.out.println("===");
}
System.out.println("===AHO-CORASICK_PRINT_END");
}
public void addWords(Set<String> words){
for(String word : words){
this.addWord(word);
}
}
//Aho-Corasick
//for a given set of words, we calculate the fail links in the trie we just built
public void buildFailLinks(Set<String> words){
this.addWords(words);
this.buildFailLinks();
}
public void buildFailLinks(){
Queue<Trie> nodes = new ArrayDeque<>();
//root fails to null
//all children of root fail to root
for(Trie t : this.next.values()){
t.setFailNode(this);
//add all children to queue
nodes.add(t);
}
//process all nodes to calculate fail links
while(!nodes.isEmpty()){
Trie curr = nodes.poll();
Trie failNode = curr.getFailNode();
//for each child of this node look for the closest ancestor starting at failNode
//that has a child with the same character as this child's link
for(Map.Entry<Character, Trie> entry : curr.next.entrySet()){
Trie ancestor = findFailNode(failNode, entry.getKey(), entry.getValue());
entry.getValue().setFailNode(ancestor);
//add children of this node to the queue
nodes.add(entry.getValue());
}
}
}
//helper for buildFailLinks, searches for a valid fail node for the current node character
//starting with the given failNode. If no fail node is found, it keeps bubbling up until a valid node is found
//or the root is reached. In that case, the root is set as fail node, but NO words are added to the current node
private Trie findFailNode(Trie failNode, Character currentLink, Trie current){
//does this failNode have a child following the given character link?
Trie ancestor = failNode.next.get(currentLink);
//if yes, set it as fail node and add its words to the current node
if(ancestor != null){
//when a fail node is set, we add, if any, its output to the current node
current.getWords().addAll(ancestor.getWords());
return ancestor;
}
//if not, bubble up to the fail node of this failNode and search from there
//if root is reached and no link was found, set root as fail and do NOT add words to this node
if(failNode.isRoot()){
return failNode;
}
return findFailNode(failNode.getFailNode(), currentLink, current);
}
//find all matching strings within the given string
public Map<String, List<Integer>> findMatchesInString(String string){
Map<String, List<Integer>> results = new HashMap<>();
if(string == null || string.isEmpty()) return results;
Trie curr = this;
for(int i = 0; i < string.length(); i++){
Character c = string.charAt(i);
//from the current node, try to find a match for the given character
Trie node = curr.getNodeChildren().get(c);
//if there is no match, follow fail links until you either find a match OR cannot fail back anymore (you are trying to fail back from root node)
boolean doSkip = false;
while(node == null) {
//if we are trying to fail back from the root, it means that character does not exist in out trie, skip to the next character
//and continue from the root
if(curr.getFailNode() == null){
doSkip = true;
break;
}
//else fail to our failNode and check from there
curr = curr.getFailNode();
node = curr.getNodeChildren().get(c);
}
if(!doSkip){
//we now matched that node word from a specific start position in the search string, calculate it and add it to results
for(String word : node.getWords()) {
List<Integer> positions = results.get(word);
if (positions == null) {
positions = new ArrayList<>();
}
positions.add(i - word.length() + 1);
results.put(word, positions);
}
//then continue from there
curr = node;
}
}
return results;
}
}
package com.blogspot.groglogs.test.trie;
import org.junit.Test;
import com.blogspot.groglogs.trie.Trie;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
public class TrieJTests {
Trie t, t_out;
String partial_word, output, word, other_word, suggested;
int count;
Set<String> words;
Map<String, List<Integer>> matches, results;
@Test
//empty input, expected empty string
public void empty() {
t = new Trie();
partial_word = "Something";
output = "";
assertEquals("empty Trie has no suggestion", output, t.getSuggestion(partial_word));
assertNull("empty Trie has no fail node", t.getFailNode());
assertEquals("root has no children", 0, t.getNodeChildren().size());
}
@Test
//Trie contains word
public void containsWord() {
//present
t = new Trie();
word = "Else";
t.addWord(word);
assertEquals("contains word which is in Trie gives true", true, t.containsWord(word));
//add same word multiple times
t = new Trie();
word = "Else";
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("contains word which is in Trie gives true", true, t.containsWord(word));
//add multiple words, some with same prefix
t = new Trie();
word = "Else";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "Elses";
t.addWord(word);
assertEquals("contains word which is in Trie gives true", true, t.containsWord(word));
//not present
t = new Trie();
word = "Else";
other_word = "Something";
t_out = null;
t.addWord(word);
assertEquals("contains word which is not in Trie gives false", false, t.containsWord(other_word));
//add same word multiple times
t = new Trie();
word = "Else";
other_word = "Something";
t_out = null;
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("contains word which is not in Trie gives false", false, t.containsWord(other_word));
//add multiple words, some with same prefix
t = new Trie();
word = "Else";
other_word = "Something";
t_out = null;
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Some";
t.addWord(word);
assertEquals("contains word which is not in Trie gives false", false, t.containsWord(other_word));
//add multiple words, some with same prefix, mix case as well, check that we get a suggestion out
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
other_word = "eLsEs";
assertEquals("contains word which is in Trie gives true", true, t.containsWord(other_word));
}
@Test
//find word in Trie
public void findWord() {
//present
t = new Trie();
word = "Else";
t.addWord(word);
assertEquals("find word which is in Trie gives the word", word, t.findWord(word).getWord());
//add same word multiple times
t = new Trie();
word = "Else";
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("find word which is in Trie gives the word", word, t.findWord(word).getWord());
//not present
t = new Trie();
word = "Else";
other_word = "Something";
t_out = null;
t.addWord(word);
assertEquals("find word which is not in Trie gives null", t_out, t.findWord(other_word));
//add same word multiple times
t = new Trie();
word = "Else";
other_word = "Something";
t_out = null;
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("find word which is not in Trie gives null", t_out, t.findWord(other_word));
//add multiple words, some with same prefix, mix case as well, check that we get a suggestion out
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
other_word = "SOMETHING";
assertEquals("find word which is in Trie gives the word", other_word, t.findWord(other_word).getWord());
}
@Test
//word not in Trie
public void missingGetSuggestion() {
t = new Trie();
word = "Else";
partial_word = "Something";
output = "";
t.addWord(word);
assertEquals("missing word Trie has no suggestion", output, t.getSuggestion(partial_word));
//add same word multiple times
t = new Trie();
word = "Else";
partial_word = "Something";
output = "";
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("missing word Trie has no suggestion", output, t.getSuggestion(partial_word));
}
@Test
//word in Trie
public void presentGetSuggestion() {
t = new Trie();
word = "Else";
partial_word = "El";
output = "Else";
t.addWord(word);
assertEquals("present word Trie has suggestion", output, t.getSuggestion(partial_word));
//add same word multiple times
t = new Trie();
word = "Else";
partial_word = "El";
output = "Else";
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("present word Trie has suggestion", output, t.getSuggestion(partial_word));
//add multiple words, some with same prefix, check that we get a suggestion out
t = new Trie();
word = "Else";
partial_word = "El";
output = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
suggested = t.getSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", true, suggested.length() > 0);
//run again, since there is not guaranteed ordering, maybe we get a different suggestion
suggested = t.getSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", true, suggested.length() > 0);
//add multiple words, some with same prefix, mix case as well, check that we get a suggestion out
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
suggested = t.getSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", true, suggested.length() > 0);
//run again, since there is not guaranteed ordering, maybe we get a different suggestion
suggested = t.getSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", true, suggested.length() > 0);
}
@Test
//word in Trie
public void presentGetShortestSuggestion() {
t = new Trie();
word = "Else";
partial_word = "El";
output = "Else";
t.addWord(word);
assertEquals("present word Trie has suggestion", output, t.getShortestSuggestion(partial_word));
//add same word multiple times
t = new Trie();
word = "Else";
partial_word = "El";
output = "Else";
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("present word Trie has suggestion", output, t.getShortestSuggestion(partial_word));
//add multiple words, some with same prefix, check that we get the shortest suggestion. CAREFUL! we have no guaranteed ordering!
t = new Trie();
word = "Else";
partial_word = "El";
output = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
suggested = t.getShortestSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", output.length(), suggested.length());
//run again, since there is not guaranteed ordering, maybe we get a different suggestion
suggested = t.getShortestSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", output.length(), suggested.length());
//add multiple words, some with same prefix, mix case as well, check that we get a suggestion out
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "ELSEWHO";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
partial_word = "ELSEW";
output = "ELSEWHO";
suggested = t.getShortestSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", output, suggested);
//run again, since there is not guaranteed ordering, maybe we get a different suggestion
suggested = t.getShortestSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", output, suggested);
//same as before but different insertion order, result must be the same
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSEWHO";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
partial_word = "ELSEW";
output = "ELSEWHO";
suggested = t.getShortestSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", output, suggested);
//run again, since there is not guaranteed ordering, maybe we get a different suggestion
suggested = t.getShortestSuggestion(partial_word);
System.out.println("===Got suggestion: " + suggested);
assertEquals("present word Trie has suggestion", output, suggested);
}
@Test
//word in Trie, get all suggestions
public void presentGetSuggestions() {
t = new Trie();
word = "Else";
partial_word = "El";
count = 1;
t.addWord(word);
assertEquals("present word Trie has 1 suggestion", count, t.getSuggestions(partial_word).size());
//add same word multiple times
t = new Trie();
word = "Else";
partial_word = "El";
count = 1;
t.addWord(word);
t.addWord(word);
t.addWord(word);
assertEquals("present word Trie has 1 suggestion", count, t.getSuggestions(partial_word).size());
//add multiple words, some with same prefix, check that we get a suggestion out
t = new Trie();
word = "Else";
partial_word = "El";
count = 3;
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
assertEquals("present word Trie has 3 suggestions", count, t.getSuggestions(partial_word).size());
//add multiple words, some with same prefix, mix case as well, check that we get a suggestion out
t = new Trie();
partial_word = "Else";
count = 2;
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
assertEquals("present word Trie has 2 suggestions", count, t.getSuggestions(partial_word).size());
}
@Test
//print Trie
public void print() {
//add multiple words, some with same prefix, mix case as well
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
System.out.println("===PRINT_START");
t.printTrie();
System.out.println("===PRINT_END");
//add multiple words, some with same prefix, mix case as well, repeat words
t = new Trie();
word = "Else";
t.addWord(word);
word = "Elses";
t.addWord(word);
t.addWord(word);
word = "Elsewhere";
t.addWord(word);
t.addWord(word);
t.addWord(word);
word = "Something";
t.addWord(word);
word = "else";
t.addWord(word);
word = "elses";
t.addWord(word);
word = "elsewhere";
t.addWord(word);
t.addWord(word);
t.addWord(word);
word = "something";
t.addWord(word);
word = "ELSE";
t.addWord(word);
word = "ELSES";
t.addWord(word);
word = "ELSEWHERE";
t.addWord(word);
t.addWord(word);
t.addWord(word);
word = "SOMETHING";
t.addWord(word);
word = "eLSe";
t.addWord(word);
word = "eLsEs";
t.addWord(word);
word = "elseWHERE";
t.addWord(word);
t.addWord(word);
t.addWord(word);
word = "SOMEthing";
t.addWord(word);
System.out.println("===PRINT_START");
t.printTrie();
System.out.println("===PRINT_END");
}
@Test
public void childrenOfRootFailToRoot() {
t = new Trie();
words = new HashSet<>();
words.add("a");
words.add("b");
t.buildFailLinks(words);
assertNull("root has no fail node", t.getFailNode());
assertEquals("root only has 2 children", 2, t.getNodeChildren().size());
for(Trie child : t.getNodeChildren().values()){
assertEquals("child node fails to root", t, child.getFailNode());
}
}
@Test
public void repeatedCharactersStringAllNodesFailToPreviousNode() {
t = new Trie();
words = new HashSet<>();
words.add("a");
words.add("aa");
String longestWord = "aaa";
words.add(longestWord);
t.buildFailLinks(words);
assertNull("root has no fail node", t.getFailNode());
assertEquals("root only has one children", 1, t.getNodeChildren().size());
Queue<Trie> queue = new ArrayDeque<>();
Trie currFailNode = t;
queue.addAll(t.getNodeChildren().values());
//check each children fails to previous node
while(!queue.isEmpty()){
Trie curr = queue.poll();
//last node has no children
if(curr.getWord().equals(longestWord)) {
assertEquals("last node has no children", 0, curr.getNodeChildren().size());
} else {
assertEquals("node only has one children", 1, curr.getNodeChildren().size());
}
assertEquals("node fails to previous node", currFailNode, curr.getFailNode());
currFailNode = curr;
queue.addAll(curr.getNodeChildren().values());
}
}
@Test
public void lastUniqueCharacterInSingleStringFailsToRoot() {
t = new Trie();
words = new HashSet<>();
words.add("a");
words.add("aa");
String longestWord = "aab";
words.add(longestWord);
t.buildFailLinks(words);
assertNull("root has no fail node", t.getFailNode());
assertEquals("root only has one children", 1, t.getNodeChildren().size());
Queue<Trie> queue = new ArrayDeque<>();
Trie currFailNode = t;
queue.addAll(t.getNodeChildren().values());
//check each children fails to previous node
while(!queue.isEmpty()){
Trie curr = queue.poll();
//last node has no children and fails to root
if(curr.getWord().equals(longestWord)) {
assertEquals("last node has no children", 0, curr.getNodeChildren().size());
assertTrue("last node fails to root", curr.getFailNode().isRoot());
} else {
assertEquals("node only has one children", 1, curr.getNodeChildren().size());
assertEquals("node fails to previous node", currFailNode, curr.getFailNode());
}
currFailNode = curr;
queue.addAll(curr.getNodeChildren().values());
}
}
@Test
public void lastDifferentCharacterInSingleStringFailsToRootOtherChildren() {
t = new Trie();
words = new HashSet<>();
words.add("a");
String desiredFailNodeWord = "b";
words.add(desiredFailNodeWord);
words.add("aa");
String longestWord = "aab";
words.add(longestWord);
t.buildFailLinks(words);
assertNull("root has no fail node", t.getFailNode());
assertEquals("root has 2 children", 2, t.getNodeChildren().size());
Queue<Trie> queue = new ArrayDeque<>();
Trie desiredFailNode = null;
queue.addAll(t.getNodeChildren().values());
//check each children fails to previous node
while(!queue.isEmpty()){
Trie curr = queue.poll();
//while we do the visit, we are looking for node desiredFailNode, which will be visited before our longestWord node
if(curr.getWord().equals(desiredFailNodeWord)){
desiredFailNode = curr;
}
//longestWordNode must fail to desiredFailNode
if(curr.getWord().equals(longestWord)) {
assertEquals("last node has no children", 0, curr.getNodeChildren().size());
assertEquals("last node fails to desiredFailNode", desiredFailNode, curr.getFailNode());
}
queue.addAll(curr.getNodeChildren().values());
}
}
@Test
/*
For this trie we want:
NODE(word, words, failNode)
ROOT('', '', null)
/ \
A('a', 'a', root) B('', '', root)
/ \ / \
A('', 'a', A) B('ab', 'ab', root) C('bc', 'bc', root) D('bd', 'bd', root)
/ \
B('aab', 'aab, ab', AB) C('aac', '', root)
*/
public void sampleTrieWithNoMatchingCharacters() {
t = new Trie();
word = "none";
words = new HashSet<>();
words.add("a");
words.add("ab");
words.add("bc");
words.add("aab");
words.add("aac");
words.add("bd");
t.buildFailLinks(words);
t.printAhoCorasickTrie();
results = t.findMatchesInString(word);
assertTrue("result is empty", results.isEmpty());
}
@Test
/*
For this trie we want:
NODE(word, words, failNode)
ROOT('', '', null)
/ \
A('a', 'a', root) B('', '', root)
/ \ / \
A('', 'a', A) B('ab', 'ab', root) C('bc', 'bc', root) D('bd', 'bd', root)
/ \
B('aab', 'aab, ab', AB) C('aac', '', root)
for search string bcaab we expect: (string: matching positions)
bc: 0
a: 2, 3
aab: 2
ab: 3
*/
public void sampleTrieWithAllMatchingCharacters() {
t = new Trie();
word = "bcaab";
words = new HashSet<>();
words.add("a");
words.add("ab");
words.add("bc");
words.add("aab");
words.add("aac");
words.add("bd");
matches = new HashMap<>();
List<Integer> positionsBC = new ArrayList<>();
positionsBC.add(0);
matches.put("bc", positionsBC);
List<Integer> positionsA = new ArrayList<>();
positionsA.add(2);
positionsA.add(3);
matches.put("a", positionsA);
List<Integer> positionsAAB = new ArrayList<>();
positionsAAB.add(2);
matches.put("aab", positionsAAB);
List<Integer> positionsAB = new ArrayList<>();
positionsAB.add(3);
matches.put("ab", positionsAB);
t.buildFailLinks(words);
t.printAhoCorasickTrie();
results = t.findMatchesInString(word);
assertTrue("result is same size as expected", results.size() == matches.size());
for(Map.Entry<String, List<Integer>> e : matches.entrySet()){
List<Integer> res = results.get(e.getKey());
assertTrue("key is found in result", results.containsKey(e.getKey()));
assertTrue("result is same size as expected", e.getValue().size() == res.size());
assertTrue("all expected matches are in result", e.getValue().containsAll(res));
}
}
@Test
/*
For this trie we want:
NODE(word, words, failNode)
ROOT('', '', null)
/ \ \
A('a', 'a', root) B('', '', root) C('', '', root)
/ \ / \ \
A('', 'a', A) B('ab', 'ab', root) C('bc', 'bc', root) D('bd', 'bd', root) A('ca', 'ca', root)
/ \
B('aab', 'aab, ab', AB) C('aac', '', root)
for search string bcaab we expect: (string: matching positions)
bc: 0
a: 2, 3
aab: 2
ab: 3
ca: 1
*/
public void otherSampleTrieWithAllMatchingCharacters() {
t = new Trie();
word = "bcaab";
words = new HashSet<>();
words.add("a");
words.add("ab");
words.add("bc");
words.add("aab");
words.add("aac");
words.add("bd");
words.add("ca");
matches = new HashMap<>();
List<Integer> positionsBC = new ArrayList<>();
positionsBC.add(0);
matches.put("bc", positionsBC);
List<Integer> positionsA = new ArrayList<>();
positionsA.add(2);
positionsA.add(3);
matches.put("a", positionsA);
List<Integer> positionsAAB = new ArrayList<>();
positionsAAB.add(2);
matches.put("aab", positionsAAB);
List<Integer> positionsAB = new ArrayList<>();
positionsAB.add(3);
matches.put("ab", positionsAB);
List<Integer> positionsCA = new ArrayList<>();
positionsCA.add(1);
matches.put("ca", positionsCA);
t.buildFailLinks(words);
t.printAhoCorasickTrie();
results = t.findMatchesInString(word);
assertTrue("result is same size as expected", results.size() == matches.size());
for(Map.Entry<String, List<Integer>> e : matches.entrySet()){
List<Integer> res = results.get(e.getKey());
assertTrue("key is found in result", results.containsKey(e.getKey()));
assertTrue("result is same size as expected", e.getValue().size() == res.size());
assertTrue("all expected matches are in result", e.getValue().containsAll(res));
}
}
@Test
/*
For this trie we want:
NODE(word, words, failNode)
ROOT('', '', null)
/ \
A('a', 'a', root) B('', '', root)
/ \ / \
A('', 'a', A) B('ab', 'ab', root) C('bc', 'bc', root) D('bd', 'bd', root)
/ \
B('aab', 'aab, ab', AB) C('aac', '', root)
for search string bcxaab we expect: (string: matching positions)
bc: 0
a: 3, 4
aab: 3
ab: 4
*/
public void sampleTrieWithNonMatchingCharacter() {
t = new Trie();
word = "bcxaab";
words = new HashSet<>();
words.add("a");
words.add("ab");
words.add("bc");
words.add("aab");
words.add("aac");
words.add("bd");
matches = new HashMap<>();
List<Integer> positionsBC = new ArrayList<>();
positionsBC.add(0);
matches.put("bc", positionsBC);
List<Integer> positionsA = new ArrayList<>();
positionsA.add(3);
positionsA.add(4);
matches.put("a", positionsA);
List<Integer> positionsAAB = new ArrayList<>();
positionsAAB.add(3);
matches.put("aab", positionsAAB);
List<Integer> positionsAB = new ArrayList<>();
positionsAB.add(4);
matches.put("ab", positionsAB);
t.buildFailLinks(words);
t.printAhoCorasickTrie();
results = t.findMatchesInString(word);
assertTrue("result is same size as expected", results.size() == matches.size());
for(Map.Entry<String, List<Integer>> e : matches.entrySet()){
List<Integer> res = results.get(e.getKey());
assertTrue("key is found in result", results.containsKey(e.getKey()));
assertTrue("result is same size as expected", e.getValue().size() == res.size());
assertTrue("all expected matches are in result", e.getValue().containsAll(res));
}
}
@Test
/*
For this trie we want:
NODE(word, words, failNode)
ROOT('', '', null)
/ \
A('a', 'a', root) B('', '', root)
/ \ / \
A('', 'a', A) B('ab', 'ab', root) C('bc', 'bc', root) D('bd', 'bd', root)
/ \
B('aab', 'aab, ab', AB) C('aac', '', root)
for search string bcxxaab we expect: (string: matching positions)
bc: 0
a: 4, 5
aab: 4
ab: 5
*/
public void sampleTrieWithConsecutiveNonMatchingCharacters() {
t = new Trie();
word = "bcxxaab";
words = new HashSet<>();
words.add("a");
words.add("ab");
words.add("bc");
words.add("aab");
words.add("aac");
words.add("bd");
matches = new HashMap<>();
List<Integer> positionsBC = new ArrayList<>();
positionsBC.add(0);
matches.put("bc", positionsBC);
List<Integer> positionsA = new ArrayList<>();
positionsA.add(4);
positionsA.add(5);
matches.put("a", positionsA);
List<Integer> positionsAAB = new ArrayList<>();
positionsAAB.add(4);
matches.put("aab", positionsAAB);
List<Integer> positionsAB = new ArrayList<>();
positionsAB.add(5);
matches.put("ab", positionsAB);
t.buildFailLinks(words);
t.printAhoCorasickTrie();
results = t.findMatchesInString(word);
assertTrue("result is same size as expected", results.size() == matches.size());
for(Map.Entry<String, List<Integer>> e : matches.entrySet()){
List<Integer> res = results.get(e.getKey());
assertTrue("key is found in result", results.containsKey(e.getKey()));
assertTrue("result is same size as expected", e.getValue().size() == res.size());
assertTrue("all expected matches are in result", e.getValue().containsAll(res));
}
}
@Test
/*
For this trie we want:
NODE(word, words, failNode)
ROOT('', '', null)
/ \
A('a', 'a', root) B('', '', root)
/ \ / \
A('', 'a', A) B('ab', 'ab', root) C('bc', 'bc', root) D('bd', 'bd', root)
/ \
B('aab', 'aab, ab', AB) C('aac', '', root)
for search string bcxaxab we expect: (string: matching positions)
bc: 0
a: 3, 5
ab: 5
*/
public void sampleTrieWithAlternatingNonMatchingCharacters() {
t = new Trie();
word = "bcxaxab";
words = new HashSet<>();
words.add("a");
words.add("ab");
words.add("bc");
words.add("aab");
words.add("aac");
words.add("bd");
matches = new HashMap<>();
List<Integer> positionsBC = new ArrayList<>();
positionsBC.add(0);
matches.put("bc", positionsBC);
List<Integer> positionsA = new ArrayList<>();
positionsA.add(3);
positionsA.add(5);
matches.put("a", positionsA);
List<Integer> positionsAB = new ArrayList<>();
positionsAB.add(5);
matches.put("ab", positionsAB);
t.buildFailLinks(words);
t.printAhoCorasickTrie();
results = t.findMatchesInString(word);
assertTrue("result is same size as expected", results.size() == matches.size());
for(Map.Entry<String, List<Integer>> e : matches.entrySet()){
List<Integer> res = results.get(e.getKey());
assertTrue("key is found in result", results.containsKey(e.getKey()));
assertTrue("result is same size as expected", e.getValue().size() == res.size());
assertTrue("all expected matches are in result", e.getValue().containsAll(res));
}
}
}
package com.blogspot.groglogs.trie;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
public class TrieUtils {
public static List<List<String>> findWordSquares(List<String> words){
if(words == null || words.size() == 0) throw new IllegalArgumentException("Word list cannot be null or empty!");
Trie trie = new Trie();
Queue<Character[][]> incomplete = new LinkedList<>();
//preprocess: add all words to our trie and create a matrix initializing the first row and column to each single word
for(String word : words){
trie.addWord(word);
Character[][] square = new Character[word.length()][word.length()];
for(int i = 0; i < word.length(); i++){
square[0][i] = word.charAt(i);
square[i][0] = word.charAt(i);
}
incomplete.add(square);
}
List<Character[][]> results = new LinkedList<>();
//analyse each matrix trying to completely fill it
while(!incomplete.isEmpty()){
Character[][] square = incomplete.remove();
//this will mark where the next incomplete word is. First word is always complete
int i = 1;
boolean isComplete = false;
//track the word we built so far, so we can later search the trie
StringBuilder curr = new StringBuilder(square.length);
//scan the current matrix
for(; i < square.length; i++){
//check if we completely filled it and in case, add it to the result set and process the next one
if(i == square.length - 1 && square[i][i] != null){
results.add(square);
isComplete = true;
}
//otherwise we know which word is incomplete, build it up
if(square[i][i] == null){
//at the very beginning the first character we need is the second in the only word we have
for(int j = 0; j < i; j++){
curr.append(square[i][j]);
}
break;
}
}
//if we found a result, no need to keep processing this matrix
if(isComplete) continue;
//find all possible words with same prefix and length up to our current matrix
List<String> possibilities = trie.getSuggestions(curr.toString(), square.length);
if(possibilities.isEmpty()) continue;
//for each possibility, create a new matrix, add the new candidate word and place it back in the queue
//the next round will either proceed further, find no other path to process, or verify we completely filled it
for(String s : possibilities){
Character[][] next = new Character[square.length][square.length];
//COPY THE MATRIX! simply referencing again is not enough
//and we need to create a new matrix from the current one for each candidate word!
for(int row = 0; row < square.length; row++){
for(int column = 0; column < square.length; column++) {
next[row][column] = square[row][column];
}
}
//add the candidate word starting from the place we know is incomplete at the moment
for(int k = 0; k < s.length(); k++){
next[i][k] = s.charAt(k);
next[k][i] = s.charAt(k);
}
incomplete.add(next);
}
}
//collect result from the complete matrices, if any
List<List<String>> out = new LinkedList<>();
for(int i = 0; i < results.size(); i++){
Character[][] curr = results.get(i);
List<String> answers = new LinkedList<>();
StringBuilder word;
//get all words from this matrix and add them to the corresponding result set
for(int j = 0; j < curr.length; j++){
word = new StringBuilder(curr.length);
for(int k = 0; k < curr.length; k++){
word.append(curr[j][k]);
}
answers.add(word.toString());
}
out.add(answers);
}
return out;
}
//finds all occurrences of search strings within given string
//our Trie is built out of the search strings
public static Map<String, List<Integer>> findMatchingStringsInString(String string, Set<String> matches){
if(string == null || string.isEmpty() || matches.isEmpty()) return new HashMap<>();
Trie t = new Trie();
t.buildFailLinks(matches);
return t.findMatchesInString(string);
}
public static Set<String> findMatchingWordsInPhoneNumber(String phoneNumber, Set<String> words){
Set<String> result = new HashSet<>();
if(phoneNumber == null || phoneNumber.isEmpty() || words == null || words.isEmpty()){
return result;
}
//links a digit with its character on the same key
Map<Character, Set<Character>> digitToCharacters = new HashMap<>();
Set<Character> digit2 = new HashSet<>();
digit2.add('a');
digit2.add('b');
digit2.add('c');
Set<Character> digit3 = new HashSet<>();
digit3.add('d');
digit3.add('e');
digit3.add('f');
Set<Character> digit4 = new HashSet<>();
digit4.add('g');
digit4.add('h');
digit4.add('i');
Set<Character> digit5 = new HashSet<>();
digit5.add('j');
digit5.add('k');
digit5.add('l');
Set<Character> digit6 = new HashSet<>();
digit6.add('m');
digit6.add('n');
digit6.add('o');
Set<Character> digit7 = new HashSet<>();
digit7.add('p');
digit7.add('q');
digit7.add('r');
digit7.add('s');
Set<Character> digit8 = new HashSet<>();
digit8.add('t');
digit8.add('u');
digit8.add('v');
Set<Character> digit9 = new HashSet<>();
digit9.add('w');
digit9.add('x');
digit9.add('y');
digit9.add('z');
digitToCharacters.put('0', new HashSet<>());
digitToCharacters.put('1', new HashSet<>());
digitToCharacters.put('2', digit2);
digitToCharacters.put('3', digit3);
digitToCharacters.put('4', digit4);
digitToCharacters.put('5', digit5);
digitToCharacters.put('6', digit6);
digitToCharacters.put('7', digit7);
digitToCharacters.put('8', digit8);
digitToCharacters.put('9', digit9);
Trie t = new Trie();
t.buildFailLinks(words);
findMatchingWordsInPhoneNumber(t, t, phoneNumber, 0, result, digitToCharacters);
return result;
}
//helper for findMatchingWordsInPhoneNumber
//for each potential character for a given digit, we scan the trie to find matches and collect results
private static void findMatchingWordsInPhoneNumber(Trie t, Trie curr, String phoneNumber, int index, Set<String> result, Map<Character, Set<Character>> digitToCharacters){
//stop at end of the number
if(index >= phoneNumber.length()){
return;
}
Character c = phoneNumber.charAt(index);
if(!Character.isDigit(c)){
throw new IllegalArgumentException("Phone number can only contain digits, found: " + c);
}
Set<Character> possibleChars = digitToCharacters.get(c);
//if we are a digit without potential characters, we MUST restart from top of the trie AND move on to the next digit
if(possibleChars.isEmpty()){
findMatchingWordsInPhoneNumber(t, t, phoneNumber, index + 1, result, digitToCharacters);
} else {
for (Character character : possibleChars) {
Trie nextNode = findNodeForChar(curr, character);
result.addAll(nextNode.getWords());
findMatchingWordsInPhoneNumber(t, nextNode, phoneNumber, index + 1, result, digitToCharacters);
}
}
}
//helper for findMatchingWordsInPhoneNumber
//finds the next node or fail node to continue from for the given character starting at the given node
private static Trie findNodeForChar(Trie start, Character c){
//from the current node, try to find a match for the given character
Trie curr = start;
Trie node = curr.getNodeChildren().get(c);
//if there is no match, follow fail links until you either find a match OR cannot fail back anymore (you are trying to fail back from root node)
boolean doSkip = false;
while (node == null) {
//if we are trying to fail back from the root, it means that character does not exist in out trie, skip to the next character
//and continue from the root
if (curr.getFailNode() == null) {
doSkip = true;
break;
}
//else fail to our failNode and check from there
curr = curr.getFailNode();
node = curr.getNodeChildren().get(c);
}
if (!doSkip) {
//then continue from there
curr = node;
}
return curr;
}
public static Set<String> findMatchingWordsInPhoneNumberAsDigits(String phoneNumber, Set<String> words){
if(phoneNumber == null || phoneNumber.isEmpty() || words == null || words.isEmpty()){
return new HashSet<>();
}
for(int i = 0; i < phoneNumber.length(); i++){
if(!Character.isDigit(phoneNumber.charAt(i))){
throw new IllegalArgumentException("Phone number must contain only digits");
}
}
//key = character, value = digit with that character on the key
Map<Character, Character> characterDigit = new HashMap<>();
characterDigit.put('a', '2');
characterDigit.put('b', '2');
characterDigit.put('c', '2');
characterDigit.put('d', '3');
characterDigit.put('e', '3');
characterDigit.put('f', '3');
characterDigit.put('g', '4');
characterDigit.put('h', '4');
characterDigit.put('i', '4');
characterDigit.put('j', '5');
characterDigit.put('k', '5');
characterDigit.put('l', '5');
characterDigit.put('m', '6');
characterDigit.put('n', '6');
characterDigit.put('o', '6');
characterDigit.put('p', '7');
characterDigit.put('q', '7');
characterDigit.put('r', '7');
characterDigit.put('s', '7');
characterDigit.put('t', '8');
characterDigit.put('u', '8');
characterDigit.put('v', '8');
characterDigit.put('w', '9');
characterDigit.put('x', '9');
characterDigit.put('y', '9');
characterDigit.put('z', '9');
//convert all words to digit representation
//key = digit representation, value = words
Map<String, Set<String>> wordsAsDigits = new HashMap<>();
for(String word : words){
StringBuffer wordAsDigits = new StringBuffer();
for(int i = 0; i < word.length(); i++){
Character c = word.charAt(i);
if(!Character.isAlphabetic(c)){
throw new IllegalArgumentException("word: " + word + " contains non alphabetic characters");
}
wordAsDigits.append(characterDigit.get(c));
}
String s = wordAsDigits.toString();
//same digit representation can apply to multiple words
Set<String> wordsForDigitRepresentation = wordsAsDigits.get(s);
if(wordsForDigitRepresentation == null){
wordsForDigitRepresentation = new HashSet<>();
}
wordsForDigitRepresentation.add(word);
wordsAsDigits.put(s, wordsForDigitRepresentation);
}
//we now search the trie using Aho-Corasick, but look only at digits
Map<String, List<Integer>> out = TrieUtils.findMatchingStringsInString(phoneNumber, wordsAsDigits.keySet());
Set<String> result = new HashSet<>();
for(String s : out.keySet()){
result.addAll(wordsAsDigits.get(s));
}
return result;
}
}
package com.blogspot.groglogs.test.trie;
import com.blogspot.groglogs.trie.TrieUtils;
import org.junit.Test;
import java.util.LinkedList;
import java.util.List;
import static org.junit.Assert.*;
import static org.hamcrest.CoreMatchers.*;
public class WordSquareJTests {
List<List<String>> out;
List<String> in;
@Test
//empty input, expected exception
public void empty() {
in = null;
try {
out = TrieUtils.findWordSquares(in);
} catch(IllegalArgumentException e){
System.out.println("null list got IllegalArgumentException " + e.getMessage());
}
in = new LinkedList<>();
try {
out = TrieUtils.findWordSquares(in);
} catch(IllegalArgumentException e){
System.out.println("empty list got IllegalArgumentException " + e.getMessage());
}
}
@Test
public void noSquare() {
in = new LinkedList<>();
in.add("area");
in.add("ball");
in.add("dear");
in.add("lead");
in.add("yard");
out = TrieUtils.findWordSquares(in);
assertEquals("square can't be built, got empty result", true, out.isEmpty());
}
@Test
public void sameLengthSquare() {
in = new LinkedList<>();
in.add("area");
in.add("ball");
in.add("dear");
in.add("lady");
in.add("lead");
in.add("yard");
out = TrieUtils.findWordSquares(in);
assertEquals("two squares can be built", 2, out.size());
List<String> square1 = new LinkedList<>();
square1.add("ball");
square1.add("area");
square1.add("lead");
square1.add("lady");
assertThat("first square is ball, area, lead, lady", out.get(0), is(square1));
List<String> square2 = new LinkedList<>();
square2.add("lady");
square2.add("area");
square2.add("dear");
square2.add("yard");
assertThat("second square is lady, area, dear, yard", out.get(1), is(square2));
}
@Test
public void repeatedWordSquare() {
in = new LinkedList<>();
in.add("that");
in.add("hash");
in.add("asia");
in.add("fool");
out = TrieUtils.findWordSquares(in);
assertEquals("one squares can be built", 1, out.size());
List<String> square1 = new LinkedList<>();
square1.add("that");
square1.add("hash");
square1.add("asia");
square1.add("that");
assertThat("first square is that, hash, asia, that", out.get(0), is(square1));
}
@Test
public void differentLengthSquare() {
in = new LinkedList<>();
in.add("that");
in.add("hash");
in.add("asia");
in.add("foooool");
in.add("heart");
in.add("ember");
in.add("abuse");
in.add("resin");
in.add("trend");
out = TrieUtils.findWordSquares(in);
assertEquals("two squares can be built", 2, out.size());
List<String> square1 = new LinkedList<>();
square1.add("that");
square1.add("hash");
square1.add("asia");
square1.add("that");
assertThat("first square is that, hash, asia, that", out.get(0), is(square1));
List<String> square2 = new LinkedList<>();
square2.add("heart");
square2.add("ember");
square2.add("abuse");
square2.add("resin");
square2.add("trend");
assertThat("second square is heart, ember, abuse, resin, trend", out.get(1), is(square2));
}
}