Created
December 14, 2018 18:16
-
-
Save hgadre/d4e9ec576932167f01fd33970002a882 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Set; | |
/** | |
* We have been given a list of strings which are blacklisted. The goal is | |
* to identify if a given text contains any of these blacklisted strings. | |
* The restriction here is that the blacklisted string needs to match on the | |
* word boundary e.g. consider a blacklist string "abc" and text "abc pqr", the | |
* text in this case is unsafe (i.e. it contains a blacklisted string). On the | |
* other hand if the text is "abcoqr", then the text is safe since the string | |
* "abc" is not on the word boundary. | |
*/ | |
public class SafeText { | |
static class Tuple { | |
int span = 0; // the length of previous words which should have been matched if end = true. | |
boolean end; // marks the identification of a blacklisted string. | |
Set<String> nextWords = new HashSet<>(); // next set of words to search for matching blacklisted strings. | |
public void setEnd(boolean end, int span) { | |
this.span = span; | |
this.end = end; | |
} | |
public boolean isEnd(int span) { | |
return end && span == this.span; | |
} | |
public void addNextWord (String word) { | |
this.nextWords.add(word); | |
} | |
public boolean containsWord(String word) { | |
return this.nextWords.contains(word); | |
} | |
@Override | |
public String toString() { | |
return String.format("[end = %s, nextWords = %s]", end, nextWords); | |
} | |
} | |
private final Map<String, Tuple> m = new HashMap<>(); | |
/** | |
* A constructor accepts a list of blacklisted strings | |
* and prepares a modified trie data structure. | |
*/ | |
public SafeText(List<String> blackList) { | |
Collections.sort(blackList); | |
for (String str : blackList) { | |
String[] tokens = str.split("\\s"); | |
int i = 0; | |
for (; i < tokens.length - 1; i++) { | |
m.computeIfAbsent(tokens[i], x -> new Tuple()).addNextWord(tokens[i+1]); | |
} | |
m.computeIfAbsent(tokens[i], x -> new Tuple()).setEnd(true, tokens.length-1); | |
} | |
} | |
/** | |
* Return if the provided text is safe or not. | |
*/ | |
public boolean isSafe(String text) { | |
String[] tokens = text.split("\\s"); | |
for (int i = 0; i < tokens.length; i++) { | |
String key = tokens[i]; | |
int j = i; | |
while (j < tokens.length && m.containsKey(key)) { | |
Tuple t = m.get(key); | |
if (t.isEnd(j-i)) { | |
return false; | |
} else if ((j+1) < tokens.length && t.containsWord(tokens[j+1])) { | |
key = tokens[j+1]; | |
j++; | |
} else { | |
break; | |
} | |
} | |
} | |
return true; | |
} | |
public static void main(String[] args) { | |
List<String> l = new ArrayList<>(); | |
l.add("world war 3"); | |
l.add("world war 2"); | |
l.add("world war 6"); | |
l.add("world war 9"); | |
l.add("machine guns"); | |
l.add("hand grenades"); | |
SafeText s = new SafeText(l); | |
String[] tests = new String[] {"world is in a better place", | |
"world war world war world war in 9 europe 3", | |
"world war 3 in europe", | |
"Buy machine guns at http://", | |
"Machines will take over human jobs" | |
}; | |
for (String t : tests) { | |
System.out.println("Text : " + t + " : " + s.isSafe(t)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment