Skip to content

Instantly share code, notes, and snippets.

@hgadre
Created December 14, 2018 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hgadre/d4e9ec576932167f01fd33970002a882 to your computer and use it in GitHub Desktop.
Save hgadre/d4e9ec576932167f01fd33970002a882 to your computer and use it in GitHub Desktop.
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* We have been given a list of strings which are blacklisted. The goal is
* to identify if a given text contains any of these blacklisted strings.
* The restriction here is that the blacklisted string needs to match on the
* word boundary e.g. consider a blacklist string "abc" and text "abc pqr", the
* text in this case is unsafe (i.e. it contains a blacklisted string). On the
* other hand if the text is "abcoqr", then the text is safe since the string
* "abc" is not on the word boundary.
*/
public class SafeText {
static class Tuple {
int span = 0; // the length of previous words which should have been matched if end = true.
boolean end; // marks the identification of a blacklisted string.
Set<String> nextWords = new HashSet<>(); // next set of words to search for matching blacklisted strings.
public void setEnd(boolean end, int span) {
this.span = span;
this.end = end;
}
public boolean isEnd(int span) {
return end && span == this.span;
}
public void addNextWord (String word) {
this.nextWords.add(word);
}
public boolean containsWord(String word) {
return this.nextWords.contains(word);
}
@Override
public String toString() {
return String.format("[end = %s, nextWords = %s]", end, nextWords);
}
}
private final Map<String, Tuple> m = new HashMap<>();
/**
* A constructor accepts a list of blacklisted strings
* and prepares a modified trie data structure.
*/
public SafeText(List<String> blackList) {
Collections.sort(blackList);
for (String str : blackList) {
String[] tokens = str.split("\\s");
int i = 0;
for (; i < tokens.length - 1; i++) {
m.computeIfAbsent(tokens[i], x -> new Tuple()).addNextWord(tokens[i+1]);
}
m.computeIfAbsent(tokens[i], x -> new Tuple()).setEnd(true, tokens.length-1);
}
}
/**
* Return if the provided text is safe or not.
*/
public boolean isSafe(String text) {
String[] tokens = text.split("\\s");
for (int i = 0; i < tokens.length; i++) {
String key = tokens[i];
int j = i;
while (j < tokens.length && m.containsKey(key)) {
Tuple t = m.get(key);
if (t.isEnd(j-i)) {
return false;
} else if ((j+1) < tokens.length && t.containsWord(tokens[j+1])) {
key = tokens[j+1];
j++;
} else {
break;
}
}
}
return true;
}
public static void main(String[] args) {
List<String> l = new ArrayList<>();
l.add("world war 3");
l.add("world war 2");
l.add("world war 6");
l.add("world war 9");
l.add("machine guns");
l.add("hand grenades");
SafeText s = new SafeText(l);
String[] tests = new String[] {"world is in a better place",
"world war world war world war in 9 europe 3",
"world war 3 in europe",
"Buy machine guns at http://",
"Machines will take over human jobs"
};
for (String t : tests) {
System.out.println("Text : " + t + " : " + s.isSafe(t));
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment