Created
June 15, 2020 15:28
-
-
Save mloza/62936c5522913a69f912d3b9a58f32f8 to your computer and use it in GitHub Desktop.
GTE 2 - bitset version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.*; | |
import java.util.concurrent.atomic.AtomicInteger; | |
public class TitleSearch { | |
public static void main(String[] args) { | |
final Scanner sc = new Scanner(System.in); | |
List<String> titles = new ArrayList<>(); | |
final int n = sc.nextInt(); | |
sc.nextLine(); | |
for (int i = 0; i < n; ++i) { | |
titles.add(sc.nextLine()); | |
} | |
List<String> queries = new ArrayList<>(); | |
final int q = sc.nextInt(); | |
sc.nextLine(); | |
for (int i = 0; i < q; ++i) { | |
queries.add(sc.nextLine()); | |
} | |
solve(titles, queries); | |
} | |
// trie version of this algorithm does not fit in memory | |
// this version performs much faster :) it can also be splitted | |
// across many machines | |
static void solve(List<String> titles, List<String> queries) { | |
Map<String, Integer> dict = new HashMap<>(); | |
AtomicInteger count = new AtomicInteger(); | |
Map<BitSet, List<String>> bitSetTitles = new HashMap<>(); | |
for (String title : titles) { | |
String[] kw = title.split(" "); | |
BitSet bitSet = new BitSet(); | |
for (String word : kw) { | |
dict.computeIfAbsent(word, w -> count.getAndIncrement()); | |
bitSet.set(dict.get(word)); | |
} | |
bitSetTitles.computeIfAbsent(bitSet, bs -> new ArrayList<>()).add(title); | |
} | |
for (String q : queries) { | |
PriorityQueue<String> results = new PriorityQueue<>((a, b) -> { | |
int result = a.split(" ").length - b.split(" ").length; | |
if (result == 0) { | |
return a.compareTo(b); | |
} | |
return result; | |
}); | |
BitSet queryBits = new BitSet(); | |
boolean foundAllKeyqords = true; | |
for (String kw : q.split(" ")) { | |
if (!dict.containsKey(kw)) { | |
foundAllKeyqords = false; | |
break; | |
} | |
queryBits.set(dict.get(kw)); | |
} | |
if(foundAllKeyqords) { | |
bitSetTitles.forEach((bs, titleList) -> { | |
BitSet localBs = (BitSet) bs.clone(); | |
localBs.and(queryBits); | |
if(localBs.equals(queryBits)) { | |
results.addAll(titleList); | |
} | |
}); | |
} | |
System.out.println(Math.min(10, results.size())); | |
for (int i = 0; i < 10; i++) { | |
String title = results.poll(); | |
if (title == null) break; | |
System.out.println(title); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment