Skip to content

Instantly share code, notes, and snippets.

@nielsutrecht
Created September 23, 2015 08:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nielsutrecht/bac00babd9f7145a4bbf to your computer and use it in GitHub Desktop.
Save nielsutrecht/bac00babd9f7145a4bbf to your computer and use it in GitHub Desktop.
Gutenberg frequency count
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static java.lang.Integer.compare;
/**
 * Counts how often each whitespace-separated, lower-cased word occurs in a
 * text document and prints the number of distinct words followed by the most
 * frequent ones.
 */
public class FreqCount {
    /**
     * Document read when no URL is passed on the command line.
     * HTTPS is used because {@code HttpURLConnection} does not follow a
     * redirect from http to https.
     */
    private static final String DEFAULT_SOURCE = "https://www.gutenberg.org/cache/epub/10/pg10.txt";

    /** Word separator; compiled once instead of once per input line. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Number of entries shown in the "top words" listing. */
    private static final int TOP_WORDS = 10;

    /**
     * Reads the document, tallies its words and prints the report to
     * standard output.
     *
     * @param argv optional; when present, {@code argv[0]} is the URL of the
     *             document to read (any scheme {@link URL} supports, e.g.
     *             {@code file:}). With no arguments the default document is used.
     * @throws Exception if the URL is malformed or the document cannot be read
     */
    public static void main(String... argv) throws Exception {
        URL source = new URL(argv.length > 0 ? argv[0] : DEFAULT_SOURCE);

        Map<String, AtomicInteger> frequency;
        // try-with-resources closes the stream even when reading fails part-way;
        // UTF-8 is explicit so the result does not depend on the platform charset.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(source.openStream(), StandardCharsets.UTF_8))) {
            frequency = countWords(in);
        }

        printReport(frequency);
    }

    /** Tallies every non-empty, lower-cased token of every line supplied by {@code in}. */
    private static Map<String, AtomicInteger> countWords(BufferedReader in) throws IOException {
        Map<String, AtomicInteger> frequency = new HashMap<>();
        String line;
        while ((line = in.readLine()) != null) {
            for (String token : WHITESPACE.split(line)) {
                // Locale.ROOT keeps case folding independent of the default locale.
                String word = token.trim().toLowerCase(Locale.ROOT);
                // Leading whitespace and blank lines produce an empty token.
                if (word.isEmpty()) {
                    continue;
                }
                frequency.computeIfAbsent(word, unused -> new AtomicInteger()).incrementAndGet();
            }
        }
        return frequency;
    }

    /** Prints the distinct-word total and the {@link #TOP_WORDS} most frequent words. */
    private static void printReport(Map<String, AtomicInteger> frequency) {
        System.out.println("Distinct words: " + frequency.size());
        System.out.println("Top " + TOP_WORDS + " words:");
        frequency
                .entrySet()
                .stream()
                .sorted((left, right) -> {
                    // Highest count first; equal counts fall back to alphabetical
                    // order so the listing does not depend on HashMap iteration order.
                    int byCount = compare(right.getValue().get(), left.getValue().get());
                    return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
                })
                .limit(TOP_WORDS)
                .forEach(entry -> System.out.println(entry.getKey() + "\t" + entry.getValue()));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment