Skip to content

Instantly share code, notes, and snippets.

@adamrabung
Created January 14, 2010 20:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamrabung/277457 to your computer and use it in GitHub Desktop.
Save adamrabung/277457 to your computer and use it in GitHub Desktop.
package org.mbte.groovypp.examples.wordcount;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Word-frequency benchmark: counts every word found in the files of a
 * two-level directory tree (root / group directory / file) and writes the
 * totals twice, once sorted alphabetically and once by decreasing count.
 *
 * Optimization of the original JavaWordCount found here:
 * http://code.google.com/p/groovypptest/source/browse/trunk/WordCount/src/org/mbte/groovypp/examples/wordcount/JavaWordCount.java
 * 1. Collect the counts in a hash-based map (a LinkedHashMap) and sort only
 *    when writing, rather than keeping a sorted map while counting
 *    (the original presumably used a TreeMap -- not visible from this file).
 * 2. Word count is case-insensitive (which is actually an optimization,
 *    because fewer distinct keys are stored).
 * 3. Buffered writers.
 */
public class JavaWordCount {
    /** Corpus directory used when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT_DIR =
            "/Users/arabung/temp/groovypptest-read-only/WordCount/20_newsgroups";

    /** Directory the two result files are written to when none is supplied. */
    private static final String DEFAULT_OUTPUT_DIR = ".";

    /** Number of times the whole count-and-write cycle is repeated and timed. */
    private static final int RUNS = 10;

    /** A word is a maximal run of regex word characters. Compiled once and reused. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /** Orders entries by decreasing count; ties keep their incoming order because the sort is stable. */
    private static final Comparator<Map.Entry<String, Integer>> BY_COUNT_DESCENDING =
            new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                    // compareTo instead of subtraction: subtraction can overflow and flip the sign.
                    return o2.getValue().compareTo(o1.getValue());
                }
            };

    /** Orders entries by word, using String's natural ordering. */
    private static final Comparator<Map.Entry<String, Integer>> BY_WORD =
            new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                    return o1.getKey().compareTo(o2.getKey());
                }
            };

    /**
     * Runs the benchmark {@code RUNS} times, printing the elapsed time of each run.
     * Each run writes {@code counts-alphabetical-java.txt} and
     * {@code counts-decreasing-java.txt} into the output directory.
     *
     * @param args optional; {@code args[0]} is the corpus root directory and
     *             {@code args[1]} the output directory. Missing arguments fall
     *             back to the built-in defaults, so calling with no arguments
     *             behaves as before.
     * @throws IOException if a directory cannot be listed, or a file cannot be
     *                     read or written
     */
    public static void main(String[] args) throws IOException {
        File rootDir = new File(argumentOrDefault(args, 0, DEFAULT_ROOT_DIR));
        File outputDir = new File(argumentOrDefault(args, 1, DEFAULT_OUTPUT_DIR));
        for (int run = 0; run < RUNS; ++run) {
            long timeStart = System.currentTimeMillis();
            CountingSet counter = countWords(rootDir);
            writeSorted(counter, BY_WORD, new File(outputDir, "counts-alphabetical-java.txt"));
            writeSorted(counter, BY_COUNT_DESCENDING, new File(outputDir, "counts-decreasing-java.txt"));
            System.out.println("Finished in " + (System.currentTimeMillis() - timeStart) + " ms");
        }
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argumentOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Counts the words of every regular file found directly inside each sub-directory of {@code rootDir}. */
    private static CountingSet countWords(File rootDir) throws IOException {
        CountingSet counter = new CountingSet();
        for (File groupDirectory : listChildren(rootDir)) {
            if (groupDirectory.isDirectory()) {
                for (File f : listChildren(groupDirectory)) {
                    if (f.isFile()) {
                        countWordsInFile(f, counter);
                    }
                }
            }
        }
        return counter;
    }

    /** Lists a directory, turning the {@code null} that File.listFiles() returns on failure into an IOException. */
    private static File[] listChildren(File dir) throws IOException {
        File[] children = dir.listFiles();
        if (children == null) {
            throw new IOException("Cannot list directory: " + dir.getPath());
        }
        return children;
    }

    /** Adds every lower-cased word of {@code file} to {@code counter}; the reader is closed even on failure. */
    private static void countWordsInFile(File file, CountingSet counter) throws IOException {
        // FileReader decodes with the platform default charset, as the code always did.
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = WORD_PATTERN.matcher(line);
                while (matcher.find()) {
                    // Locale.ROOT keeps the folding independent of the default locale
                    // (e.g. a Turkish locale would map "I" to a dotless i).
                    counter.add(matcher.group().toLowerCase(Locale.ROOT));
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Writes the counts to {@code target} in the given order; the writer is closed even on failure. */
    private static void writeSorted(CountingSet counter, Comparator<Map.Entry<String, Integer>> order, File target)
            throws IOException {
        // FileWriter rather than PrintWriter: PrintWriter swallows IOExceptions,
        // which would silently truncate the output (and it buffers on its own already).
        Writer writer = new BufferedWriter(new FileWriter(target));
        try {
            sortAndDisplay(counter.entrySet(), order, writer);
        } finally {
            writer.close();
        }
    }

    /** Copies {@code set} into a list, sorts the copy with {@code comp} and writes it to {@code writer}. */
    private static void sortAndDisplay(Set<Map.Entry<String, Integer>> set, Comparator<Map.Entry<String, Integer>> comp, Writer writer) throws IOException {
        List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(set);
        Collections.sort(list, comp);
        display(list, writer);
    }

    /** Writes one {@code "word : count"} line, terminated by {@code \n}, per entry. */
    private static void display(Iterable<Map.Entry<String, Integer>> list, Writer writer) throws IOException {
        for (Map.Entry<String, Integer> entry : list) {
            writer.write(entry.getKey() + " : " + entry.getValue() + "\n");
        }
    }

    /** Map from word to occurrence count that iterates in first-seen order. */
    private static class CountingSet extends LinkedHashMap<String, Integer> {
        private static final long serialVersionUID = 1L;

        /** Records one more occurrence of {@code s}; the first occurrence stores a count of 1. */
        void add(String s) {
            Integer current = get(s);
            put(s, (current == null) ? Integer.valueOf(1) : Integer.valueOf(current + 1));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment