Skip to content

Instantly share code, notes, and snippets.

Created March 14, 2010 10:45
What would you like to do?
;; A simple demonstration of several aspects of Clojure:
;; Sequences, Java interop, Regular expressions
;; Presented at the March 10, 2010 Canberra Java User's Group meeting
;; (See for more info)
(ns demo
  ;; clojure.core has its own `update` since Clojure 1.7; exclude it so the
  ;; `update` defined in this namespace does not replace it with a warning.
  (:refer-clojure :exclude [update])
  ;; FileReader and BufferedReader both live in the java.io package.
  (:import (java.io FileReader BufferedReader)))
(defn canonical
  "Returns a canonical (lower-case) version of word.
  Lower-casing uses Locale/ROOT so the result does not depend on the
  platform's default locale (e.g. \"I\" always maps to \"i\")."
  [word]
  ;; The ^String hint lets the compiler resolve toLowerCase without reflection.
  (.toLowerCase ^String word java.util.Locale/ROOT))
(defn word-seq
  "Lazily yields every run of word characters (regex \\w+) found in string,
  each passed through canonical."
  [string]
  (->> string
       (re-seq #"\w+")
       (map canonical)))
(defn read-words
  "Lazily yields the words of every line read from reader, in order,
  by concatenating the word-seq of each line."
  [reader]
  (->> reader
       line-seq
       (mapcat word-seq)))
(defn update
  "Returns tally with the count stored under word raised by one.
  A word not yet present in tally is treated as having a count of 0."
  [tally word]
  (let [seen (get tally word 0)]
    (assoc tally word (inc seen))))
(defn tally-words
  "Reads all words from reader and folds them into a map from each word
  to the number of times it occurred, starting from an empty map."
  [reader]
  (->> reader
       read-words
       (reduce update {})))
;; Tally the words of don_quixote.txt and print the ten most frequent ones,
;; highest count first. with-open closes the reader when the body finishes.
(with-open [reader (BufferedReader. (FileReader. "don_quixote.txt"))]
  (let [by-count-desc (sort-by (comp - val) (tally-words reader))]
    (doseq [[word n] (take 10 by-count-desc)]
      (println word ": " n))))
package demo;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Simple demonstration of tallying word counts in Java.
 */
public class Tally {
    /** Matches one maximal run of word characters. */
    static final Pattern WORD = Pattern.compile("\\w+");

    /** File tallied by {@link #main} when no argument is given. */
    private static final String DEFAULT_FILENAME = "don_quixote.txt";

    /** Maximum number of entries printed by {@link #main}. */
    private static final int TOP_COUNT = 10;

    /**
     * Splits a line into its words.
     *
     * @param line The input to parse words from.
     * @return A list of lower-case words found in line, in order of appearance.
     */
    static List<String> words(String line) {
        List<String> result = new ArrayList<String>();
        Matcher matcher = WORD.matcher(line);
        while (matcher.find()) {
            // Locale.ROOT keeps the lower-casing independent of the platform's default locale.
            result.add(matcher.group().toLowerCase(Locale.ROOT));
        }
        return result;
    }

    /**
     * Counts how often each word occurs in the text supplied by a reader.
     * The reader is read to the end of its input but is not closed.
     *
     * @param reader The source of the lines to parse and tally.
     * @return A map of word associated with the number of times they appeared.
     * @throws IOException if reading from the reader fails.
     */
    static Map<String, Integer> tallyWords(BufferedReader reader) throws IOException {
        Map<String, Integer> result = new HashMap<String, Integer>();
        String line;
        // readLine() returns null only at end of input; ready() merely reports
        // whether a read would block, so it is not a reliable end-of-input test.
        while ((line = reader.readLine()) != null) {
            for (String word : words(line)) {
                Integer count = result.get(word);
                result.put(word, count == null ? 1 : count + 1);
            }
        }
        return result;
    }

    /**
     * Used to sort Map.Entry elements in decreasing order of value.
     */
    static final class EntryComparator<K, V extends Comparable<V>>
            implements Comparator<Map.Entry<K, V>> {
        @Override
        public int compare(Entry<K, V> o1, Entry<K, V> o2) {
            // Swap the operands instead of negating the result: negation
            // overflows if compareTo ever returns Integer.MIN_VALUE.
            return o2.getValue().compareTo(o1.getValue());
        }
    }

    /**
     * Prints the most frequent words of a text file, one "word: count" pair
     * per line, highest count first.
     *
     * @param args Optional; args[0] is the file to tally, otherwise
     *             don_quixote.txt is used.
     * @throws IOException if the file cannot be opened or read.
     */
    public static void main(String[] args) throws IOException {
        String filename = args.length > 0 ? args[0] : DEFAULT_FILENAME;

        Map<String, Integer> tally;
        BufferedReader reader = new BufferedReader(new FileReader(filename));
        try {
            tally = tallyWords(reader);
        } finally {
            // Release the file handle even when reading fails.
            reader.close();
        }

        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(tally.entrySet());
        Collections.sort(entries, new EntryComparator<String, Integer>());

        // The text may contain fewer distinct words than TOP_COUNT.
        int limit = Math.min(TOP_COUNT, entries.size());
        for (Map.Entry<String, Integer> entry : entries.subList(0, limit)) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment