Skip to content

Instantly share code, notes, and snippets.

@mismatch
Last active March 4, 2018 16:07
Show Gist options
  • Save mismatch/9d93bbca801a676e1b418ea0ae1d10b2 to your computer and use it in GitHub Desktop.
Save mismatch/9d93bbca801a676e1b418ea0ae1d10b2 to your computer and use it in GitHub Desktop.
Calculate word frequencies in a declarative way
import java.io.BufferedReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import static java.util.stream.Collectors.counting;
import static java.util.stream.Collectors.groupingBy;
/**
 * Counts how often each word occurs in the text supplied by a {@link Reader}.
 *
 * <p>Text is lower-cased, split on runs of non-word characters, and a small
 * fixed set of common English stop words is left out of the result.
 *
 * <p>This class never closes the reader it is given; closing it remains the
 * caller's job.
 */
public class WordsFrequencies {

    /**
     * Delimiter between words: one or more characters that are not {@code \w}.
     * NOTE(review): without {@code Pattern.UNICODE_CHARACTER_CLASS}, {@code \w}
     * only covers {@code [a-zA-Z_0-9]}, so non-ASCII letters act as separators.
     */
    private static final Pattern WORDS_REGEX = Pattern.compile("[^\\w]+");

    /** Lower-case stop words excluded from the counts; a set gives constant-time lookup. */
    private static final Set<String> IGNORED_WORDS = new HashSet<>(Arrays.asList(
            "a", "an", "the", "in", "on", "at", "is", "are", "am", "as", "of", "if", "so"));

    private final BufferedReader reader;

    /**
     * Creates a counter over the given character source.
     *
     * @param reader source of the text; used as-is when it already is a
     *               {@link BufferedReader}, otherwise wrapped in one
     */
    public WordsFrequencies(Reader reader) {
        this.reader = reader instanceof BufferedReader
                ? (BufferedReader) reader
                : new BufferedReader(reader);
    }

    /**
     * Reads the remaining lines of the reader and counts the words in them.
     *
     * <p>The lines are pulled from the underlying reader, so a later call only
     * sees whatever input is still unread.
     *
     * @return map from lower-cased word to its number of occurrences; stop
     *         words and empty tokens are never present as keys
     * @throws java.io.UncheckedIOException if reading from the reader fails
     */
    public Map<String, Long> get() {
        return reader.lines()
                // Locale.ROOT keeps case folding independent of the JVM's default
                // locale (e.g. Turkish would otherwise turn "I" into dotless "ı").
                .map(line -> line.toLowerCase(Locale.ROOT))
                .flatMap(WORDS_REGEX::splitAsStream)
                // splitAsStream emits an empty leading token when a line starts
                // with a delimiter (and may emit one for an empty line); drop it
                // so "" is not counted as a word.
                .filter(word -> !word.isEmpty() && !IGNORED_WORDS.contains(word))
                .collect(groupingBy(Function.identity(), counting()));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment