Skip to content

Instantly share code, notes, and snippets.

@smilingleo
Created October 19, 2015 07:13
Show Gist options
  • Save smilingleo/84ff3d7819411344c12b to your computer and use it in GitHub Desktop.
Save smilingleo/84ff3d7819411344c12b to your computer and use it in GitHub Desktop.
word counter implementation by different approaches.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class WordCounterTest {
static String fileName = "/Users/leo/Downloads/java7_upgrade_patch.txt";
private long begin;
@Before
public void setup(){
begin = System.currentTimeMillis();
}
@After
public void end() {
System.out.println("takes " + (System.currentTimeMillis() - begin) + " ms");
}
@Test
public void sequential() throws IOException {
int count = 0;
boolean lastSpace = true;
byte[] bytes = Files.readAllBytes(Paths.get(fileName));
for (int i=0; i<bytes.length; i++){
if (Character.isWhitespace(bytes[i])){
lastSpace = true;
} else {
// every time we met the first non-whitespace character, we add counter by 1.
if (lastSpace)
count ++;
lastSpace = false;
}
}
System.out.println("Count sequentially: " + count);
}
@Test
public void parallel_1() throws IOException {
// this is actually not good, since 'split' will sequentially go through the content completely.
long count = Files.lines(Paths.get(fileName))
.parallel()
// problemmatic if the paragragh is huge
.flatMap(line -> ((List<String>)Arrays.asList(line.split("\\s"))).stream())
.filter(s -> s != null && s.trim().length() > 0).count();
System.out.println("Count parallelly #1: " + count);
}
@Test
public void functional() throws IOException {
Stream<Character> stream = Files.lines(Paths.get(fileName))
// need to append a line-break to each line.
.flatMap(line -> IntStream.range(0, line.length() + 1).mapToObj(i -> i < line.length() ? line.charAt(i) : '\n'));
WordCounter result = stream.reduce(new WordCounter(0, true), WordCounter::accumulate, WordCounter::combine);
System.out.println("Count functionally: " + result.count);
}
private static class WordCounter {
private final int count;
private final boolean lastSpace;
public WordCounter(int count, boolean lastSpace) {
this.count = count;
this.lastSpace = lastSpace;
}
public WordCounter accumulate(Character c){
// System.out.println("Thread-" + Thread.currentThread().getId() + " is handling " + c);
if (Character.isWhitespace(c)){
return lastSpace ? this : new WordCounter(count, true);
} else {
return lastSpace ? new WordCounter(count + 1, false) : this;
}
}
public WordCounter combine(WordCounter counter){
return new WordCounter(count + counter.count, counter.lastSpace);
}
}
@Test
public void parallel_2() throws IOException {
Stream<Character> stream = Files.lines(Paths.get(fileName))
.flatMap(line -> StreamSupport.stream(new WordSpliterator(line + "\n"), true));
WordCounter result = stream.parallel()
.reduce(new WordCounter(0, true), WordCounter::accumulate, WordCounter::combine);
System.out.println("Count parallelly functionally: " + result.count);
}
private static class WordSpliterator implements Spliterator<Character> {
private String sentence;
private int currentPos = 0;
public WordSpliterator(String sentence){
this.sentence = sentence;
}
@Override
public boolean tryAdvance(Consumer<? super Character> action) {
// `action` here is the `WordCounter::accumulate`
action.accept(sentence.charAt(currentPos ++));
return currentPos < sentence.length();
}
@Override
public Spliterator<Character> trySplit() {
int currentSize = sentence.length() - currentPos;
if (currentSize < 10)
return null;
for (int pos = currentSize / 2 + currentPos; pos < sentence.length(); pos++){
if (Character.isWhitespace(sentence.charAt(pos))){
// current thread to handle right part, new thread to process the left part
WordSpliterator spliterator = new WordSpliterator(sentence.substring(currentPos, pos));
// so we have to move the pos to handle the right part.
currentPos = pos;
return spliterator;
}
}
return null;
}
@Override
public long estimateSize() {
return sentence.length() - currentPos;
}
@Override
public int characteristics() {
return ORDERED + SIZED + SUBSIZED + NONNULL + IMMUTABLE;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment