Skip to content

Instantly share code, notes, and snippets.

@dfa1
Last active October 16, 2019 19:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dfa1/2ac6407c1c6f0d5cad1adb4c1babbee7 to your computer and use it in GitHub Desktop.
Save dfa1/2ac6407c1c6f0d5cad1adb4c1babbee7 to your computer and use it in GitHub Desktop.
Rewriting https://github.com/ChrisPenner/wc in Java 11
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * A multi-threaded {@code wc}-style counter: prints the number of lines, words and
 * characters of a text file.
 *
 * <p>One {@link Producer} task reads the file line by line and hands batches of lines to
 * {@link #WORKERS} {@link Worker} tasks through a bounded queue. Each worker accumulates
 * its own {@link Result}; the partial results are summed once every task has finished.
 *
 * <p>Counting notes: the file is decoded as US-ASCII, words are whatever
 * {@link StringTokenizer} yields with its default delimiters, and every line contributes
 * {@code line.length() + 1} characters (i.e. one terminator character per line is assumed).
 */
public class wc {

    /** Number of concurrent counting tasks. */
    private static final int WORKERS = 2;

    /** Number of lines handed to a worker in one batch. */
    private static final int BATCH_SIZE = 50_000;

    /** How many times {@link #main} repeats the whole count (benchmark loop). */
    private static final int REPEAT = 1_000;

    /** Bounded hand-off queue between the producer and the workers. */
    private final ArrayBlockingQueue<List<String>> queue = new ArrayBlockingQueue<>(10);

    /** Sentinel batch, recognised by identity, telling a worker to stop. */
    private final List<String> poison = List.of("poison");

    /**
     * Counts the file named by {@code args[0]} {@link #REPEAT} times, printing the totals
     * and the elapsed time of every run.
     *
     * @param args command-line arguments; the first one is the file to count
     * @throws Exception if any run fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("usage: java wc <file>");
            System.exit(1);
        }
        String fileName = args[0];
        for (int i = 1; i <= REPEAT; i++) {
            // nanoTime is monotonic, so the measurement is immune to wall-clock adjustments.
            long start = System.nanoTime();
            new wc().run(fileName);
            long elapsedMillis = (System.nanoTime() - start) / 1_000_000;
            System.out.printf("run #%s completed in %sms%n", i, elapsedMillis);
        }
    }

    /**
     * Counts lines, words and characters of {@code fileName} and prints
     * {@code "<lines> <words> <chars> <fileName>"} to standard output.
     *
     * @param fileName path of the file to count
     * @throws Exception if reading the file or any counting task fails; task failures
     *         surface as thrown by {@link Future#get()}
     */
    public void run(String fileName) throws Exception {
        var executor = Executors.newCachedThreadPool();
        try {
            var futures = new ArrayList<Future<Result>>();
            futures.add(executor.submit(new Producer(fileName)));
            for (int i = 0; i < WORKERS; i++) {
                futures.add(executor.submit(new Worker()));
            }
            var results = new ArrayList<Result>(futures.size());
            for (var future : futures) {
                results.add(future.get());
            }
            totals(fileName, results);
        } finally {
            // On success every task has already finished, so this is a plain shutdown.
            // On failure it interrupts tasks still blocked on the queue (e.g. workers
            // waiting for a poison pill the failed producer never sent), so no
            // non-daemon thread is left behind to keep the JVM alive.
            executor.shutdownNow();
        }
    }

    /** Sums the partial results and prints the totals line for {@code fileName}. */
    private void totals(String fileName, List<Result> results) {
        long chars = results.stream().mapToLong(Result::chars).sum();
        long words = results.stream().mapToLong(Result::words).sum();
        long lines = results.stream().mapToLong(Result::lines).sum();
        System.out.printf("%s %s %s %s%n", lines, words, chars, fileName);
    }

    /**
     * Mutable accumulator of the counts gathered by a single task. It needs no access to
     * the enclosing {@code wc} instance, hence it is a static nested class.
     */
    private static class Result {
        private long chars = 0;
        private long words = 0;
        private long lines = 0;

        long chars() {
            return chars;
        }

        long words() {
            return words;
        }

        long lines() {
            return lines;
        }

        void incrementChars(int chars) {
            this.chars += chars;
        }

        void incrementWords(int words) {
            this.words += words;
        }

        void incrementLines() {
            lines++;
        }
    }

    /**
     * Reads the file line by line, publishes batches of up to {@link #BATCH_SIZE} lines on
     * the queue, then publishes one poison pill per worker. It counts nothing itself and
     * therefore returns an empty {@link Result}.
     */
    class Producer implements Callable<Result> {
        private final String fileName;

        Producer(String fileName) {
            this.fileName = fileName;
        }

        @Override
        public Result call() throws Exception {
            try (var reader = Files.newBufferedReader(Path.of(fileName), StandardCharsets.US_ASCII)) {
                var batch = new ArrayList<String>(BATCH_SIZE);
                String line;
                while ((line = reader.readLine()) != null) {
                    batch.add(line);
                    if (batch.size() == BATCH_SIZE) {
                        queue.put(batch);
                        // Hand over ownership: the worker now owns the published list.
                        batch = new ArrayList<>(BATCH_SIZE);
                    }
                }
                // Publish the trailing partial batch; an empty one carries no work.
                if (!batch.isEmpty()) {
                    queue.put(batch);
                }
                for (int i = 0; i < WORKERS; i++) {
                    queue.put(poison);
                }
                return new Result();
            }
        }
    }

    /**
     * Takes batches from the queue until it receives the poison pill, accumulating line,
     * word and character counts for every line it sees.
     */
    class Worker implements Callable<Result> {
        @Override
        public Result call() throws Exception {
            Result result = new Result();
            while (true) {
                var lines = queue.take();
                // Identity comparison on purpose: only the sentinel instance stops the worker.
                if (lines == poison) {
                    break;
                }
                for (var line : lines) {
                    result.incrementLines();
                    result.incrementWords(new StringTokenizer(line).countTokens());
                    // +1 accounts for the line terminator stripped by readLine().
                    result.incrementChars(line.length() + 1);
                }
            }
            return result;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment