Last active
October 16, 2019 19:08
-
-
Save dfa1/2ac6407c1c6f0d5cad1adb4c1babbee7 to your computer and use it in GitHub Desktop.
Rewriting https://github.com/ChrisPenner/wc in Java 11
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.charset.StandardCharsets; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Map.Entry; | |
import java.util.StringTokenizer; | |
import java.util.TreeMap; | |
import java.util.concurrent.ArrayBlockingQueue; | |
import java.util.concurrent.Callable; | |
import java.util.concurrent.Executors; | |
import java.util.concurrent.Future; | |
import java.util.regex.Pattern; | |
import java.util.stream.Collectors; | |
/**
 * A small {@code wc}-like line/word/character counter built as a
 * producer/consumer pipeline.
 *
 * <p>One {@link Producer} task reads the file line by line and hands batches of
 * lines to {@link #WORKERS} {@link Worker} tasks through a bounded queue. Each
 * worker accumulates its own {@link Result}; the partial results are summed and
 * printed as {@code lines words chars fileName}.
 *
 * <p>Counting rules, as implemented by {@link Worker}:
 * <ul>
 *   <li>every line returned by {@code BufferedReader.readLine()} counts as one line,
 *       including a final line that has no terminator;</li>
 *   <li>words are the tokens produced by a default {@link StringTokenizer}
 *       (separated by space, tab, newline, carriage return or form feed);</li>
 *   <li>characters are {@code line.length() + 1} per line, i.e. exactly one
 *       terminator character is assumed for every line. Files using CRLF, or
 *       lacking a trailing newline, will therefore differ from the byte size.</li>
 * </ul>
 */
public class wc {

    /** Number of worker tasks consuming batches from the queue. */
    private static final int WORKERS = 2;

    /** Number of lines the producer collects before handing a batch to the workers. */
    private static final int BATCH_SIZE = 50_000;

    /** Number of times {@link #main} repeats the whole count (benchmark loop). */
    private static final int REPEAT = 1_000;

    /**
     * End-of-input marker. Workers compare batches against it by identity
     * ({@code ==}), so a real batch that merely equals {@code ["poison"]} is
     * still processed normally.
     */
    private static final List<String> POISON = List.of("poison");

    /** Bounded hand-off between producer and workers; at most 10 batches are in flight. */
    private final ArrayBlockingQueue<List<String>> queue = new ArrayBlockingQueue<>(10);

    /**
     * Counts the file named by {@code args[0]} {@link #REPEAT} times, printing
     * the totals and the elapsed time of every run.
     *
     * @param args command line; {@code args[0]} is the path of the file to count
     * @throws Exception if reading or counting fails in any run
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("usage: java wc <file>");
            System.exit(1);
        }
        for (int i = 1; i <= REPEAT; i++) {
            // nanoTime is monotonic, so the measurement is immune to wall-clock adjustments.
            long start = System.nanoTime();
            new wc().run(args[0]);
            long elapsedMillis = (System.nanoTime() - start) / 1_000_000;
            System.out.printf("run #%s completed in %sms%n", i, elapsedMillis);
        }
    }

    /**
     * Counts lines, words and characters of one file and prints the totals to
     * standard output.
     *
     * @param fileName path of the file to count
     * @throws Exception if a task fails (wrapped in an
     *     {@code ExecutionException}) or the calling thread is interrupted
     */
    public void run(String fileName) throws Exception {
        var executor = Executors.newCachedThreadPool();
        try {
            var futures = new ArrayList<Future<Result>>();
            futures.add(executor.submit(new Producer(fileName)));
            for (int i = 0; i < WORKERS; i++) {
                futures.add(executor.submit(new Worker()));
            }
            List<Result> results = new ArrayList<>();
            for (var future : futures) {
                results.add(future.get());
            }
            totals(fileName, results);
        } finally {
            // On success every task has already finished and this is a plain shutdown.
            // On failure it interrupts tasks still blocked on the queue (workers in
            // take(), producer in put()), so their threads do not keep the JVM alive.
            executor.shutdownNow();
        }
    }

    /** Sums the partial results and prints {@code lines words chars fileName}. */
    private void totals(String fileName, List<Result> results) {
        long chars = results.stream().mapToLong(Result::chars).sum();
        long words = results.stream().mapToLong(Result::words).sum();
        long lines = results.stream().mapToLong(Result::lines).sum();
        System.out.printf("%s %s %s %s%n", lines, words, chars, fileName);
    }

    /** Mutable per-task accumulator; each instance is only updated by the task that created it. */
    private static final class Result {
        private long chars = 0;
        private long words = 0;
        private long lines = 0;

        long chars() {
            return chars;
        }

        long words() {
            return words;
        }

        long lines() {
            return lines;
        }

        void incrementChars(int chars) {
            this.chars += chars;
        }

        void incrementWords(int words) {
            this.words += words;
        }

        void incrementLines() {
            lines++;
        }
    }

    /**
     * Reads the file and publishes its lines to the queue in batches of
     * {@link #BATCH_SIZE}, followed by one poison marker per worker.
     */
    class Producer implements Callable<Result> {
        private final String fileName;

        Producer(String fileName) {
            this.fileName = fileName;
        }

        /**
         * @return an empty {@link Result}; the producer counts nothing itself and
         *     only returns a value so it can share the workers' future list
         * @throws Exception if the file cannot be read or the thread is interrupted
         *     while waiting for queue space
         */
        @Override
        public Result call() throws Exception {
            // ISO-8859-1 maps every byte to exactly one char, so decoding never fails on
            // bytes >= 0x80 (US-ASCII would abort with MalformedInputException) and the
            // character total stays a byte count. ASCII-only files give identical results.
            try (var reader = Files.newBufferedReader(Path.of(fileName), StandardCharsets.ISO_8859_1)) {
                var lines = new ArrayList<String>(BATCH_SIZE);
                String line;
                while ((line = reader.readLine()) != null) {
                    lines.add(line);
                    if (lines.size() == BATCH_SIZE) {
                        queue.put(lines);
                        lines = new ArrayList<>(BATCH_SIZE);
                    }
                }
                // Flush the final, partially filled batch.
                if (!lines.isEmpty()) {
                    queue.put(lines);
                }
                // One marker per worker so that every worker terminates.
                for (int i = 0; i < WORKERS; i++) {
                    queue.put(POISON);
                }
                return new Result();
            }
        }
    }

    /** Takes batches from the queue and counts them until the poison marker arrives. */
    class Worker implements Callable<Result> {

        /**
         * @return the counts of all lines this worker processed
         * @throws Exception if the thread is interrupted while waiting for a batch
         */
        @Override
        public Result call() throws Exception {
            Result result = new Result();
            while (true) {
                var lines = queue.take();
                if (lines == POISON) { // identity check on purpose, see POISON
                    break;
                }
                for (var line : lines) {
                    result.incrementLines();
                    result.incrementWords(new StringTokenizer(line).countTokens());
                    // +1 accounts for the line terminator stripped by readLine().
                    result.incrementChars(line.length() + 1);
                }
            }
            return result;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment