Skip to content

Instantly share code, notes, and snippets.

@InfoSec812
Created August 7, 2016 13:25
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save InfoSec812/f7b03ad627f6e194c793aa908febafdc to your computer and use it in GitHub Desktop.
Save InfoSec812/f7b03ad627f6e194c793aa908febafdc to your computer and use it in GitHub Desktop.
Use Java 8 parallel streams to read a file and tally word counts
package us.juggl.twentysixteen.august;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static java.lang.Integer.*;
/**
* Created by dphillips on 8/6/16.
*/
/**
 * Reads a text file and prints the five most frequently used words, tallying
 * counts with a Java 8 parallel stream over a ConcurrentHashMap of LongAdders.
 */
public class SimplParallelStreamWordCounteList {

    public static void main(String[] args) throws Exception {
        System.out.println("\n\nParallel word count example using Old Testament King James bible");
        textWordCount("kjvdat.txt");
    }

    /**
     * Prints the top 5 most frequently used words in the given file, then the
     * elapsed wall-clock time in milliseconds.
     *
     * @param fileName path of the text file to analyze
     * @throws Exception if the file cannot be read
     */
    private static void textWordCount(String fileName) throws Exception {
        long start = Instant.now().toEpochMilli();
        ConcurrentHashMap<String, LongAdder> wordCounts = new ConcurrentHashMap<>();
        System.out.println("\tReading file: " + fileName);
        Path filePath = Paths.get(fileName);
        // readAllLines yields an ArrayList, which splits well for parallel streams.
        Files.readAllLines(filePath)
                .parallelStream()                 // process lines in parallel
                .map(line -> line.split("\\s+"))  // split each line into words
                .flatMap(Arrays::stream)          // Stream<String[]> -> Stream<String>
                .filter(w -> w.matches("\\w+"))   // drop non-word tokens
                .map(String::toLowerCase)
                // computeIfAbsent is atomic on ConcurrentHashMap. The original
                // containsKey/put sequence could race under the parallel stream:
                // two threads could each install a LongAdder for the same word,
                // silently losing one thread's increments.
                .forEach(word -> wordCounts.computeIfAbsent(word, k -> new LongAdder()).increment());
        // Sort entries by count directly, rather than formatting counts into
        // strings and re-parsing them inside the comparator as before.
        wordCounts.entrySet()
                .stream()
                .sorted((a, b) -> Long.compare(b.getValue().sum(), a.getValue().sum()))
                .limit(5)
                .forEach(e -> System.out.println(String.format("\t%-10d %s", e.getValue().sum(), e.getKey())));
        long end = Instant.now().toEpochMilli();
        System.out.println(String.format("\tCompleted in %d milliseconds", (end - start)));
    }
}
@niksum
Copy link

niksum commented Jul 4, 2017

Great example thanks

@marwensaid
Copy link

very helpful! thx

@navczydev
Copy link

I think it's not optimized, because the stream returned by the Files.lines method cannot be processed efficiently in parallel.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment