Skip to content

Instantly share code, notes, and snippets.

@netdance
Created January 11, 2014 23:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save netdance/8378542 to your computer and use it in GitHub Desktop.
Save netdance/8378542 to your computer and use it in GitHub Desktop.
Find Bacon Numbers with JDK 8.
package testbacon;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.LongSummaryStatistics;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class TestBacon {
private final int NUMRUNS = 10;
private final int RESTTIME = 2000;
// The following are somewhat realistic numbers:
/*
private final int NUMACTORS = 2000000;
private final int MAXNUMACTORSPERMOVIE = 30;
private final int MINNUMACTORSPERMOVIE = 10;
private final int NUMMOVIES = 300000;
private final int ONESIGMA = NUMACTORS / 10; // 10% of actors get 67% of work
*/
//
private final int NUMACTORS = 2000000;
private final int MAXNUMACTORSPERMOVIE = 10;
private final int MINNUMACTORSPERMOVIE = 2;
private final int NUMMOVIES = 300000;
private final int ONESIGMA = NUMACTORS / 10; // 10% of actors get 67% of work
private final int DEEPESTSEARCH = 10;
private final int MAXPRINT = 100;
// pick two moderately famous actors, in std distribution
private final int FIRSTACTOR = (NUMACTORS / 2) + (int) (ONESIGMA * 1.2);
private final int SECONDACTOR = (NUMACTORS / 2) + (int) (ONESIGMA * 1.3);
// Instead of using integers, you could generate real(ish) names, via the below method.
// But since you'd just want to convert them back to integers anyway to
// save memory, I don't bother
// List<String> nameList = Files.readAllLines(Paths.get("/usr/share/dict/propernames"));
public static void main(String[] args) throws Exception {
new TestBacon().run();
}
private TestBacon() throws Exception {
}
private void run() throws Exception {
// Change to Level.INFO to see lots of... info
Logger.getGlobal().setLevel(Level.OFF);
final Set<List<Integer>> movies = createDistMovies();
printSizeOfNestedCollection("movies", movies);
Map<Integer, Set<Integer>> actorRelations = transformGraphWithParallelStreams(movies);
printSizeOfNestedCollection("actorRelations", actorRelations.values());
System.out.println("Initial run Result: " + breadthFirst(actorRelations));
rest();
/* Uncomment to run a full suite of performance tests
// Test different create methods
benchmark("createMovies", this::createMovies);
benchmark("createDistMovies", this::createDistMovies);
benchmark("createParallelDistMovies", this::createParallelDistMovies);
// Test different parse methods, with constant data
benchmark("transformGraph constant data", () -> transformGraph(movies));
benchmark("transformGraphWithStreams constant data", () -> transformGraphWithStreams(movies));
benchmark("transformGraphWithParallelStreams constant data", () -> transformGraphWithParallelStreams(movies));
// Test different parse methods, with evenly distributed data
benchmark("transformGraph even data", this::createMovies, this::transformGraph);
benchmark("transformGraphWithStreams even data", this::createMovies, this::transformGraphWithStreams);
benchmark("transformGraphWithParallelStreams even data", this::createMovies, this::transformGraphWithParallelStreams);
// Test different parse methods, with variable distributed data
benchmark("transformGraph dist data", this::createDistMovies, this::transformGraph);
benchmark("transformGraphWithStreams dist data", this::createDistMovies, this::transformGraphWithStreams);
benchmark("transformGraphWithParallelStreams dist data", this::createDistMovies, this::transformGraphWithParallelStreams);
benchmark("breadthfirst static data", () -> breadthFirst(actorRelations));
System.out.println("Naive BreadthFirst: " + breadthFirst(actorRelations));
// Test breadthfirst with even distributed data
benchmark("breadthfirst even data", () -> {
Set<List<Integer>> m = createMovies();
return transformGraphWithParallelStreams(m);
}, this::breadthFirst, (result) -> result < 0);
*/
// Test breadthfirst with variable distributed data
benchmark("breadthfirst variable data", () -> {
Set<List<Integer>> m = createDistMovies();
return transformGraphWithParallelStreams(m);
}, this::breadthFirst, (result) -> result < 0);
}
// Create a set, the values of which is a list of randomly selected ints
private Set<List<Integer>> createMovies() {
long start = System.currentTimeMillis();
Random random = new Random(System.currentTimeMillis());
int spread = MAXNUMACTORSPERMOVIE - MINNUMACTORSPERMOVIE + 1;
Set<List<Integer>> movies = Collections.unmodifiableSet(
Stream.generate(
// make a stream of ramdom numbers, arranged evenly between 1-NUMACTORS
() -> random.ints(1, NUMACTORS + 1)
.distinct()
// the stream should be MINNUMACTORSPERMOVIE - MAXNUMACTORSPERMOVIE in length
.limit(random.nextInt(spread) + MINNUMACTORSPERMOVIE)
// turn the stream into a List of Integers
.boxed().collect(Collectors.toList()))
// only create NUMMOVIES Lists
.limit(NUMMOVIES)
// put those lists in a set
.collect(Collectors.toSet())
);
Logger.getGlobal().info(() -> "Creating dist movies time: " + (System.currentTimeMillis() - start));
return movies;
}
private Set<List<Integer>> createDistMovies() {
final long start = System.currentTimeMillis();
Random random = new Random(System.currentTimeMillis());
long midpoint = NUMACTORS / 2;
int spread = MAXNUMACTORSPERMOVIE - MINNUMACTORSPERMOVIE + 1;
Set<List<Integer>> movies = Collections.unmodifiableSet(
Stream.generate(
// make a stream of ramdom numbers, distributed in a curve, between 1-NUMACTORS
() -> Stream.generate(() -> Math.round(random.nextGaussian() * ONESIGMA + midpoint))
.map((l) -> l.intValue())
// remove any unsuitable values, both dups and out of range
.filter((i) -> i > 0 && i <= NUMACTORS).distinct()
// the stream should be MINNUMACTORSPERMOVIE - MAXNUMACTORSPERMOVIE in length
.limit(random.nextInt(spread) + MINNUMACTORSPERMOVIE)
// turn the stream into a List of Integers
.collect(Collectors.toList()))
// only create NUMMOVIES Lists
.limit(NUMMOVIES)
// put those lists in a set
.collect(Collectors.toSet())
);
/* Wondering what this distribution will look like? Try this:
Random random = new Random();
Map<Long,Long> m = Stream.generate(() ->
(Math.abs(Math.round(random.nextGaussian() * 60 + 100))))
.filter((l)-> l <= 200 && l > 0)
.limit(1000)//.peek((x) -> Logger.getGlobal().info(x.toString()))
.collect(Collectors.groupingBy(Long::longValue,Collectors.counting()));
m.keySet().stream().forEach((l)->System.out.println(l + ": " + m.get(l))); */
Logger.getGlobal().info(() -> "Creating movies time: " + (System.currentTimeMillis() - start));
return movies;
}
private Set<List<Integer>> createParallelDistMovies() {
long start = System.currentTimeMillis();
Random random = new Random(System.currentTimeMillis());
long midpoint = NUMACTORS / 2;
int spread = MAXNUMACTORSPERMOVIE - MINNUMACTORSPERMOVIE;
Set<List<Integer>> movies = Collections.unmodifiableSet(
Stream.generate(
// make a stream of ramdom numbers, distributed in a curve, between 1-NUMACTORS
() -> Stream.generate(() -> {
return Math.abs(Math.round(
random.nextGaussian() * ONESIGMA + midpoint));
})
.map((l) -> l.intValue())
// remove any unsuitable values, both dups and out of range
.filter((i) -> i > 0 && i <= NUMACTORS).distinct()
// the stream should be MINNUMACTORSPERMOVIE - MAXNUMACTORSPERMOVIE in length
.limit(random.nextInt(spread) + MINNUMACTORSPERMOVIE)
// turn the stream into a List of Integers
.collect(Collectors.toList())).unordered().parallel()
// only create NUMMOVIES Lists
.limit(NUMMOVIES)
// put those lists in a set
.collect(Collectors.toSet())
);
Logger.getGlobal().info(() -> "Creating parallel movies time: " + (System.currentTimeMillis() - start));
return movies;
}
// Given a map with sparse integer key, walk backward from starting point to find a valid key
private <T> int findValidActor(Map<Integer, T> map, int start) {
int ret = start;
while (!map.containsKey(ret) && ret > 0) {
ret--;
}
if (ret <= 0) {
throw new RuntimeException("Couldn't find a valid actor");
}
return ret;
}
private void printMovieActors(Set<List<Integer>> set) {
StringBuilder sb = new StringBuilder("Movies: ");
set.stream().limit(MAXPRINT).forEach((l) -> {
sb.append("\nMovie: \n");
l.stream().forEach((s) -> sb.append(s).append(", "));
});
System.out.println(sb.toString());
}
private void printActorRelations(Map<Integer, Set<Integer>> map) {
StringBuilder sb = new StringBuilder("Actors :");
map.keySet().stream().limit(MAXPRINT).forEach((s) -> {
sb.append("\nActor ").append(s).append('\n');
map.get(s).stream().forEach((ss) -> sb.append(ss).append(','));
});
System.out.println(sb.toString());
}
private void printSizeOfNestedCollection(String name, Collection<? extends Collection> coll) {
long start = System.currentTimeMillis();
LongSummaryStatistics stats = coll.parallelStream().unordered().mapToLong((l) -> (long) l.size()).summaryStatistics();
Logger.getGlobal().info(() -> "sizeOfNestedCollection time: " + (System.currentTimeMillis() - start));
printStats(name + " struct stats", "size", stats);
}
private Map<Integer, Set<Integer>> transformGraph(Set<List<Integer>> set) {
long start = System.currentTimeMillis();
Map<Integer, Set<Integer>> actorRelations = new HashMap<>();
// get all the list of actors for each movie
for (List<Integer> movie : set) {
// for each actor in a movie
for (Integer actor : movie) {
// get the list of actors
Set<Integer> coactors = new HashSet(movie);
// remove self
coactors.remove(actor);
// add to the list of relations
actorRelations.merge(actor, coactors,
(existing, update) -> {
existing.addAll(update);
return existing;
});
}
}
Logger.getGlobal().info(() -> "transformGraph time: " + (System.currentTimeMillis() - start));
return Collections.unmodifiableMap(actorRelations);
}
private Map<Integer, Set<Integer>> transformGraphWithStreams(Set<List<Integer>> set) {
long start = System.currentTimeMillis();
Map<Integer, Set<Integer>> actorRelations = new HashMap<>();
set.stream().forEach((movie) -> {
movie.stream().forEach((actor) -> {
Set<Integer> coactors = new HashSet(movie);
coactors.remove(actor);
actorRelations.merge(actor, coactors,
(existing, update) -> {
existing.addAll(update);
return existing;
});
});
});
Logger.getGlobal().info(() -> "transformGraphWithStreams time: " + (System.currentTimeMillis() - start));
return Collections.unmodifiableMap(actorRelations);
}
private Map<Integer, Set<Integer>> transformGraphWithParallelStreams(Set<List<Integer>> set) {
long start = System.currentTimeMillis();
Map<Integer, Set<Integer>> actorRelations = new ConcurrentHashMap<>();
set.parallelStream().unordered().forEach((movie) -> {
movie.stream().forEach((actor) -> {
Set<Integer> coactors = new HashSet(movie);
coactors.remove(actor);
actorRelations.merge(actor, coactors,
(existing, update) -> {
existing.addAll(update);
return existing;
});
});
});
Logger.getGlobal().info(() -> "transformGraphWithParallelStreams time: " + (System.currentTimeMillis() - start));
return Collections.unmodifiableMap(actorRelations);
}
private Map<Integer, Set<Integer>> precomputeSecondDegree(Map<Integer, Set<Integer>> map) {
Instant start = Instant.now();
Map<Integer, Set<Integer>> ret = map.keySet().parallelStream().unordered().collect(
Collectors.toMap(Function.identity(),
(k) -> {
return (Set<Integer>) map.get(k).stream().flatMap((i) -> map.get(i).stream()).collect(Collectors.toSet());
}
));
Instant end = Instant.now();
Logger.getGlobal().info(() -> "precomputeSecondDegree time: " + Duration.between(start, end));
return ret;
}
private int breadthFirst(final Map<Integer, Set<Integer>> actors) {
Integer firstactor = findValidActor(actors, FIRSTACTOR);
Integer secondactor = findValidActor(actors, SECONDACTOR);
Logger.getGlobal().info(() -> "finding connection between actor #" + firstactor + " and actor #" + secondactor);
long start = System.currentTimeMillis();
Set<Integer> visited = new HashSet<>();
Integer result = null;
Set<Integer> degreelist = actors.get(firstactor);
if (degreelist == null || actors.get(secondactor) == null) { //short circuit
result = -1;
} else {
int degree = 1;
while (result == null) {
if (degreelist.contains(secondactor)) {
result = degree;
break;
}
degree++;
if (degree > DEEPESTSEARCH) {
result = DEEPESTSEARCH;
break;
}
visited.addAll(degreelist);
/* You could do it this way...
Set<Integer> nextDegreelist = new HashSet<>();
degreelist.stream().forEach((i) -> {
nextDegreelist.addAll(actors.get(i));
});
*/
// But this way is nicer, and uses parallel processing
Set<Integer> nextDegreelist = degreelist.parallelStream().unordered()
.flatMap((i) -> actors.get(i).stream())
.collect(Collectors.toSet());
nextDegreelist.removeAll(visited);
if (nextDegreelist.isEmpty()) {
result = -1;
break;
}
degreelist = nextDegreelist;
}
}
Logger.getGlobal().info(() -> "naiveBreadthFirst time: " + (System.currentTimeMillis() - start));
final int res = result;
Logger.getGlobal().info(() -> "degree of separation found: " + res);
return result;
}
private <T, U> void benchmark(String name,
Supplier<T> source,
Function<T, U> sink,
Predicate<U> ignoreresultif) throws Exception {
// first run, warmup
sink.apply(source.get());
List<Long> timings = new ArrayList(NUMRUNS);
List<U> results = new ArrayList(NUMRUNS);
U result = null;
Instant start, end;
for (int i = 0; i < NUMRUNS; i++) {
do {
T t = source.get();
rest();
start = Instant.now();
result = sink.apply(t);
end = Instant.now();
//log result, if appropriate
final U res = result;
Logger.getGlobal().info(() -> name + " incremental result: " + res);
Instant s = start;
Instant e = end;
Logger.getGlobal().info(() -> name + " incremental time: "
+ (Duration.between(s, e).toMillis()));
} while (ignoreresultif.test(result));
timings.add(Duration.between(start, end).toMillis());
results.add(result);
}
if (result instanceof Integer) {
printStats(name, "results", results.stream().mapToLong((num) -> ((Integer) num).intValue()).summaryStatistics());
}
printStats(name, "run time", timings.stream().mapToLong((l) -> (long) l).summaryStatistics());
}
private <T, S> void benchmark(String name, Supplier<T> source, Function<T, S> sink) throws Exception {
// first run, warmup
sink.apply(source.get());
List<Long> timings = new ArrayList(NUMRUNS);
for (int i = 0; i < NUMRUNS; i++) {
T t = source.get();
rest();
Instant start = Instant.now();
sink.apply(t);
Instant end = Instant.now();
timings.add(Duration.between(start, end).toMillis());
Logger.getGlobal().info(() -> name + " incremental time: " + (Duration.between(start, end).toMillis()));
}
printStats(name, "run time", timings.stream().mapToLong((l) -> (long) l).summaryStatistics());
}
private void benchmark(String name, Supplier supplier) throws Exception {
// first run, warmup
supplier.get();
List<Long> timings = new ArrayList(NUMRUNS);
for (int i = 0; i < NUMRUNS; i++) {
rest();
Instant start = Instant.now();
supplier.get();
Instant end = Instant.now();
timings.add(Duration.between(start, end).toMillis());
Logger.getGlobal().info(() -> name + " incremental time: " + (Duration.between(start, end).toMillis()));
}
printStats(name, "run time", timings.stream().mapToLong((l) -> (long) l).summaryStatistics());
}
private void printStats(String name, String append, LongSummaryStatistics stats) {
System.out.println(name + " Num Runs: " + stats.getCount());
System.out.println(name + " Sum " + append + ": " + stats.getSum());
System.out.println(name + " Average " + append + ": " + stats.getAverage());
System.out.println(name + " Min " + append + ": " + stats.getMin());
System.out.println(name + " Max " + append + ": " + stats.getMax());
}
private void rest() throws Exception {
System.gc();
Thread.sleep(RESTTIME);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment