Created
January 11, 2014 23:50
-
-
Save netdance/8378542 to your computer and use it in GitHub Desktop.
Find Bacon Numbers with JDK 8.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package testbacon; | |
import java.time.Duration; | |
import java.time.Instant; | |
import java.util.ArrayList; | |
import java.util.Collection; | |
import java.util.Collections; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.LongSummaryStatistics; | |
import java.util.Map; | |
import java.util.Random; | |
import java.util.Set; | |
import java.util.concurrent.ConcurrentHashMap; | |
import java.util.function.Function; | |
import java.util.function.Predicate; | |
import java.util.function.Supplier; | |
import java.util.logging.Level; | |
import java.util.logging.Logger; | |
import java.util.stream.Collectors; | |
import java.util.stream.Stream; | |
public class TestBacon { | |
private final int NUMRUNS = 10; | |
private final int RESTTIME = 2000; | |
// The following are somewhat realistic numbers: | |
/* | |
private final int NUMACTORS = 2000000; | |
private final int MAXNUMACTORSPERMOVIE = 30; | |
private final int MINNUMACTORSPERMOVIE = 10; | |
private final int NUMMOVIES = 300000; | |
private final int ONESIGMA = NUMACTORS / 10; // 10% of actors get 67% of work | |
*/ | |
// | |
private final int NUMACTORS = 2000000; | |
private final int MAXNUMACTORSPERMOVIE = 10; | |
private final int MINNUMACTORSPERMOVIE = 2; | |
private final int NUMMOVIES = 300000; | |
private final int ONESIGMA = NUMACTORS / 10; // 10% of actors get 67% of work | |
private final int DEEPESTSEARCH = 10; | |
private final int MAXPRINT = 100; | |
// pick two moderately famous actors, in std distribution | |
private final int FIRSTACTOR = (NUMACTORS / 2) + (int) (ONESIGMA * 1.2); | |
private final int SECONDACTOR = (NUMACTORS / 2) + (int) (ONESIGMA * 1.3); | |
// Instead of using integers, you could generate real(ish) names, via the below method. | |
// But since you'd just want to convert them back to integers anyway to | |
// save memory, I don't bother | |
// List<String> nameList = Files.readAllLines(Paths.get("/usr/share/dict/propernames")); | |
public static void main(String[] args) throws Exception { | |
new TestBacon().run(); | |
} | |
private TestBacon() throws Exception { | |
} | |
private void run() throws Exception { | |
// Change to Level.INFO to see lots of... info | |
Logger.getGlobal().setLevel(Level.OFF); | |
final Set<List<Integer>> movies = createDistMovies(); | |
printSizeOfNestedCollection("movies", movies); | |
Map<Integer, Set<Integer>> actorRelations = transformGraphWithParallelStreams(movies); | |
printSizeOfNestedCollection("actorRelations", actorRelations.values()); | |
System.out.println("Initial run Result: " + breadthFirst(actorRelations)); | |
rest(); | |
/* Uncomment to run a full suite of performance tests | |
// Test different create methods | |
benchmark("createMovies", this::createMovies); | |
benchmark("createDistMovies", this::createDistMovies); | |
benchmark("createParallelDistMovies", this::createParallelDistMovies); | |
// Test different parse methods, with constant data | |
benchmark("transformGraph constant data", () -> transformGraph(movies)); | |
benchmark("transformGraphWithStreams constant data", () -> transformGraphWithStreams(movies)); | |
benchmark("transformGraphWithParallelStreams constant data", () -> transformGraphWithParallelStreams(movies)); | |
// Test different parse methods, with evenly distributed data | |
benchmark("transformGraph even data", this::createMovies, this::transformGraph); | |
benchmark("transformGraphWithStreams even data", this::createMovies, this::transformGraphWithStreams); | |
benchmark("transformGraphWithParallelStreams even data", this::createMovies, this::transformGraphWithParallelStreams); | |
// Test different parse methods, with variable distributed data | |
benchmark("transformGraph dist data", this::createDistMovies, this::transformGraph); | |
benchmark("transformGraphWithStreams dist data", this::createDistMovies, this::transformGraphWithStreams); | |
benchmark("transformGraphWithParallelStreams dist data", this::createDistMovies, this::transformGraphWithParallelStreams); | |
benchmark("breadthfirst static data", () -> breadthFirst(actorRelations)); | |
System.out.println("Naive BreadthFirst: " + breadthFirst(actorRelations)); | |
// Test breadthfirst with even distributed data | |
benchmark("breadthfirst even data", () -> { | |
Set<List<Integer>> m = createMovies(); | |
return transformGraphWithParallelStreams(m); | |
}, this::breadthFirst, (result) -> result < 0); | |
*/ | |
// Test breadthfirst with variable distributed data | |
benchmark("breadthfirst variable data", () -> { | |
Set<List<Integer>> m = createDistMovies(); | |
return transformGraphWithParallelStreams(m); | |
}, this::breadthFirst, (result) -> result < 0); | |
} | |
// Create a set, the values of which is a list of randomly selected ints | |
private Set<List<Integer>> createMovies() { | |
long start = System.currentTimeMillis(); | |
Random random = new Random(System.currentTimeMillis()); | |
int spread = MAXNUMACTORSPERMOVIE - MINNUMACTORSPERMOVIE + 1; | |
Set<List<Integer>> movies = Collections.unmodifiableSet( | |
Stream.generate( | |
// make a stream of ramdom numbers, arranged evenly between 1-NUMACTORS | |
() -> random.ints(1, NUMACTORS + 1) | |
.distinct() | |
// the stream should be MINNUMACTORSPERMOVIE - MAXNUMACTORSPERMOVIE in length | |
.limit(random.nextInt(spread) + MINNUMACTORSPERMOVIE) | |
// turn the stream into a List of Integers | |
.boxed().collect(Collectors.toList())) | |
// only create NUMMOVIES Lists | |
.limit(NUMMOVIES) | |
// put those lists in a set | |
.collect(Collectors.toSet()) | |
); | |
Logger.getGlobal().info(() -> "Creating dist movies time: " + (System.currentTimeMillis() - start)); | |
return movies; | |
} | |
private Set<List<Integer>> createDistMovies() { | |
final long start = System.currentTimeMillis(); | |
Random random = new Random(System.currentTimeMillis()); | |
long midpoint = NUMACTORS / 2; | |
int spread = MAXNUMACTORSPERMOVIE - MINNUMACTORSPERMOVIE + 1; | |
Set<List<Integer>> movies = Collections.unmodifiableSet( | |
Stream.generate( | |
// make a stream of ramdom numbers, distributed in a curve, between 1-NUMACTORS | |
() -> Stream.generate(() -> Math.round(random.nextGaussian() * ONESIGMA + midpoint)) | |
.map((l) -> l.intValue()) | |
// remove any unsuitable values, both dups and out of range | |
.filter((i) -> i > 0 && i <= NUMACTORS).distinct() | |
// the stream should be MINNUMACTORSPERMOVIE - MAXNUMACTORSPERMOVIE in length | |
.limit(random.nextInt(spread) + MINNUMACTORSPERMOVIE) | |
// turn the stream into a List of Integers | |
.collect(Collectors.toList())) | |
// only create NUMMOVIES Lists | |
.limit(NUMMOVIES) | |
// put those lists in a set | |
.collect(Collectors.toSet()) | |
); | |
/* Wondering what this distribution will look like? Try this: | |
Random random = new Random(); | |
Map<Long,Long> m = Stream.generate(() -> | |
(Math.abs(Math.round(random.nextGaussian() * 60 + 100)))) | |
.filter((l)-> l <= 200 && l > 0) | |
.limit(1000)//.peek((x) -> Logger.getGlobal().info(x.toString())) | |
.collect(Collectors.groupingBy(Long::longValue,Collectors.counting())); | |
m.keySet().stream().forEach((l)->System.out.println(l + ": " + m.get(l))); */ | |
Logger.getGlobal().info(() -> "Creating movies time: " + (System.currentTimeMillis() - start)); | |
return movies; | |
} | |
private Set<List<Integer>> createParallelDistMovies() { | |
long start = System.currentTimeMillis(); | |
Random random = new Random(System.currentTimeMillis()); | |
long midpoint = NUMACTORS / 2; | |
int spread = MAXNUMACTORSPERMOVIE - MINNUMACTORSPERMOVIE; | |
Set<List<Integer>> movies = Collections.unmodifiableSet( | |
Stream.generate( | |
// make a stream of ramdom numbers, distributed in a curve, between 1-NUMACTORS | |
() -> Stream.generate(() -> { | |
return Math.abs(Math.round( | |
random.nextGaussian() * ONESIGMA + midpoint)); | |
}) | |
.map((l) -> l.intValue()) | |
// remove any unsuitable values, both dups and out of range | |
.filter((i) -> i > 0 && i <= NUMACTORS).distinct() | |
// the stream should be MINNUMACTORSPERMOVIE - MAXNUMACTORSPERMOVIE in length | |
.limit(random.nextInt(spread) + MINNUMACTORSPERMOVIE) | |
// turn the stream into a List of Integers | |
.collect(Collectors.toList())).unordered().parallel() | |
// only create NUMMOVIES Lists | |
.limit(NUMMOVIES) | |
// put those lists in a set | |
.collect(Collectors.toSet()) | |
); | |
Logger.getGlobal().info(() -> "Creating parallel movies time: " + (System.currentTimeMillis() - start)); | |
return movies; | |
} | |
// Given a map with sparse integer key, walk backward from starting point to find a valid key | |
private <T> int findValidActor(Map<Integer, T> map, int start) { | |
int ret = start; | |
while (!map.containsKey(ret) && ret > 0) { | |
ret--; | |
} | |
if (ret <= 0) { | |
throw new RuntimeException("Couldn't find a valid actor"); | |
} | |
return ret; | |
} | |
private void printMovieActors(Set<List<Integer>> set) { | |
StringBuilder sb = new StringBuilder("Movies: "); | |
set.stream().limit(MAXPRINT).forEach((l) -> { | |
sb.append("\nMovie: \n"); | |
l.stream().forEach((s) -> sb.append(s).append(", ")); | |
}); | |
System.out.println(sb.toString()); | |
} | |
private void printActorRelations(Map<Integer, Set<Integer>> map) { | |
StringBuilder sb = new StringBuilder("Actors :"); | |
map.keySet().stream().limit(MAXPRINT).forEach((s) -> { | |
sb.append("\nActor ").append(s).append('\n'); | |
map.get(s).stream().forEach((ss) -> sb.append(ss).append(',')); | |
}); | |
System.out.println(sb.toString()); | |
} | |
private void printSizeOfNestedCollection(String name, Collection<? extends Collection> coll) { | |
long start = System.currentTimeMillis(); | |
LongSummaryStatistics stats = coll.parallelStream().unordered().mapToLong((l) -> (long) l.size()).summaryStatistics(); | |
Logger.getGlobal().info(() -> "sizeOfNestedCollection time: " + (System.currentTimeMillis() - start)); | |
printStats(name + " struct stats", "size", stats); | |
} | |
private Map<Integer, Set<Integer>> transformGraph(Set<List<Integer>> set) { | |
long start = System.currentTimeMillis(); | |
Map<Integer, Set<Integer>> actorRelations = new HashMap<>(); | |
// get all the list of actors for each movie | |
for (List<Integer> movie : set) { | |
// for each actor in a movie | |
for (Integer actor : movie) { | |
// get the list of actors | |
Set<Integer> coactors = new HashSet(movie); | |
// remove self | |
coactors.remove(actor); | |
// add to the list of relations | |
actorRelations.merge(actor, coactors, | |
(existing, update) -> { | |
existing.addAll(update); | |
return existing; | |
}); | |
} | |
} | |
Logger.getGlobal().info(() -> "transformGraph time: " + (System.currentTimeMillis() - start)); | |
return Collections.unmodifiableMap(actorRelations); | |
} | |
private Map<Integer, Set<Integer>> transformGraphWithStreams(Set<List<Integer>> set) { | |
long start = System.currentTimeMillis(); | |
Map<Integer, Set<Integer>> actorRelations = new HashMap<>(); | |
set.stream().forEach((movie) -> { | |
movie.stream().forEach((actor) -> { | |
Set<Integer> coactors = new HashSet(movie); | |
coactors.remove(actor); | |
actorRelations.merge(actor, coactors, | |
(existing, update) -> { | |
existing.addAll(update); | |
return existing; | |
}); | |
}); | |
}); | |
Logger.getGlobal().info(() -> "transformGraphWithStreams time: " + (System.currentTimeMillis() - start)); | |
return Collections.unmodifiableMap(actorRelations); | |
} | |
private Map<Integer, Set<Integer>> transformGraphWithParallelStreams(Set<List<Integer>> set) { | |
long start = System.currentTimeMillis(); | |
Map<Integer, Set<Integer>> actorRelations = new ConcurrentHashMap<>(); | |
set.parallelStream().unordered().forEach((movie) -> { | |
movie.stream().forEach((actor) -> { | |
Set<Integer> coactors = new HashSet(movie); | |
coactors.remove(actor); | |
actorRelations.merge(actor, coactors, | |
(existing, update) -> { | |
existing.addAll(update); | |
return existing; | |
}); | |
}); | |
}); | |
Logger.getGlobal().info(() -> "transformGraphWithParallelStreams time: " + (System.currentTimeMillis() - start)); | |
return Collections.unmodifiableMap(actorRelations); | |
} | |
private Map<Integer, Set<Integer>> precomputeSecondDegree(Map<Integer, Set<Integer>> map) { | |
Instant start = Instant.now(); | |
Map<Integer, Set<Integer>> ret = map.keySet().parallelStream().unordered().collect( | |
Collectors.toMap(Function.identity(), | |
(k) -> { | |
return (Set<Integer>) map.get(k).stream().flatMap((i) -> map.get(i).stream()).collect(Collectors.toSet()); | |
} | |
)); | |
Instant end = Instant.now(); | |
Logger.getGlobal().info(() -> "precomputeSecondDegree time: " + Duration.between(start, end)); | |
return ret; | |
} | |
private int breadthFirst(final Map<Integer, Set<Integer>> actors) { | |
Integer firstactor = findValidActor(actors, FIRSTACTOR); | |
Integer secondactor = findValidActor(actors, SECONDACTOR); | |
Logger.getGlobal().info(() -> "finding connection between actor #" + firstactor + " and actor #" + secondactor); | |
long start = System.currentTimeMillis(); | |
Set<Integer> visited = new HashSet<>(); | |
Integer result = null; | |
Set<Integer> degreelist = actors.get(firstactor); | |
if (degreelist == null || actors.get(secondactor) == null) { //short circuit | |
result = -1; | |
} else { | |
int degree = 1; | |
while (result == null) { | |
if (degreelist.contains(secondactor)) { | |
result = degree; | |
break; | |
} | |
degree++; | |
if (degree > DEEPESTSEARCH) { | |
result = DEEPESTSEARCH; | |
break; | |
} | |
visited.addAll(degreelist); | |
/* You could do it this way... | |
Set<Integer> nextDegreelist = new HashSet<>(); | |
degreelist.stream().forEach((i) -> { | |
nextDegreelist.addAll(actors.get(i)); | |
}); | |
*/ | |
// But this way is nicer, and uses parallel processing | |
Set<Integer> nextDegreelist = degreelist.parallelStream().unordered() | |
.flatMap((i) -> actors.get(i).stream()) | |
.collect(Collectors.toSet()); | |
nextDegreelist.removeAll(visited); | |
if (nextDegreelist.isEmpty()) { | |
result = -1; | |
break; | |
} | |
degreelist = nextDegreelist; | |
} | |
} | |
Logger.getGlobal().info(() -> "naiveBreadthFirst time: " + (System.currentTimeMillis() - start)); | |
final int res = result; | |
Logger.getGlobal().info(() -> "degree of separation found: " + res); | |
return result; | |
} | |
private <T, U> void benchmark(String name, | |
Supplier<T> source, | |
Function<T, U> sink, | |
Predicate<U> ignoreresultif) throws Exception { | |
// first run, warmup | |
sink.apply(source.get()); | |
List<Long> timings = new ArrayList(NUMRUNS); | |
List<U> results = new ArrayList(NUMRUNS); | |
U result = null; | |
Instant start, end; | |
for (int i = 0; i < NUMRUNS; i++) { | |
do { | |
T t = source.get(); | |
rest(); | |
start = Instant.now(); | |
result = sink.apply(t); | |
end = Instant.now(); | |
//log result, if appropriate | |
final U res = result; | |
Logger.getGlobal().info(() -> name + " incremental result: " + res); | |
Instant s = start; | |
Instant e = end; | |
Logger.getGlobal().info(() -> name + " incremental time: " | |
+ (Duration.between(s, e).toMillis())); | |
} while (ignoreresultif.test(result)); | |
timings.add(Duration.between(start, end).toMillis()); | |
results.add(result); | |
} | |
if (result instanceof Integer) { | |
printStats(name, "results", results.stream().mapToLong((num) -> ((Integer) num).intValue()).summaryStatistics()); | |
} | |
printStats(name, "run time", timings.stream().mapToLong((l) -> (long) l).summaryStatistics()); | |
} | |
private <T, S> void benchmark(String name, Supplier<T> source, Function<T, S> sink) throws Exception { | |
// first run, warmup | |
sink.apply(source.get()); | |
List<Long> timings = new ArrayList(NUMRUNS); | |
for (int i = 0; i < NUMRUNS; i++) { | |
T t = source.get(); | |
rest(); | |
Instant start = Instant.now(); | |
sink.apply(t); | |
Instant end = Instant.now(); | |
timings.add(Duration.between(start, end).toMillis()); | |
Logger.getGlobal().info(() -> name + " incremental time: " + (Duration.between(start, end).toMillis())); | |
} | |
printStats(name, "run time", timings.stream().mapToLong((l) -> (long) l).summaryStatistics()); | |
} | |
private void benchmark(String name, Supplier supplier) throws Exception { | |
// first run, warmup | |
supplier.get(); | |
List<Long> timings = new ArrayList(NUMRUNS); | |
for (int i = 0; i < NUMRUNS; i++) { | |
rest(); | |
Instant start = Instant.now(); | |
supplier.get(); | |
Instant end = Instant.now(); | |
timings.add(Duration.between(start, end).toMillis()); | |
Logger.getGlobal().info(() -> name + " incremental time: " + (Duration.between(start, end).toMillis())); | |
} | |
printStats(name, "run time", timings.stream().mapToLong((l) -> (long) l).summaryStatistics()); | |
} | |
private void printStats(String name, String append, LongSummaryStatistics stats) { | |
System.out.println(name + " Num Runs: " + stats.getCount()); | |
System.out.println(name + " Sum " + append + ": " + stats.getSum()); | |
System.out.println(name + " Average " + append + ": " + stats.getAverage()); | |
System.out.println(name + " Min " + append + ": " + stats.getMin()); | |
System.out.println(name + " Max " + append + ": " + stats.getMax()); | |
} | |
private void rest() throws Exception { | |
System.gc(); | |
Thread.sleep(RESTTIME); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment