Created
February 14, 2013 07:56
-
-
Save richdougherty/4951224 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public Future<List<String>> crawl(final String url, final int maxDepth, final int currentDepth) { | |
if (maxDepth == currentDepth) { | |
// no need to call | |
return Future.succesful(Collections.emptyList()); | |
} else { | |
// 1. get crawl the URL and get its URLs | |
Future<List<String>> foundUrlsFuture = future(new Callable<List<String>>() { | |
public List<String> call() throws Exception { | |
List<String> results = new ArrayList<String>(); | |
// Simulate Web Crawler | |
System.out.println("Waiting for http://twitter.com/" + currentDepth + "/" + url); | |
Thread.sleep(500); | |
System.out.println("Got results for http://twitter.com/" + currentDepth + "/" + url); | |
String[] foundUrls = new String[] { "http://twitter.com/A/" + currentDepth + "/" + Math.random(), | |
"http://twitter.com/B/" + currentDepth + "/" + Math.random(), | |
"http://twitter.com/C/" + currentDepth + "/" + Math.random() }; | |
// End Simulate Web Crawler | |
return Arrays.asList(foundUrls); | |
} | |
}, ec); | |
// 2. start crawling each URL and sequence the crawl results together | |
Future<Iterable<List<String>>> crawlsFuture = foundUrlsFuture.flatMap(new Mapper<List<String>, Future<Iterable<List<String>>>> { | |
public Future<Iterable<List<String>>> apply(final List<String> foundUrls) { | |
List<Future<List<String>>> crawlFutures = new ArrayList<Future<List<String>>>(); | |
// Add the URLs we just found | |
crawlFutures.add(Future.successful(foundUrls)); | |
// Now recursively add all child URLs | |
for (String u : foundUrls) { | |
results.add(u); | |
Future<List<String>> crawlFuture = crawl(u, maxDepth, currentDepth + 1); | |
crawlFutures.add(crawlFuture); | |
} | |
// Change our Iterable<Future<_>> into a Future<Iterable<_>> | |
return Future.sequence(crawlFutures); | |
} | |
}, ec); | |
// 3. take all the child crawl results and flatten them into a single list | |
Future<Iterable<List<String>>> crawlFuture = childCrawlsFuture.map(new Mapper<Iterable<List<String>>, List<String>> { | |
public List<String>> apply(final Iterable<List<String>> childCrawls) { | |
List<String> result = new ArrayList<String>(); | |
for (String c : childCrawls) { | |
result.addAll(c); | |
} | |
return result; | |
} | |
}, ec); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment