Skip to content

Instantly share code, notes, and snippets.

@richdougherty
Created February 14, 2013 07:56
Show Gist options
  • Save richdougherty/4951224 to your computer and use it in GitHub Desktop.
Save richdougherty/4951224 to your computer and use it in GitHub Desktop.
public Future<List<String>> crawl(final String url, final int maxDepth, final int currentDepth) {
if (maxDepth == currentDepth) {
// no need to call
return Future.succesful(Collections.emptyList());
} else {
// 1. get crawl the URL and get its URLs
Future<List<String>> foundUrlsFuture = future(new Callable<List<String>>() {
public List<String> call() throws Exception {
List<String> results = new ArrayList<String>();
// Simulate Web Crawler
System.out.println("Waiting for http://twitter.com/" + currentDepth + "/" + url);
Thread.sleep(500);
System.out.println("Got results for http://twitter.com/" + currentDepth + "/" + url);
String[] foundUrls = new String[] { "http://twitter.com/A/" + currentDepth + "/" + Math.random(),
"http://twitter.com/B/" + currentDepth + "/" + Math.random(),
"http://twitter.com/C/" + currentDepth + "/" + Math.random() };
// End Simulate Web Crawler
return Arrays.asList(foundUrls);
}
}, ec);
// 2. start crawling each URL and sequence the crawl results together
Future<Iterable<List<String>>> crawlsFuture = foundUrlsFuture.flatMap(new Mapper<List<String>, Future<Iterable<List<String>>>> {
public Future<Iterable<List<String>>> apply(final List<String> foundUrls) {
List<Future<List<String>>> crawlFutures = new ArrayList<Future<List<String>>>();
// Add the URLs we just found
crawlFutures.add(Future.successful(foundUrls));
// Now recursively add all child URLs
for (String u : foundUrls) {
results.add(u);
Future<List<String>> crawlFuture = crawl(u, maxDepth, currentDepth + 1);
crawlFutures.add(crawlFuture);
}
// Change our Iterable<Future<_>> into a Future<Iterable<_>>
return Future.sequence(crawlFutures);
}
}, ec);
// 3. take all the child crawl results and flatten them into a single list
Future<Iterable<List<String>>> crawlFuture = childCrawlsFuture.map(new Mapper<Iterable<List<String>>, List<String>> {
public List<String>> apply(final Iterable<List<String>> childCrawls) {
List<String> result = new ArrayList<String>();
for (String c : childCrawls) {
result.addAll(c);
}
return result;
}
}, ec);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment