Skip to content

Instantly share code, notes, and snippets.

@Bruno125
Created January 12, 2021 05:08
Show Gist options
  • Save Bruno125/6f99950dfaf0ea42f32820d845f0bd27 to your computer and use it in GitHub Desktop.
Save Bruno125/6f99950dfaf0ea42f32820d845f0bd27 to your computer and use it in GitHub Desktop.
/**
* // This is the HtmlParser's API interface.
* // You should not implement it, or speculate about its implementation
* interface HtmlParser {
* public List<String> getUrls(String url) {}
* }
*/
import java.util.LinkedList;
import java.util.LinkedHashSet;
import java.util.Queue;
import java.util.Set;
import java.net.URI;
import java.net.URISyntaxException;
class Solution {
public List<String> crawl(String startUrl, HtmlParser htmlParser) {
Queue<String> q = new LinkedList<>();
q.add(startUrl);
Set<String> visited = new LinkedHashSet<>();
ExecutorService executor;
while(q.size() > 0) {
Set<String> pendingUrls = new HashSet<>();
int n = q.size();
final CountDownLatch latch = new CountDownLatch(n);
executor = Executors.newFixedThreadPool(5);
while(q.size() > 0) {
String val = q.remove();
visited.add(val);
executor.submit(new Runnable() {
@Override
public void run() {
List<String> childs = htmlParser.getUrls(val);
pendingUrls.addAll(childs);
latch.countDown();
}
});
}
try {
latch.await();
} catch(Exception e) {
}
for(String pending : pendingUrls) {
if(!sameDomains(startUrl, pending)) continue;
if(!visited.contains(pending)) { // check same domain
q.add(pending);
}
}
executor.shutdown();
}
return new ArrayList<String>(visited);
}
private boolean sameDomains(String startUrl, String value) {
try {
return new URI(startUrl).getHost().equals(new URI(value).getHost());
} catch(URISyntaxException e) {
return false;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment