Created
January 12, 2021 05:08
-
-
Save Bruno125/6f99950dfaf0ea42f32820d845f0bd27 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* // This is the HtmlParser's API interface. | |
* // You should not implement it, or speculate about its implementation | |
* interface HtmlParser { | |
* public List<String> getUrls(String url) {} | |
* } | |
*/ | |
import java.util.LinkedList; | |
import java.util.LinkedHashSet; | |
import java.util.Queue; | |
import java.util.Set; | |
import java.net.URI; | |
import java.net.URISyntaxException; | |
class Solution { | |
public List<String> crawl(String startUrl, HtmlParser htmlParser) { | |
Queue<String> q = new LinkedList<>(); | |
q.add(startUrl); | |
Set<String> visited = new LinkedHashSet<>(); | |
ExecutorService executor; | |
while(q.size() > 0) { | |
Set<String> pendingUrls = new HashSet<>(); | |
int n = q.size(); | |
final CountDownLatch latch = new CountDownLatch(n); | |
executor = Executors.newFixedThreadPool(5); | |
while(q.size() > 0) { | |
String val = q.remove(); | |
visited.add(val); | |
executor.submit(new Runnable() { | |
@Override | |
public void run() { | |
List<String> childs = htmlParser.getUrls(val); | |
pendingUrls.addAll(childs); | |
latch.countDown(); | |
} | |
}); | |
} | |
try { | |
latch.await(); | |
} catch(Exception e) { | |
} | |
for(String pending : pendingUrls) { | |
if(!sameDomains(startUrl, pending)) continue; | |
if(!visited.contains(pending)) { // check same domain | |
q.add(pending); | |
} | |
} | |
executor.shutdown(); | |
} | |
return new ArrayList<String>(visited); | |
} | |
private boolean sameDomains(String startUrl, String value) { | |
try { | |
return new URI(startUrl).getHost().equals(new URI(value).getHost()); | |
} catch(URISyntaxException e) { | |
return false; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment