Skip to content

Instantly share code, notes, and snippets.

@jackson-rz
Created November 22, 2019 14:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jackson-rz/718751c30dc267a85c3b4f0aa130f2ea to your computer and use it in GitHub Desktop.
Save jackson-rz/718751c30dc267a85c3b4f0aa130f2ea to your computer and use it in GitHub Desktop.
Multithreaded web Crawler
/**
 * Entry point for the multithreaded crawl: derives the host prefix of the
 * start URL, launches the root {@link Crawler} on its own thread, and returns
 * every URL the crawl recorded.
 */
class Solution {
    /**
     * Crawls from {@code startUrl} and returns the URLs collected in
     * {@code Crawler.result}.
     *
     * @param startUrl   the URL to begin from
     * @param htmlParser parser handed to every {@link Crawler}
     * @return a new list holding the contents of the shared result set
     */
    public List<String> crawl(String startUrl, HtmlParser htmlParser) {
        // The host prefix is everything before the first '/' that follows the
        // scheme separator. Locating "://" instead of assuming offset 7 keeps
        // "http://" behaving as before and also handles other schemes such as
        // "https://", whose second slash sits exactly at index 7.
        int schemeEnd = startUrl.indexOf("://");
        int hostStart = (schemeEnd >= 0) ? schemeEnd + 3 : 0;
        int pathStart = startUrl.indexOf('/', hostStart);
        String hostname = (pathStart != -1) ? startUrl.substring(0, pathStart) : startUrl;

        // The result set is a static field shared by all crawls, so it must be
        // reset per call. A concurrent set is used because the worker threads
        // read and write it at the same time.
        Crawler.result = java.util.concurrent.ConcurrentHashMap.newKeySet();

        Thread rootThread = new Thread(new Crawler(startUrl, hostname, htmlParser));
        rootThread.start();
        Crawler.joinThread(rootThread); // wait for the whole crawl to finish
        return new ArrayList<>(Crawler.result);
    }
}
/**
 * A crawl task for a single URL. When the URL belongs to the target host and
 * has not been recorded yet, the task records it, then starts one new thread
 * per link returned by the parser and waits for all of them.
 *
 * <p>All tasks share the static {@link #result} set; insertions go through the
 * class-level lock taken by {@link #addUrl(Set, String)}.
 */
class Crawler implements Runnable {
    String startUrl;
    String hostname;
    HtmlParser htmlParser;
    /** URLs recorded so far; shared by every Crawler instance. */
    public static volatile Set<String> result = new HashSet<>();

    /**
     * @param startUrl   the URL this task is responsible for
     * @param hostname   scheme-and-host prefix that crawled URLs must share
     * @param htmlParser parser used to list the links found on a page
     */
    public Crawler(String startUrl, String hostname, HtmlParser htmlParser) {
        this.startUrl = startUrl;
        this.hostname = hostname;
        this.htmlParser = htmlParser;
    }

    /**
     * Records this task's URL and recursively crawls its links, blocking until
     * every child thread has finished (or this thread is interrupted).
     */
    @Override
    public void run() {
        // addUrl reports whether this thread was the one that inserted the
        // URL, so the "seen before?" check and the insert are a single atomic
        // step and each URL is crawled by exactly one thread.
        if (!isOnHost(startUrl, hostname) || !addUrl(result, startUrl)) {
            return;
        }
        List<Thread> children = new ArrayList<>();
        for (String link : htmlParser.getUrls(startUrl)) {
            Thread child = new Thread(new Crawler(link, hostname, htmlParser));
            child.start();
            children.add(child);
        }
        for (Thread child : children) {
            joinThread(child); // wait for all child crawls to complete
        }
    }

    /**
     * Tells whether {@code url} lies under {@code hostname}. A bare prefix
     * match is not enough: "http://a.com.evil.org" starts with "http://a.com"
     * but is a different host, so the prefix must be followed by the end of
     * the URL or by a path, query, or fragment delimiter.
     */
    private static boolean isOnHost(String url, String hostname) {
        if (!url.startsWith(hostname)) {
            return false;
        }
        // An empty prefix, or one already ending in '/', needs no boundary check.
        if (hostname.isEmpty() || hostname.endsWith("/") || url.length() == hostname.length()) {
            return true;
        }
        char next = url.charAt(hostname.length());
        return next == '/' || next == '?' || next == '#';
    }

    /**
     * Adds {@code url} to {@code result} while holding the class-level lock.
     *
     * @param result the set to insert into
     * @param url    the URL to record
     * @return {@code true} if the URL was not already present
     */
    public static synchronized boolean addUrl(Set<String> result, String url) {
        return result.add(url);
    }

    /**
     * Waits for {@code thread} to terminate. If the calling thread is
     * interrupted while waiting, the wait is abandoned and the interrupt flag
     * is restored so callers further up can observe it.
     *
     * @param thread the thread to wait for
     */
    public static void joinThread(Thread thread) {
        try {
            thread.join();
        } catch (InterruptedException e) {
            // Restore the flag instead of swallowing the interruption.
            Thread.currentThread().interrupt();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment