Skip to content

Instantly share code, notes, and snippets.

@jmoyers
Last active August 8, 2020 03:04
Show Gist options
  • Save jmoyers/242c1c68adb549849714f1c715594cd7 to your computer and use it in GitHub Desktop.
Save jmoyers/242c1c68adb549849714f1c715594cd7 to your computer and use it in GitHub Desktop.
Futures vs Shared Memory/Locks/Conditional Variable
#include <future>
#include <iostream>
#include <string>
#include <unordered_set>
#include <queue>
using namespace std;
class Solution {
public:
string get_domain(string &url) {
auto pos = url.find('/', 7);
return url.substr(7, pos - 7);
}
vector<string> crawl(string startUrl, HtmlParser htmlParser) {
unordered_set<string> visited{startUrl};
queue<future<vector<string>>> futures;
auto domain = get_domain(startUrl);
futures.push(std::async(launch::async, &HtmlParser::getUrls, &htmlParser, startUrl));
while (!futures.empty()) {
auto urls = futures.front().get();
futures.pop();
for (auto &r : urls) {
if (get_domain(r) == domain and visited.find(r) == visited.end()) {
visited.insert(r);
futures.push(std::async(launch::async, &HtmlParser::getUrls, &htmlParser, r));
}
}
}
vector<string> results(visited.begin(), visited.end());
return results;
}
};
#include <algorithm>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
using namespace std;
class Solution {
private:
string hostname;
int pending = 0;
mutex work_mutex;
condition_variable work_available;
vector<string> crawlable;
unordered_set<string> visited;
HtmlParser *parser;
string get_hostname(string url) {
return url.substr(7, url.find('/', 7) - 7);
}
bool check_hostname(string url, string hostname) {
return get_hostname(url) == hostname;
}
void worker() {
string url;
while (true) {
{
// deque a url to crawl
unique_lock lock(work_mutex);
work_available.wait(lock, [&]() {
return !crawlable.empty() || (crawlable.empty() && pending == 0);
});
if (crawlable.empty()) {
return;
}
url = crawlable.back();
crawlable.pop_back();
pending++;
}
vector<string> new_urls = parser->getUrls(url);
{
scoped_lock lock(work_mutex);
for (auto &result : new_urls) {
if (visited.find(result) == visited.end() and
check_hostname(result, hostname)) {
crawlable.push_back(result);
visited.insert(result);
}
}
pending--;
work_available.notify_all();
}
}
}
public:
vector<string> crawl(string startUrl, HtmlParser htmlParser) {
crawlable.push_back(startUrl);
visited.insert(startUrl);
hostname = get_hostname(startUrl);
parser = &htmlParser;
int num_threads = thread::hardware_concurrency();
vector<thread> threads;
for (int i = 0; i < num_threads; i++) {
threads.emplace_back(&Solution::worker, this);
}
work_available.notify_all();
for (auto &t : threads)
t.join();
vector<string> results(visited.begin(), visited.end());
return results;
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment