Created
July 12, 2020 01:01
-
-
Save drewverlee/5c70dce76791f9dad838f418311d9dc1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Namespace for the toy web-crawler experiments below.
;; core.async is available both through the `a` alias (used for `a/timeout`)
;; and through the referred names listed here.
(ns drewverlee.webcrawler
  (:require [clojure.core.async
             :as a
             :refer [>! <! >!! <!! go chan buffer close! thread
                     onto-chan
                     alts! alts!!]]))
;; my version
;;
;; Crawls a toy link graph (`url->urls`: url -> outgoing urls) on the calling
;; thread. URLs to visit are queued on the `urls` channel; every URL taken from
;; the channel that has not been seen yet is "crawled" (printed) and its
;; outgoing links are queued in turn.
;;
;; Termination: nothing ever signals "queue drained", so the crawl stops when
;; no URL arrives within `idle-ms` milliseconds. A fresh timeout channel is
;; created for every wait, so this is an idle timeout rather than a deadline
;; on the whole crawl (a single shared timeout would cut off any crawl that
;; takes longer than `idle-ms` in total).
;;
;; Prints "crawl:  <url>" per visited URL and finally "crawled:  <set>".
;; Evaluates to nil.
(let [url->urls {:a [:b :e]
                 :c [:a]
                 :d [:e]}
      urls      (chan 10)
      idle-ms   1000
      ;; Blocks for the next queued URL; yields nil when the idle timeout
      ;; fires first (the timeout channel closes, so alts!! returns nil).
      next-url! (fn [] (first (alts!! [urls (a/timeout idle-ms)])))]
  ;; Seed the queue with every known page; `false` keeps `urls` open so that
  ;; newly discovered links can still be queued later.
  (onto-chan urls (keys url->urls) false)
  (loop [url   (next-url!)
         seen? #{}]
    (cond
      ;; Idle timeout hit: report the result and release the queue.
      (nil? url)
      (do
        (println "crawled: " seen?)
        (close! urls))

      ;; Already crawled: skip it and wait for the next URL.
      (seen? url)
      (recur (next-url!) seen?)

      :else
      (do
        (println "crawl: " url)
        ;; Pages without an entry in `url->urls` yield nil, which queues nothing.
        (onto-chan urls (url->urls url) false)
        (recur (next-url!) (conj seen? url))))))
;; Andrian Smith's version
;;
;; Two cooperating go blocks crawl a toy link graph (`url->urls`):
;;   * a worker takes URLs from `to-download`, looks up their outgoing links
;;     on a separate thread, and reports `[url links]` on `downloaded`;
;;   * a coordinator tracks three sets -- `to-visit` (not yet handed to the
;;     worker), `pending?` (handed out, no result yet) and `seen?` (finished).
;;
;; The crawl is complete when both `to-visit` and `pending?` are empty; no
;; timeout is needed. The whole form evaluates to the coordinator's go
;; channel, which yields the final `seen?` set.
(let [url->urls   {:a [:b]
                   :b [:d]
                   :d [:a]}
      to-download (chan 10)
      downloaded  (chan 10)]
  ;; Worker: runs until `to-download` is closed (the take then yields nil).
  (go
    (loop []
      (when-let [url (<! to-download)]
        (>! downloaded [url
                        ;; Lookup runs on its own thread; unknown URLs have no links.
                        (<! (thread (get url->urls url [])))])
        (recur))))
  ;; Coordinator.
  (go
    (loop [to-visit (set (keys url->urls))
           pending? #{}
           seen?    #{}]
      (prn to-visit pending? seen?)
      (if (and (empty? pending?)
               (empty? to-visit))
        (do
          ;; Nothing left to hand out and nothing in flight: close the work
          ;; queue so the worker go-loop exits instead of parking forever.
          (close! to-download)
          seen?)
        (let [next-url   (first to-visit)
              ;; Offer the next URL to the worker (if there is one) while
              ;; also listening for finished downloads, whichever is ready.
              ports      (if next-url
                           [[to-download next-url]
                            downloaded]
                           [downloaded])
              [val port] (alts! ports)]
          (cond
            ;; The put was accepted: the URL is now in flight.
            (= port to-download)
            (recur (disj to-visit next-url)
                   (conj pending? next-url)
                   seen?)

            ;; A download finished: queue its links that are neither in
            ;; flight nor done, and mark the source URL as seen.
            (= port downloaded)
            (let [[from-url to-urls] val]
              (recur (into to-visit
                           (remove #(or (pending? %)
                                        (seen? %)))
                           to-urls)
                     (disj pending? from-url)
                     (conj seen? from-url)))))))))
;; Demonstrates that `alts!!` accepts put operations as well as takes.
;; The port vector holds a take on `c1` and a put of "put!" onto `c2`.
;; Nothing in this snippet writes to `c1`, while the go block below is
;; waiting to take from `c2`, so the put is the operation that completes.
;; For a completed put, `alts!!` returns `[true port]` when the port is open.
;; Prints "put!" (from the go block) and `true`, in no guaranteed order,
;; and evaluates to true.
(let [c1 (chan)
      c2 (chan)]
  ;; Consumer for c2: prints whatever gets put on it.
  (go (println (<! c2)))
  (let [[value channel] (alts!! [c1 [c2 "put!"]])]
    (println value)
    ;; Confirms the completed operation was the put on c2.
    (= channel c2)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment