Skip to content

Instantly share code, notes, and snippets.

@drone29a
Created January 6, 2011 18:27
Show Gist options
  • Save drone29a/768301 to your computer and use it in GitHub Desktop.
Save drone29a/768301 to your computer and use it in GitHub Desktop.
(import java.util.concurrent.Executors)
(use '[plumbing.core :only [wait-until with-log]])
(require '[work.core :as work]
'[fetcher.core :as fetcher])
;; Sample URLs used as benchmark input. `queue-input` cycles through this
;; vector to produce as many fetch requests as a run needs. Every entry must be
;; an absolute http(s) URL: `slurp` treats anything else as a local file path.
(def test-feed-urls
  ["http://daringfireball.net/index.xml"
   "http://www.rollingstone.com/siteServices/rss/allNews"
   "http://feeds.feedburner.com/iamwhitePosterous"
   "http://www.9to5mac.com/feed"
   "http://abovethecrowd.com/feed/"
   "http://feeds.feedburner.com/pulsosocial"
   "http://www.bytearray.org/?feed=rss2"
   "http://usabilitytestinghowto.blogspot.com/feeds/posts/default"
   "http://mgalligan.com/rss"
   "http://feeds.searchengineland.com/searchcap"
   "http://feeds.feedburner.com/palletops"
   "http://googledevjp.blogspot.com/feeds/posts/default"
   "http://blog.boxee.tv/feed/"
   "http://twitgoo.com/1r6osl/?format=rss"
   "http://davidrcole.com/feed/"
   "http://milancermak.posterous.com/rss.xml"
   "http://typedia.com/blog/feed/"
   "http://diveintohtml5.org//hg.diveintohtml5.org/hgweb.cgi/atom-log"
   "http://www.curious-creature.org/feed/"
   "http://networkeffect.allthingsd.com/feed/"
   "http://feeds.feedburner.com/garry"
   "http://feeds.feedburner.com/devourfeed"
   "http://www.princeton.edu/main/feed.xml"
   "http://www.mdshakes.org/feed/"
   "http://blog.javorek.net/feed/"
   "http://www.dataists.com/feed/"
   "http://kinecthacks.net/feed/"
   "http://www.dailymile.com/opensearch.xml"
   "http://feeds.g4tv.com/g4tv/thefeed"
   "http://www.good.is/rss/main"
   "http://cityroom.blogs.nytimes.com/feed/"
   "http://store.androidandme.com/rss/rssnewitems.php"
   "http://www.whedonesque.com/rss.xml.php"
   "http://www.cs.umass.edu/frontpage/feed"
   "http://scripting.com/rss.xml"
   "http://cms.myspacecdn.com/cms/api/opensearch_people.xml"
   "http://frozencanuck.wordpress.com/feed/"
   "http://hbaseblog.com/feed/"
   "http://www.trueventures.com/xmlrpc.php"
   "http://opinionator.blogs.nytimes.com/feed/"
   "http://www.kurzweilai.net/xmlrpc.php?rsd"
   "http://www.railway-technology.com/news-rss.xml"
   "http://heartbreaknympho.com/feed/"
   "http://mockupstogo.net/rss.xml"
   "https://github.com/buildbox/contentcheck-maven-plugin/commits/master.atom"
   "http://feliciaday.com/feed"
   "http://blog.makezine.com/archive/make_store/index.xml"
   "http://www.zdnet.com/search?t=1,7&mode=rss"
   "http://www.techdirt.com/techdirt_rss.xml"
   "http://www.forkparty.com/feed/"
   "http://feeds.feedburner.com/GoogleOpenSourceBlog"
   "http://crazybob.org/roller/rss/crazybob"
   "http://blogs.strat-cons.com/?feed=rss2"
   "http://wp.appadvice.com/xmlrpc.php"
   "http://answers.onstartups.com/feeds/question/329"
   "http://sleepinghedgehog.com/feed/"
   "http://www.heureka.cz/direct/firefox/search.xml"
   "http://x264dev.multimedia.cx/feed"
   "http://allthestuffido.com/rss.xml"
   "http://lab.andre-michelle.com/feed/"
   "http://beervana.blogspot.com/feeds/posts/default"
   "http://battellemedia.com/index.xml"
   "http://www.letsredu.com/xmlrpc.php"
   "http://joeposnanski.blogspot.com/feeds/posts/default"
   "http://feeds.feedburner.com/5by5"
   "http://picasaweb.google.com/data/feed/base/user/ukazteiphone/albumid/5538997136458996465?alt=rss&kind=photo&hl=en_US"
   "http://www.investorplace.com/feed/"
   "http://design-daily.com/feed/"
   "http://www.podnikanivusa.com/feed/"
   "https://github.com/dochang/asia/commits/master.atom"
   "http://aria42.wordpress.com/feed/"
   "http://feeds.venturehacks.com/venturehacks"
   "http://insidescoopsf.sfgate.com/paololucchesi/feed/"
   "http://blog.coderanger.net/atom.xml"
   "http://www.blogger.com/feeds/20663591/posts/default"
   "http://www.aptima.com/news.xml"
   "http://politics.nytimes.com/congress/votes/house/atom"
   "http://feeds.feedburner.com/KevinKelly"
   "http://pixycz.posterous.com/rss.xml"
   "http://www.letemsvetemapplem.eu/wp-includes/wlwmanifest.xml"
   "http://www.asjava.com/feed/"
   "http://blog.summation.net/rss.xml"
   "http://corduroyclub.com/feed"
   "http://www.w3.org/News/atom.xml"
   "http://wiselivingblog.com/feed/"
   "http://arronla.com/feed/"
   "http://feeds.feedburner.com/seomoz"
   "http://www.tmcamp.com/feed/"
   "http://feeds.feedburner.com/scoutmob/nyc"
   "http://streeteasy.com/meta/nyc_opensearch_plugin.xml"
   "http://blog.getcloudapp.com/feed.xml"
   "http://www.cindyalvarez.com/feed/"
   "http://www.mactalk.com.au/feed/"
   "http://cdixon.org/feed/"
   "http://blog.backblaze.com/feed/"
   "http://blog.mozilla.com/feed/"
   "http://www.adafruit.com/blog/feed/"
   "http://alanvanroemburg.tumblr.com/rss"
   "http://www.funnyordie.com/videos.rss"
   "http://hellohealth.com/feed/"
   "http://www.tofugu.com/feed/"])
(defn fu-sync
  "Builds a zero-argument benchmark starter that fetches feeds with blocking
  `slurp` calls on a fixed-size thread pool.

  producer    - zero-argument fn returning the next feed (its :url is read),
                or nil once the input is exhausted.
  num-threads - size of the fixed thread pool.

  Calling the returned fn drains `producer` on the calling thread, submits one
  task per feed and returns {:p pool :f finished}, where `finished` is an atom
  holding a vector of the URLs fetched successfully so far. The pool is not
  shut down here; that is left to the caller."
  [producer num-threads]
  (fn []
    (let [finished (atom [])
          pool     (Executors/newFixedThreadPool num-threads)
          feeds    (take-while (complement nil?) (repeatedly producer))]
      (doseq [feed feeds]
        (let [url (:url feed)
              ;; Hint as Runnable so .submit resolves to a single overload
              ;; (Clojure fns implement both Runnable and Callable).
              ^Runnable task
              (fn []
                (try
                  (slurp url)
                  (swap! finished conj url)
                  (catch Exception e
                    ;; Without this the exception would only be stored in the
                    ;; Future returned by .submit, which nobody inspects, so
                    ;; failed fetches would vanish without a trace.
                    (binding [*out* *err*]
                      (println "fu-sync: failed to fetch" url "-" (str e))))))]
          (.submit pool task)))
      {:p pool :f finished})))
(defn queue-input
  "Returns a zero-argument producer fn that hands out `count` feed requests,
  taken by cycling through `test-feed-urls`.

  Each call returns a map {:key url :url url} for the next URL, or nil once
  all `count` items have been handed out.

  The head of the queue is claimed with a compare-and-set! retry loop, so each
  item is handed out exactly once even when the producer is called from
  several threads at the same time. Reading the head and then calling
  `swap!` separately would let two callers observe the same head."
  [count]
  (let [xs (atom (take count (cycle test-feed-urls)))]
    (fn []
      (loop []
        (let [pending @xs]
          ;; Only the caller whose snapshot is still current may take its
          ;; head; anyone who lost the race retries with a fresh snapshot.
          (if (compare-and-set! xs pending (rest pending))
            (when-let [x (first pending)]
              {:key x :url x})
            (recur)))))))
(defn fu-async
  "Builds a zero-argument benchmark starter that hands the fetching over to
  `work/queue-work`.

  producer - passed to `work/queue-work` as its :in option.

  Calling the returned fn creates a fresh atom holding an empty vector, starts
  the work queue with `fetcher/fetch` wrapped by `with-log :error` as :f, and
  returns {:p work-q :f finished}. The :out callback conj-es the :url of each
  value it receives onto the atom.
  NOTE(review): the threading and scheduling behaviour behind `work/async`
  and `work/queue-work` is not visible in this file - confirm against the
  work library."
  [producer]
  (fn []
    (let [done    (atom [])
          record! (fn [x] (swap! done conj (:url x)))
          queue   (work/queue-work
                   {:f       (with-log :error fetcher/fetch)
                    :in      producer
                    :out     record!
                    :threads (work/available-processors)
                    :exec    work/async})]
      {:p queue :f done})))
(defn time-reqs
  "Runs one benchmark and reports how many URLs were fetched.

  f    - zero-argument starter (as built by `fu-sync` or `fu-async`) returning
         {:p pool :f finished}, where `finished` is an atom of fetched URLs.
  num  - number of finished fetches to wait for.
  secs - passed to `wait-until` as its second argument.
         NOTE(review): presumably a timeout in seconds - confirm against
         plumbing.core.

  Prints the elapsed time of the wait (via `time`), shuts the pool down and
  prints a summary line. Returns nil."
  [f num secs]
  (let [{finished :f
         pool     :p} (f)]
    (try
      (time (wait-until #(>= (count @finished) num) secs))
      (finally
        ;; Always release the pool, even if the wait throws; otherwise its
        ;; worker threads would be left running.
        (.shutdown pool)))
    (println (format "Fetched %d of %d URLs." (count @finished) num))))
;;; Example
;;; ;; Asynchronously request 1000 URLs, see how long it takes to get 900 responses,
;;; ;; but don't wait longer than 60 seconds.
;;; (time-reqs (fu-async (queue-input 1000)) 900 60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment