Created
May 6, 2013 01:13
-
-
Save i-blis/5522826 to your computer and use it in GitHub Desktop.
Building a lazy sequence out of a given subreddit's entries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns playground.experiments.lazy-reddit | |
(:require [net.cgrand.enlive-html :as html])) | |
(defn subreddit-url
  "Returns the URL of the first listing page of the subreddit called `name`."
  [name]
  (let [base "http://www.reddit.com/r/"]
    (str base name)))
(defn fetch-page
  "Loads the resource at `url` (a string) through Enlive's `html-resource`
  and returns whatever that call returns."
  [url]
  (-> url
      java.net.URL.
      html/html-resource))
(defn make-integer
  "Builds a java.lang.Integer from `n`. Any exception raised while doing so
  (for example text that is not a number) is swallowed and 0 is returned."
  [n]
  (try
    (new Integer n)
    (catch Exception _
      0)))
(defn page-entries
  "Fetches the listing page at `url` and returns a lazy sequence with one map
  per `div.thing` node found on it. Each map holds
    :title - text of the first `a.title` node inside the thing
    :score - text of the first `div.score.unvoted` node, run through
             `make-integer` (so unparsable text becomes 0)."
  [url]
  (let [page       (fetch-page url)
        things     (html/select page [:div.thing])
        ;; Text of the first node under `node` that matches `selector`.
        first-text (fn [node selector]
                     (html/text (first (html/select node selector))))]
    (map (fn [thing]
           (hash-map
             :title (first-text thing [:a.title])
             :score (make-integer (first-text thing [:div.score.unvoted]))))
         things)))
(defn next-url
  "Fetches the listing page at `url` and returns the :href attribute of the
  first node matched inside `p.nextprev` whose :rel attribute contains
  \"next\". Returns nil when no such node exists."
  [url]
  (let [page      (fetch-page url)
        next-link (first (html/select page [:p.nextprev (html/attr-has :rel "next")]))]
    (get-in next-link [:attrs :href])))
(defn entries
  "Returns a lazy sequence of the entries on the listing page at `url`,
  followed by the entries of every page reachable through `next-url`.

  A nil `url` yields an empty sequence. `next-url` returns nil when a page has
  no next link, so this is what ends the sequence after the last page instead
  of attempting to fetch a nil URL.

  Nothing is fetched until the sequence is realised; the following page is
  only looked up once the current page's entries have been consumed."
  [url]
  (lazy-seq
    (when url
      ;; lazy-cat wraps both parts in lazy-seq, so the next-url lookup (and
      ;; the recursive call) is deferred until the current page is exhausted.
      (lazy-cat (page-entries url)
                (entries (next-url url))))))
(defn subreddit
  "Returns the lazy sequence of entries for the subreddit called `name`,
  starting from the URL built by `subreddit-url`."
  [name]
  (entries (subreddit-url name)))
;; (take 30 (subreddit "clojure")) | |
;; (take-while #(<= (:score %) 6) (subreddit "clojure")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' | |
require 'nokogiri' | |
# Lazily walks the listing pages of a subreddit.
#
# Every entry is a Hash with two keys: :title (String) and :points (Integer).
# Pages are downloaded only when the buffered entries run out, and iteration
# ends after the last page, i.e. the first page without a "next" link.
#
# The page cursor and the entry buffer live on the instance, so entries that
# have been consumed are gone: a later call to #entries continues where the
# previous one stopped. Call #reset to start again from the first page.
class Reddit
  # @param subbredit [String] subreddit name; it is downcased before use
  def initialize(subbredit)
    @start_url = "http://www.reddit.com/r/" + subbredit.downcase
    @url = @start_url
    @entries = []
  end

  # Returns a lazy enumerator over the subreddit's entries.
  #
  # @return [Enumerator::Lazy] yields one Hash (:title, :points) per entry
  def entries
    Enumerator.new do |yielder|
      loop do
        if @entries.empty?
          # A nil URL means the previous page had no "next" link.
          break if @url.nil?

          parse
        else
          yielder << @entries.shift
        end
      end
    end.lazy
  end

  # Drops any buffered entries and rewinds to the first listing page.
  #
  # @return [Array] the new, empty entry buffer
  def reset
    @url = @start_url
    @entries = []
  end

  private

  # Downloads the page at @url, appends its entries to the buffer and points
  # @url at the following page (nil when the page has no "next" link).
  def parse
    # URI.open is used because the Kernel#open patch from open-uri no longer
    # handles URLs on Ruby 3.0 and later.
    page = Nokogiri::HTML(URI.open(@url))
    next_link = page.css('p.nextprev a[rel="nofollow next"]').first
    @url = next_link && next_link['href']
    page.css('div.thing').each do |thing|
      title = thing.css('a.title').text
      # String#to_i turns non-numeric score text into 0.
      points = thing.css('div.score.unvoted').text.to_i
      @entries << { :title => title, :points => points }
    end
  end
end
# r = Reddit.new('ruby') | |
# r.entries.first(30) | |
# r.entries.take_while { |e| e[:points] <= 20 }.to_a
# r.reset | |
# r.entries.first(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Leiningen project definition for the playground/experiments project,
;; version 0.1.0.
(defproject playground/experiments "0.1.0" | |
:description "Experimenting with Clojure for great fun" | |
;; Each dependency is a [group/artifact "version"] pair.
:dependencies [[org.clojure/clojure "1.5.1"] | |
;; NOTE(review): the lazy-reddit namespace shown alongside this file only
;; requires enlive; pomegranate is presumably used elsewhere -- confirm.
[com.cemerick/pomegranate "0.2.0"] | |
[enlive "1.1.1"]]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment