Skip to content

Instantly share code, notes, and snippets.

@sunng87
Created October 28, 2011 07:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sunng87/1321815 to your computer and use it in GitHub Desktop.
Save sunng87/1321815 to your computer and use it in GitHub Desktop.
Clojure code for mining Reddit upvotes
(ns reddit-visualization.core
(:refer-clojure :exclude [replace])
(:use [clojure.pprint])
(:use [clojure.string :only [lower-case replace split]])
(:require [clojure.java.jdbc :as jdbc])
(:require [reddit.clj.core :as reddit])
(:import [javax.swing JFrame])
(:import [org.jfree.chart ChartFactory ChartPanel])
(:import [org.jfree.data.general DefaultPieDataset])
(:import [org.jfree.util SortOrder]))
(def reddit-client
  "Shared reddit.clj client handle, created once when this namespace loads.
  Both login arguments are nil -- presumably this yields an anonymous
  session; confirm against the reddit.clj `login` documentation."
  (reddit/login nil nil))
(def db
  "Connection spec passed to `jdbc/with-connection` by every database
  function in this namespace: the HSQLDB JDBC driver, database `testdb`,
  user `SA` with an empty password."
  {:classname   "org.hsqldb.jdbc.JDBCDriver"
   :subprotocol "hsqldb"
   :subname     "testdb"
   :user        "SA"
   :password    ""})
(def table-name
  "Keyword naming the table that stores the fetched submissions. It is
  passed as-is to the jdbc helpers and converted with `name` when SQL
  text is built by hand."
  :reddits)
(def prog-subreddits
  "Lower-case subreddit names that `filter-prog-reddits` keeps."
  #{"programming"
    "python"
    "groovy"
    "java"
    "javascript"
    "node"
    "clojure"
    "webdev"
    "web_design"
    "linux"})
(defn create-db
  "Creates the `table-name` table in the `db` database.

  Columns: name (primary key), submitter, title, subreddit, ups, downs
  and comments."
  []
  (let [columns [[:name      "VARCHAR(32)" "PRIMARY KEY"]
                 [:submitter "VARCHAR(32)"]
                 [:title     "VARCHAR(512)"]
                 [:subreddit "VARCHAR(32)"]
                 [:ups       :int]
                 [:downs     :int]
                 [:comments  :int]]]
    (jdbc/with-connection db
      ;; create-table takes the column specs as trailing arguments.
      (apply jdbc/create-table table-name columns))))
(defn truncate-db
  "Drops the `table-name` table from the `db` database.

  Despite the name this removes the table itself, not just its rows, so
  `create-db` has to be called again before any further inserts."
  []
  (jdbc/with-connection
    db
    (jdbc/drop-table table-name)))
(defn save-reddits
  "Inserts every item of `reddits` into the `table-name` table, one row
  per item, inside a single connection. Returns nil.

  Each item is read through the keys :name, :author, :title, :subreddit,
  :ups, :downs and :num_comments; the subreddit is stored lower-cased."
  [reddits]
  (let [->row (fn [entry]
                {:name      (:name entry)
                 :submitter (:author entry)
                 :title     (:title entry)
                 :subreddit (lower-case (:subreddit entry))
                 :ups       (:ups entry)
                 :downs     (:downs entry)
                 :comments  (:num_comments entry)})]
    (jdbc/with-connection db
      (doseq [entry reddits]
        (jdbc/insert-record table-name (->row entry))))))
(defn load-reddits
  "Returns all rows of the `table-name` table as a vector of row maps.

  The rows are copied into a vector before the connection closes, so the
  result can be used after this function returns."
  []
  (let [sql (str "select * from " (name table-name))]
    (jdbc/with-connection db
      (jdbc/with-query-results rows [sql]
        (vec rows)))))
(defn records-count
  "Returns the number of rows in the `table-name` table.

  The value is read from the :c1 key of the single result row --
  presumably the label the database assigns to the unnamed count(*)
  column; confirm if the driver or database changes."
  []
  (let [sql (str "select count(*) from " (name table-name))]
    (jdbc/with-connection db
      (jdbc/with-query-results rows [sql]
        (-> rows first :c1)))))
(defn query-top-submitter
  "Returns a vector of up to ten row maps, each holding a submitter and
  the number of stored rows for that submitter (column `c`), ordered by
  that count from highest to lowest."
  []
  (let [sql (str "SELECT submitter, count(*) as c FROM " (name table-name) " GROUP BY submitter ORDER BY c DESC LIMIT 10")]
    (jdbc/with-connection db
      (jdbc/with-query-results rows [sql]
        (vec rows)))))
(defn map-reddits
  "Calls `reddit/user-liked` for `user` through the shared
  `reddit-client`, passing 25 and `after-id`, and returns its result.

  Presumably 25 is the page size and `after-id` the name of the item to
  continue after (nil for the first page); confirm against reddit.clj."
  [user after-id]
  (let [page-size 25]
    (reddit/user-liked reddit-client user page-size after-id)))
(defn map-most-recent-reddits
  "Collects items liked by `user`, page by page via `map-reddits`, until
  at least `totalsize` items have been gathered or a page comes back
  empty.

  Returns a vector of the collected items in fetch order. Paging stops
  only on a page boundary, so the result may hold more than `totalsize`
  items (by less than one page)."
  [user totalsize]
  ;; Accumulate into a vector: `count` is constant-time and `into` is
  ;; eager, which avoids the chain of nested lazy `concat`s (and the
  ;; repeated linear re-count) that grows with every fetched page.
  (loop [after-id  nil
         collected []]
    (if (>= (count collected) totalsize)
      collected
      (let [page (map-reddits user after-id)]
        (if (empty? page)
          collected
          ;; The :name of the last item is the cursor for the next page.
          (recur (:name (last page)) (into collected page)))))))
(defn filter-prog-reddits
  "Returns a lazy sequence of the items in `reddits` whose :subreddit,
  lower-cased, is a member of `prog-subreddits`."
  [reddits]
  ;; A set used as a function returns the matching member or nil, which
  ;; is exactly the truthiness `filter` needs.
  (let [programming? (comp prog-subreddits lower-case :subreddit)]
    (filter programming? reddits)))
(defn filtered-title-tokens
  "Returns a lazy sequence of lower-cased word tokens taken from the
  :title of every item in `reddits`, in order.

  Each title is lower-cased, the listed punctuation characters are
  replaced by spaces, and the result is split on whitespace. Empty
  tokens (produced when a title starts with punctuation or whitespace,
  or is empty) are dropped, and a missing :title is treated as an empty
  title instead of raising a NullPointerException."
  [reddits]
  (->> reddits
       (mapcat (fn [post]
                 (-> (lower-case (or (:title post) ""))
                     (replace #"\+|=|~|!|\?|:|;|\.|,|\"|\'|\(|\)|\[|\]|\{|\}" " ")
                     (split #"\s+"))))
       ;; split yields a leading "" when the text starts with a separator.
       (remove empty?)))
(defn count-into-map
  "Returns `data` with the count stored under `key` increased by one;
  a key that is not present yet starts at 1. Shaped to be used as the
  reducing function of `reduce`."
  [data key]
  ;; merge-with adds 1 to an existing count, or inserts 1 for a new key.
  (merge-with + data {key 1}))
(defn sort-by-value
  "Returns a sorted map with the entries of `m`, ordered by value from
  highest to lowest; entries with equal values are ordered by key.

  The comparator looks values up in `m` and compares them with
  `compare`, which accepts nil. Looking up a key that is not in `m` on
  the returned map therefore yields nil instead of throwing a
  NullPointerException from `<`."
  [m]
  (into (sorted-map-by
          (fn [k1 k2]
            ;; Values are swapped to get descending order; the key is the
            ;; tie-breaker so distinct keys never compare as equal.
            (compare [(m k2) k1] [(m k1) k2])))
        m))
(defn ignore-common-tokens
  "Returns `m` without the entries whose key is one of a fixed list of
  common words, single digits and similar filler tokens."
  [m]
  (let [stop-words ["the" "a" "to" "and" "or" "is" "in" "for" "of" "with"
                    "i" "on" "s" "1" "you" "3" "2" "-" "that" "what" "your" "my"
                    "new" "this" "it" "how" "an" "be" "by" "not" "5" "why"
                    "now" "using" "from" "has" "just" "7" "0" "at" "use" "all"]]
    (reduce dissoc m stop-words)))
(defn reduce-token-count
  "Counts how often each token occurs in `tokens`, removes the entries
  `ignore-common-tokens` filters out, and returns the remaining counts
  ordered by `sort-by-value`."
  [tokens]
  (->> tokens
       (reduce count-into-map {})
       ignore-common-tokens
       sort-by-value))
(defn reduce-subreddits-count
  "Returns a map from each :subreddit value found in `all-reddits` to
  the number of items carrying it. An empty input yields an empty map."
  [all-reddits]
  ;; `frequencies` is the standard-library form of the hand-written
  ;; count-per-key reduce; a keyword is already a lookup function.
  (frequencies (map :subreddit all-reddits)))
(defn create-dataset
  "Builds a `DefaultPieDataset` holding one value per entry of
  `subreddits-count-map` (key as the label, value as the amount), sorts
  it by value in descending order, and returns it."
  [subreddits-count-map]
  (let [dataset (DefaultPieDataset.)]
    (doseq [[label total] subreddits-count-map]
      (.setValue dataset label total))
    (.sortByValues dataset SortOrder/DESCENDING)
    dataset))
(defn create-chart-pane
  "Creates a 3D pie chart titled \"Upvotes in subreddits\" from
  `dataset` and returns it wrapped in a `ChartPanel`."
  [dataset]
  ;; The three flags are legend, tooltips and urls, per the JFreeChart
  ;; createPieChart3D signature.
  (-> (ChartFactory/createPieChart3D "Upvotes in subreddits" dataset true true false)
      (ChartPanel.)))
(defn start-jframe
  "Opens a window titled \"Upvotes in subreddits\" with `chart-panel` as
  its content pane, packs it, makes it visible, and returns the frame."
  [chart-panel]
  (let [frame (JFrame. "Upvotes in subreddits")]
    (.setContentPane frame chart-panel)
    (.pack frame)
    (.setVisible frame true)
    frame))
(defn -main
  "Entry point: fetches liked items of user \"sunng\" (asking for 1000),
  counts them per subreddit, pretty-prints that summary, and shows it as
  a pie chart in a window. Command-line `args` are ignored."
  [& args]
  (let [liked              (map-most-recent-reddits "sunng" 1000)
        subreddits-summary (reduce-subreddits-count liked)]
    (pprint subreddits-summary)
    (-> subreddits-summary
        create-dataset
        create-chart-pane
        start-jframe)))
;; Leiningen project definition for the reddit-visualization code.
(defproject reddit-visualization "1.0.0-SNAPSHOT"
  ;; Replaces the generated "FIXME" placeholder with a real summary.
  :description "Clojure code for mining reddit upvotes"
  :dependencies [[org.clojure/clojure "1.3.0"]
                 ;; Database access and the HSQLDB driver used by the `db` spec.
                 [org.clojure/java.jdbc "0.1.0"]
                 [org.hsqldb/hsqldb "2.2.4"]
                 ;; Reddit API client.
                 [reddit.clj "0.3.1"]
                 ;; Pie-chart rendering.
                 [jfree/jfreechart "1.0.13"]]
  ;; Namespace whose -main function is the program entry point.
  :main reddit-visualization.core)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment