Skip to content

Instantly share code, notes, and snippets.

Created March 28, 2015 03:53
What would you like to do?
who are the answeringest mefites?
(ns answeringest.core
  "Ranks users by the share of posts they have answered since their first
  answer, computed from three tab-separated resource files (see
  find-answeringest)."
  (:require [babbage.core :as b]
            [babbage.provided.core :as p]
            ;; io/reader and io/resource (used by read-csv) live in clojure.java.io
            [clojure.java.io :as io]
            [clj-time.format :as f]
            [clj-time.core :as time]))
(def mefi-format
  "clj-time formatter for the timestamp strings handed to parse-date."
  (f/formatter "MMM d YYYY hh:mm:ss:SSSa"))
(defn parse-date
  "Parses the timestamp string `d` with `mefi-format` and returns whatever
  `f/parse` yields for it.

  Runs of two or more spaces are collapsed to a single space first, because
  the format pattern has exactly one space between fields.
  NOTE(review): presumably the input pads single-digit days with an extra
  space -- confirm against the data files."
  [^String d]
  ;; The previous (.replace d \" \" \" \") swapped a space for a space, i.e.
  ;; did nothing; collapse repeated spaces instead.
  (f/parse mefi-format (.replaceAll d " {2,}" " ")))
(defn read-lines
  "Reads every remaining line from `input` (anything with a `.readLine`
  method that returns nil at end of input) and returns them, in order, as a
  seq of strings. Returns nil when there are no lines left.

  The whole input is consumed eagerly before returning, so the result stays
  usable after the reader is closed. An accumulator loop is used instead of
  recursion so large files cannot overflow the stack."
  [input]
  (loop [acc []]
    (if-let [line (.readLine input)]
      (recur (conj acc line))
      ;; seq turns an empty vector into nil, matching the empty-input result
      (seq acc))))
(defn read-csv
  "Loads the classpath resource `filename` as tab-separated text and passes
  its rows to `processor` (default: identity), returning the processor's
  result.

  The first line is discarded, the second line supplies the column names
  (spaces replaced by underscores, then keywordized), and every later line
  becomes a map from column keyword to field string. The processor runs
  while the reader is still open."
  [filename & [processor]]
  (let [process (or processor identity)]
    (with-open [rdr (io/reader (io/resource filename))]
      ;; the leading line is a timestamp, not data
      (.readLine rdr)
      ;; doesn't like the deletion reasons in
      ;; postdata_askme.txt :(
      (let [rows (map (fn [line] (seq (.split line "\t")))
                      (read-lines rdr))
            header (map (fn [col] (keyword (.replace col " " "_")))
                        (first rows))]
        (process (map (fn [row] (zipmap header row))
                      (rest rows)))))))
(defn answer-fields
  "Returns a lazy seq with one map per element of `answers`, keeping only
  the :postid, :userid and :datestamp keys and replacing the :datestamp
  value with the result of `parse-date`."
  [answers]
  ;; comp applies right to left: trim the keys first, then parse the date
  (map (comp #(update-in % [:datestamp] parse-date)
             #(select-keys % [:postid :userid :datestamp]))
       answers))
(defn process-answers
  "Feeds the trimmed answer rows (see answer-fields) through babbage and
  returns the result of `b/calculate`.

  Two stats are requested: :first-comment (p/first over :datestamp) and
  :posts (p/count-unique, p/count and p/first over :postid), with sets
  computed from :userid.
  NOTE(review): calc treats the result as a map keyed by user id plus an
  :all entry -- confirm against the babbage documentation."
  [answers]
  (let [stats {:first-comment (b/stats :datestamp p/first)
               :posts (b/stats :postid p/count-unique p/count p/first)}
        sets (b/computed-set (b/sets) :userid)
        rows (answer-fields answers)]
    (b/calculate sets stats rows)))
(defn process-posts
  "Builds a map from each post's :postid to the number of posts that follow
  it in `posts`: the last post maps to 0, the one before it to 1, and so on."
  [posts]
  ;; how many posts came after this one?
  (let [ids-newest-first (reverse (map :postid posts))]
    (into {} (map vector ids-newest-first (range)))))
(defn process-users
  "Builds a lookup map from each row's :userid to its :name. When a :userid
  occurs more than once, the last row wins."
  [users-seq]
  (into {} (map (juxt :userid :name) users-seq)))
(def six-months-ago
  "The instant six months before this namespace was loaded; evaluated once,
  at definition time."
  (-> (time/now)
      (time/minus (time/months 6))))
(defn calc
  "Returns the 20 users with the highest :percent-answered, best first.

  `answers` is the per-user stats map (its :all entry is ignored), `users`
  maps user id to name, and `posts` maps post id to the number of posts that
  came after it. Users whose first comment is after `six-months-ago` are
  dropped. Each result is a map with :user, :questions-answered, :answers,
  :posts-since-first-answer, :first-answer and :percent-answered.

  :percent-answered is 100 * questions-answered / posts-since-first-answer.
  When the denominator is missing (the post is not in `posts`) or zero (the
  first answer was on the last post), it is reported as 0.0 rather than
  throwing."
  [answers users posts]
  (->> (dissoc answers :all)
       (remove (fn [[_ v]]
                 (-> v :first-comment :first (time/after? six-months-ago))))
       (map (fn [[k v]]
              (let [answered (-> v :posts :count-unique)
                    since (get posts (-> v :posts :first))]
                {:user (get users k)
                 :questions-answered answered
                 :answers (-> v :posts :count)
                 :posts-since-first-answer since
                 :first-answer (-> v :first-comment :first)
                 ;; boxed division by zero throws, and a nil denominator
                 ;; would raise a NullPointerException, so guard both
                 :percent-answered (if (and since (pos? since))
                                     (* 100 (/ (float answered) since))
                                     0.0)})))
       (sort-by :percent-answered >)
       (take 20)))
(defn find-answeringest
  "Entry point: loads the user, post and comment resource files (in that
  order), runs each through its processor, and returns the result of `calc`."
  []
  (let [[users posts-after per-user]
        (mapv (fn [[file processor]] (read-csv file processor))
              [["usernames.txt" process-users]
               ["postdata_askme.txt" process-posts]
               ["commentdata_askme.txt" process-answers]])]
    (calc per-user users posts-after)))
;; Leiningen project definition for the answeringest code.
;; NOTE(review): presumably this form belongs in a separate project.clj
;; rather than in the same file as the namespace above -- confirm.
(defproject answeringest "0.1.0-SNAPSHOT"
;; template placeholder; no real description has been written yet
:description "FIXME: write description"
;; NOTE(review): both :url values are empty strings -- fill in or remove
:url ""
:license {:name "Eclipse Public License"
:url ""}
;; clj-time and babbage back the f/, time/, b/ and p/ aliases used by the code
:dependencies [[org.clojure/clojure "1.5.1"]
[clj-time "0.9.0"]
[readyforzero/babbage "1.1.1"]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment