who are the answeringest mefites?
(ns answeringest.core
  "Ranks AskMe answerers from tab-separated dump files found on the
  classpath (see `find-answeringest`)."
  (:require [babbage.core :as b]
            [babbage.provided.core :as p]
            [clj-time.core :as time]
            [clj-time.format :as f]
            [clojure.java.io :as io]))
(def mefi-format
  "clj-time formatter for the timestamp layout handled by `parse-date`:
  month name, day, year, 12-hour clock with milliseconds and an AM/PM marker."
  (f/formatter "MMM d YYYY hh:mm:ss:SSSa"))
(defn parse-date
  "Parse the timestamp string `d` with `mefi-format` and return the result
  of `f/parse`.

  Runs of two or more spaces are collapsed to a single space first.
  NOTE(review): the previous code replaced a single space with a single
  space, which is a no-op; presumably it was meant to squeeze a double space
  (e.g. a space-padded single-digit day) so the `d` pattern matches --
  confirm against the data files."
  [^String d]
  ;; String.replaceAll takes a regex, so \" {2,}\" matches any run of 2+ spaces.
  (f/parse mefi-format (.replaceAll d " {2,}" " ")))
(defn read-lines
  "Return a lazy sequence of the lines obtained by repeatedly calling
  `.readLine` on `input`, stopping at the first nil result.

  Nothing is read until the sequence is consumed, so `input` must still be
  open at that point."
  [input]
  (->> (repeatedly #(.readLine input))
       (take-while identity)))
(defn read-csv
  "Read the tab-separated classpath resource `filename` and hand its rows to
  `processor` (default: `identity`), returning whatever `processor` returns.

  The first line of the file is skipped, the second line supplies the column
  names (spaces become underscores, then keywordized), and every following
  line becomes a map from column keyword to string value. The rows are a lazy
  sequence backed by the open reader, so `processor` must consume them before
  this function returns; if `processor` itself returns a seq, it is fully
  realized here so it stays usable after the reader is closed.

  Throws IllegalArgumentException when `filename` is not on the classpath."
  [filename & [processor]]
  (let [resource  (io/resource filename)
        processor (or processor identity)]
    (when (nil? resource)
      ;; io/reader on nil also raises IllegalArgumentException, but with a
      ;; message that does not mention which file was missing.
      (throw (IllegalArgumentException.
               (str "Resource not found on classpath: " filename))))
    (with-open [input (io/reader resource)]
      (.readLine input) ;; timestamp
      ;; clojure.data.csv doesn't like the deletion reasons in
      ;; postdata_askme.txt :(
      (let [csv-values (map #(seq (.split % "\t")) (read-lines input))
            names      (map (comp keyword #(.replace % " " "_")) (first csv-values))
            result     (processor (map #(zipmap names %) (rest csv-values)))]
        ;; A lazy result would otherwise be realized after with-open has
        ;; closed the reader and fail with a closed-stream error.
        (if (seq? result)
          (doall result)
          result)))))
(defn answer-fields
  "Lazily trim each map in `answers` down to :postid, :userid and :datestamp,
  replacing the :datestamp value with the result of `parse-date`."
  [answers]
  (for [row answers]
    (-> row
        (select-keys [:postid :userid :datestamp])
        (update-in [:datestamp] parse-date))))
(defn process-answers
  "Run the babbage calculation over `answers` (first narrowed by
  `answer-fields`) and return its result.

  A computed set on :userid is added to the default sets, and two stats are
  requested: `p/first` over :datestamp (under :first-comment) and
  `p/count-unique`, `p/count` and `p/first` over :postid (under :posts).
  NOTE(review): presumably this yields one result per distinct user id plus
  an :all entry -- `calc` relies on that shape; confirm against babbage."
  [answers]
  (let [by-user  (b/computed-set (b/sets) :userid)
        measures {:first-comment (b/stats :datestamp p/first)
                  :posts         (b/stats :postid p/count-unique p/count p/first)}]
    (b/calculate by-user measures (answer-fields answers))))
(defn process-posts
  "Map each :postid in `posts` to the number of entries that come after it
  in the input order, so the last post maps to 0.

  When a post id repeats, the count for its earliest occurrence is kept."
  [posts]
  ;; how many posts came after this one?
  (let [newest-first (reverse (map :postid posts))]
    (into {} (map-indexed (fn [following post-id] [post-id following])
                          newest-first))))
(defn process-users
  "Build a lookup map from each entry's :userid to its :name.

  When a user id repeats, the last entry in `users-seq` wins."
  [users-seq]
  (into {} (map (juxt :userid :name) users-seq)))
(def six-months-ago
  "The instant six months before this namespace was loaded; computed once,
  at load time."
  (-> (time/now)
      (time/minus (time/months 6))))
(defn calc
  "Return the top 20 per-user summaries, sorted by :percent-answered
  descending.

  `answers` is the map produced by `process-answers`; its :all entry is
  dropped and the remaining entries are treated as user-id -> stats.
  `users` maps user id -> name and `posts` maps post id -> number of posts
  that came after it. Users whose first comment is later than
  `six-months-ago` are excluded.

  Each summary holds :user, :questions-answered, :answers,
  :posts-since-first-answer, :first-answer and :percent-answered, where the
  percentage is questions answered relative to the posts since the user's
  first answered post. If that post is missing from `posts`, or no posts
  followed it, the percentage is 0.0 rather than failing on a nil or zero
  divisor."
  [answers users posts]
  (let [percent (fn [answered following]
                  ;; `following` is nil when the post id is not in `posts`
                  ;; and 0 for the newest post; neither can be divided by.
                  (if (and following (pos? following))
                    (* 100 (/ (double answered) following))
                    0.0))
        summarize (fn [[user-id stats]]
                    (let [answered  (-> stats :posts :count-unique)
                          following (get posts (-> stats :posts :first))]
                      {:user                     (get users user-id)
                       :questions-answered       answered
                       :answers                  (-> stats :posts :count)
                       :posts-since-first-answer following
                       :first-answer             (-> stats :first-comment :first)
                       :percent-answered         (percent answered following)}))]
    (->> (dissoc answers :all)
         (remove (fn [[_ stats]]
                   (-> stats :first-comment :first (time/after? six-months-ago))))
         (map summarize)
         (sort-by :percent-answered >)
         (take 20))))
(defn find-answeringest
  "Load the three data resources -- usernames.txt, postdata_askme.txt and
  commentdata_askme.txt, in that order -- through `read-csv` and return the
  ranking computed by `calc`."
  []
  (let [names-by-id     (read-csv "usernames.txt" process-users)
        posts-after     (read-csv "postdata_askme.txt" process-posts)
        per-user-stats  (read-csv "commentdata_askme.txt" process-answers)]
    (calc per-user-stats names-by-id posts-after)))
;; Leiningen project definition for the answeringest analysis.
(defproject answeringest "0.1.0-SNAPSHOT"
  :description "Who are the answeringest MeFites? Ranks AskMe users by the percentage of questions they answered."
  ;; TODO: still the template placeholder; point this at the project's real home.
  :url "http://example.com/FIXME"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}
  :dependencies [[org.clojure/clojure "1.5.1"]
                 [clj-time "0.9.0"]
                 [readyforzero/babbage "1.1.1"]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment