Created
March 28, 2015 03:53
-
-
Save bwo/0976b2cb6721672fbb3d to your computer and use it in GitHub Desktop.
who are the answeringest mefites?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns answeringest.core | |
(:require [babbage.core :as b] | |
[babbage.provided.core :as p] | |
[clojure.java.io :as io] | |
[clj-time.format :as f] | |
[clj-time.core :as time])) | |
(def mefi-format
  "clj-time formatter for the timestamp pattern that `parse-date` expects."
  (f/formatter "MMM d YYYY hh:mm:ss:SSSa"))
(defn parse-date
  "Parses the timestamp string `d` using `mefi-format`.

  Before parsing, every run of whitespace (including non-breaking spaces) is
  collapsed to a single ordinary space. The previous literal replacement
  swapped a space for a space, which is a no-op; the normalisation is written
  as a regex so it cannot silently degrade that way again.

  NOTE(review): this assumes the data pads single-digit days with an extra
  space (e.g. \"Mar  8 2015 ...\"), which the single-space pattern in
  `mefi-format` would reject -- confirm against the data files."
  [d]
  (f/parse mefi-format (.replaceAll d "[\\s\\u00A0]+" " ")))
(defn read-lines
  "Returns a lazy sequence of the lines remaining in `input`.

  `input` is any object with a `readLine` method; the sequence ends the first
  time `readLine` returns nil. Lines are only read as the sequence is
  consumed, so `input` must still be open at that point."
  [input]
  (lazy-seq
    (let [line (.readLine input)]
      (when line
        (cons line (read-lines input))))))
(defn read-csv
  "Reads the tab-separated classpath resource `filename` and hands its rows to
  `processor`, returning whatever `processor` returns.

  The first line of the file is discarded, the second line supplies the column
  names (spaces become underscores, then keywordised), and each following line
  becomes a map from column keyword to string value. Rows are produced lazily
  so an eager `processor` can stream through a large file.

  `processor` is optional and defaults to `identity`. If it returns a seq, the
  seq is fully realised before the reader is closed; otherwise a lazy result
  would fail with a closed-stream error when consumed later. Lazy values
  nested inside some other kind of result are not forced.

  Throws IllegalArgumentException when `filename` is not on the classpath."
  [filename & [processor]]
  (let [resource (or (io/resource filename)
                     (throw (IllegalArgumentException.
                              (str "Resource not found on classpath: " filename))))
        processor (or processor identity)]
    (with-open [input (io/reader resource)]
      (.readLine input) ;; timestamp
      ;; clojure.data.csv doesn't like the deletion reasons in
      ;; postdata_askme.txt :(
      (let [csv-values (map #(seq (.split % "\t")) (read-lines input))
            names (map (comp keyword #(.replace % " " "_")) (first csv-values))
            result (processor (map #(zipmap names %) (rest csv-values)))]
        ;; Realise lazy results while the reader is still open.
        (if (seq? result)
          (doall result)
          result)))))
(defn answer-fields
  "Lazily trims each answer row down to :postid, :userid and :datestamp, with
  :datestamp run through `parse-date`."
  [answers]
  (map (fn [answer]
         (-> answer
             (select-keys [:postid :userid :datestamp])
             (update-in [:datestamp] parse-date)))
       answers))
(defn process-answers
  "Feeds the answer rows (after `answer-fields`) through babbage.

  Two statistics are requested: :first-comment (first :datestamp seen) and
  :posts (unique count, total count and first value of :postid), computed over
  a set specification derived from :userid.

  NOTE(review): `calc` treats the result as a map keyed by user id plus an
  :all entry -- confirm against babbage's `computed-set` documentation."
  [answers]
  (b/calculate
    (b/computed-set (b/sets) :userid)
    {:first-comment (b/stats :datestamp p/first)
     :posts (b/stats :postid p/count-unique p/count p/first)}
    (answer-fields answers)))
(defn process-posts
  "Maps each :postid to the number of posts that appear after it in `posts`
  (the last post maps to 0). If a :postid occurs more than once, its earliest
  occurrence determines the count."
  [posts]
  (let [newest-first (reverse (map :postid posts))]
    (into {}
          (map-indexed (fn [later-count post-id] [post-id later-count])
                       newest-first))))
(defn process-users
  "Builds a lookup map from each row's :userid to its :name. When a :userid is
  repeated, the last row wins."
  [users-seq]
  (into {} (map (juxt :userid :name) users-seq)))
(def six-months-ago
  "The instant six months before this namespace was loaded. It is computed
  once at load time, not each time it is used."
  (-> (time/now)
      (time/minus (time/months 6))))
(defn calc
  "Ranks users by the share of questions they answered since their first
  answer and returns the top 20 as maps with keys :user, :questions-answered,
  :answers, :posts-since-first-answer, :first-answer and :percent-answered.

  `answers` is the per-user statistics map from `process-answers` (its :all
  entry is ignored), `users` maps user id to name, and `posts` maps post id to
  the number of posts that came after it.

  Users whose first comment is later than `six-months-ago` are excluded.
  Users whose first-answered post is absent from `posts` are skipped, because
  no percentage can be computed for them; previously this case threw a
  NullPointerException.

  :percent-answered divides by the posts since the first answer *including*
  the first-answered post itself. That post is counted in
  :questions-answered, so leaving it out of the denominator let the
  percentage exceed 100 and raised a divide-by-zero when the first-answered
  post was the newest one. The reported :posts-since-first-answer value is
  unchanged (posts strictly after the first-answered post)."
  [answers users posts]
  (->> (dissoc answers :all)
       (remove (fn [[_ v]]
                 (-> v :first-comment :first (time/after? six-months-ago))))
       (keep (fn [[k v]]
               ;; 0 is truthy in Clojure, so only a missing post id is skipped.
               (when-let [posts-since (get posts (-> v :posts :first))]
                 (let [answered (-> v :posts :count-unique)]
                   {:user (get users k)
                    :questions-answered answered
                    :answers (-> v :posts :count)
                    :posts-since-first-answer posts-since
                    :first-answer (-> v :first-comment :first)
                    :percent-answered (* 100 (/ (double answered)
                                                (inc posts-since)))}))))
       (sort-by :percent-answered >)
       (take 20)))
(defn find-answeringest
  "Loads usernames.txt, postdata_askme.txt and commentdata_askme.txt from the
  classpath (in that order), reduces each with its processor, and returns the
  ranking computed by `calc`."
  []
  (let [names-by-id (read-csv "usernames.txt" process-users)
        posts-after (read-csv "postdata_askme.txt" process-posts)
        per-user-stats (read-csv "commentdata_askme.txt" process-answers)]
    (calc per-user-stats names-by-id posts-after)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Leiningen project definition for the answeringest analysis.
;; The template placeholders for :description and :url have been filled in.
(defproject answeringest "0.1.0-SNAPSHOT"
  :description "Ranks Ask MetaFilter users by the share of questions they have answered since their first answer"
  :url "https://gist.github.com/bwo/0976b2cb6721672fbb3d"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}
  :dependencies [[org.clojure/clojure "1.5.1"]
                 [clj-time "0.9.0"]
                 [readyforzero/babbage "1.1.1"]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment