Skip to content

Instantly share code, notes, and snippets.

@alistairtucker
Created April 11, 2012 07:35
Show Gist options
  • Save alistairtucker/2357604 to your computer and use it in GitHub Desktop.
Save alistairtucker/2357604 to your computer and use it in GitHub Desktop.
(ns twitter.edfringe
(:require [clojure.java.io :as jio])
(:require [clojure.contrib.io :as cio])
(:require [clojure.contrib.json :as json]))
;; Directory holding the captured tweet files. Passed to transformed-seq by
;; the top-level export form in this file.
(def dir-name "/Users/ali/twitter/#edfringe")
(defn out-files
  "Returns a lazy seq of path strings for every entry of the directory
  `dir-name` whose name starts with out, ordered by entry name.
  Each path is the directory joined with the entry name."
  [dir-name]
  (let [parent    (jio/file dir-name)
        out-name? (fn [entry] (.startsWith entry "out"))
        full-path (fn [entry] (str (jio/file parent entry)))]
    (->> (.list parent)
         (filter out-name?)
         sort
         (map full-path))))
(defn tweet-seq
  "Returns a lazy seq of decoded JSON values, one per non-blank line, read
  from the files listed by (out-files dir-name) in that order.

  Each file is read completely and its reader closed before its lines are
  handed on. Lines that are empty or whitespace-only are skipped, because
  json/read-json raises an end-of-file error on them and would otherwise
  abort the whole traversal."
  [dir-name]
  (letfn [(file-lines [path]
            ;; doall forces every line to be read while the reader is still
            ;; open; a lazy line-seq would fail once with-open closes it.
            (with-open [r (jio/reader path)]
              (doall (line-seq r))))
          (blank-line? [^String line]
            (zero? (count (.trim line))))]
    (->> (out-files dir-name)
         (mapcat file-lines)
         (remove blank-line?)
         (map json/read-json))))
(defn parse-status
  "Extracts the fields of interest from one decoded status map `s`.

  Returns a map with:
    :created-at  the creation timestamp value found in `s`
    :user-id     the value at [:user :id]
    :wave-id     the :id of the retweeted status when `s` carries one,
                 otherwise the :id of `s` itself

  The timestamp and the retweeted status are looked up under the
  hyphenated keys (:created-at, :retweeted-status) first and then under
  the underscore keys (:created_at, :retweeted_status). The underscore
  spelling is what keywordized Twitter API JSON produces; without the
  fallback those two fields are always nil for such input.
  Missing fields yield nil."
  [s]
  (let [created   (or (:created-at s) (:created_at s))
        retweeted (or (:retweeted-status s) (:retweeted_status s))]
    {:created-at created
     :user-id    (get-in s [:user :id])
     ;; A retweet is attributed to the status it repeats, so that all
     ;; retweets of one status share a single wave id.
     :wave-id    (if retweeted (:id retweeted) (:id s))}))
(defn id-map
  "Builds a map from each distinct element of `id-coll` to its 1-based
  position in sorted order. Duplicates collapse to a single entry; an empty
  collection gives an empty map."
  [id-coll]
  (let [ordered-ids (into (sorted-set) id-coll)
        ranks       (iterate inc 1)]
    (zipmap ordered-ids ranks)))
(defn transformed-seq
  "Returns a lazy seq with one map per status read via (tweet-seq dir-name).

  Each map keeps the :created-at produced by parse-status and replaces
  :user-id and :wave-id with the numbers that id-map assigns to them, i.e.
  their 1-based positions among the sorted distinct user ids and wave ids
  of the whole input."
  [dir-name]
  (let [statuses  (map parse-status (tweet-seq dir-name))
        user-rank (id-map (map :user-id statuses))
        wave-rank (id-map (map :wave-id statuses))
        relabel   (fn [{:keys [created-at user-id wave-id]}]
                    {:created-at created-at
                     :user-id    (user-rank user-id)
                     :wave-id    (wave-rank wave-id)})]
    (map relabel statuses)))
;; Export: print one line per transformed status -- created-at, user id and
;; wave id separated by spaces -- into the text file below.
(cio/with-out-writer "/Users/ali/Documents/MATLAB/tuwittuwu/edfringe3.txt"
  (doseq [{:keys [created-at user-id wave-id]} (transformed-seq dir-name)]
    (println created-at user-id wave-id)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment