-
-
Save kballenegger/df78a51e447c3a7f80bf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns importer.core | |
(:require [clj-json.core :as json] | |
[clojure.java.io :as io] | |
[clojure.tools.cli :as cli]) | |
(:use somnium.congomongo) | |
(:import (com.mongodb Mongo DB DBObject))) | |
; global count | |
(def the-count
  "Atom holding the number of lines processed so far; starts at 0."
  (atom 0))
; mongo stuff | |
(def connection-info
  "Host and port of the MongoDB server, passed to `make-connection`."
  {:host "ec2-23-21-72-55.compute-1.amazonaws.com"
   :port 27017})
(defn update-unique-devices
  "Upserts into the :unique_devices collection: selects the document whose
  :_id is `uuid` and applies an `$addToSet` of `app` to its :install field."
  [uuid app]
  (let [selector {:_id uuid}
        modifier {"$addToSet" {:install app}}]
    (update! :unique_devices selector modifier :upsert true)))
(defn connect-to-mongo
  "Creates a connection to the \"chartboost\" database using
  `connection-info` and installs it with `set-connection!`."
  []
  (let [conn (make-connection "chartboost" connection-info)]
    (set-connection! conn)))
; processing | |
(defn process-line
  "Processes one line of the input file.

  Parses `line` as JSON, reads its \"uuid\" and \"app\" entries, records the
  pair via `update-unique-devices`, and increments the global `the-count`.
  Every 10000th line the running total is printed as a progress marker.
  Always returns nil."
  [line]
  (let [decoded (json/parse-string line)
        uuid    (get decoded "uuid")
        app     (get decoded "app")]
    (update-unique-devices uuid app)
    ;; Use the value returned by swap! instead of re-reading the atom, so the
    ;; number tested and printed is exactly this call's own increment.
    (let [n (swap! the-count inc)]
      (when (zero? (mod n 10000))
        ;; Print the number, not the atom: printing the atom shows its object
        ;; representation rather than a plain count.
        (println n)))
    nil))
; not used, reading sequentially for now
(defn read-parallel
  "Returns a queued seq (`seque`, buffer size 50) over the results of
  applying `process-line` to each line read from reader `r`. The lines are
  mapped lazily; the queue lets processing run ahead of the consumer."
  [r]
  ;; earlier pmap-based attempt, kept for reference:
  ;(pmap #(apply map process-line %) (partition 10 (line-seq r)))
  (->> (line-seq r)
       (map process-line)
       (seque 50)))
(defn import-file
  "Connects to MongoDB, then passes every line of `file` to `process-line`
  one at a time, in file order. The reader is closed on exit. Returns nil."
  [file]
  (connect-to-mongo)
  (with-open [rdr (io/reader file)]
    ;; dorun forces the lazy map for its side effects without retaining results
    (dorun (map process-line (line-seq rdr)))))
; main, when run from build | |
(defn -main
  "Command-line entry point, used when run from a build.

  Accepts an optional `--file PATH` argument naming the file to import
  (e.g. `--file tail.json`). When it is omitted, \"installs.3_24.json\" is
  imported, as before. Calls `shutdown-agents` once the import finishes."
  [& args]
  (let [[options] (cli/cli args
                           ["--file" "which file to process"])
        ;; `or` rather than a `get` default: the parsed options may contain
        ;; :file with a nil value when the switch is not supplied.
        file (or (:file options) "installs.3_24.json")]
    (import-file file)
    (shutdown-agents)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment