Skip to content

Instantly share code, notes, and snippets.

@kballenegger
Created April 13, 2012 04:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save kballenegger/df78a51e447c3a7f80bf to your computer and use it in GitHub Desktop.
Save kballenegger/df78a51e447c3a7f80bf to your computer and use it in GitHub Desktop.
(ns importer.core
(:require [clj-json.core :as json]
[clojure.java.io :as io]
[clojure.tools.cli :as cli])
(:use somnium.congomongo)
(:import (com.mongodb Mongo DB DBObject)))
; Global running count of processed lines; process-line increments it
; once per line and uses it to decide when to print progress.
(def the-count (atom 0))
; MongoDB connection settings (host and port), passed to make-connection
; by connect-to-mongo.
; NOTE(review): the host is hard-coded to one specific EC2 machine --
; consider moving it to configuration.
(def connection-info
{:host "ec2-23-21-72-55.compute-1.amazonaws.com", :port 27017})
(defn update-unique-devices
  "Records that device `uuid` has installed `app`.

  Issues an upserting update against the :unique_devices collection: the
  document whose :_id equals `uuid` gets `app` added to its :install
  field through the $addToSet modifier."
  [uuid app]
  (let [selector {:_id uuid}
        modifier {"$addToSet" {:install app}}]
    (update! :unique_devices selector modifier :upsert true)))
(defn connect-to-mongo
  "Opens a connection to the chartboost database using `connection-info`
  and installs it as the connection used by subsequent congomongo calls."
  []
  (let [conn (make-connection "chartboost" connection-info)]
    (set-connection! conn)))
; processing
(defn process-line
  "Executed for each line in the big file.

  Parses `line` as JSON, reads its uuid and app keys, and records the pair
  with `update-unique-devices`. Increments the global `the-count` and
  prints the running total every 10000 lines. Always returns nil."
  [line]
  (let [decoded (json/parse-string line)
        uuid    (get decoded "uuid")
        app     (get decoded "app")]
    (update-unique-devices uuid app)
    ;; swap! returns the new value; using it avoids a second read of the
    ;; atom that could observe a different count if another thread
    ;; increments in between.
    (let [n (swap! the-count inc)]
      (when (zero? (mod n 10000))
        ;; Print the number itself, not the atom object.
        (println n)))
    nil)) ; log and return nil
; not used, reading sequencially for now
(defn read-parallel
"Processes lines in parallel from reader r (10 concurrent)"
[r]
;(pmap #(apply map process-line %) (partition 10 (line-seq r))))
(seque 50 (map process-line (line-seq r))))
(defn import-file
  "Imports `file` into MongoDB.

  Connects to the database, then passes every line of `file` to
  `process-line`, one at a time and in file order. The reader is closed
  when the walk finishes. Returns nil."
  [file]
  (connect-to-mongo)
  (with-open [rdr (io/reader file)]
    ;; Eager, sequential walk over the lines; results are discarded.
    (dorun (map process-line (line-seq rdr)))))
; main, when run from build
(defn -main
  "Command-line entry point.

  Accepts an optional --file argument naming the file to import; when it
  is not given, falls back to installs.3_24.json, the file that was
  previously always imported. Shuts down the agent thread pool afterwards
  so the JVM can exit."
  [& args]
  (let [[options _ _] (cli/cli args
                               ["--file" "which file to process"])
        ;; Honour --file instead of ignoring the parsed option.
        file (or (:file options) "installs.3_24.json")]
    (try
      (import-file file)
      ;(import-file "tail.json")
      (finally
        ;; Run even when the import throws, so agent threads do not keep
        ;; the process alive.
        (shutdown-agents)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment