Skip to content

Instantly share code, notes, and snippets.

@earle
Last active December 29, 2015 15:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save earle/0a5266fd0ebb4db5cc43 to your computer and use it in GitHub Desktop.
Save earle/0a5266fd0ebb4db5cc43 to your computer and use it in GitHub Desktop.
;; Pattern applied by write-csv to every line of a fetched body.
;; Capture groups, in order:
;;   1-3  three space-separated tokens, none containing a space
;;   4    the text between "[" and "]"; nil when the field is a bare "-"
;;   5    either a token free of spaces and double quotes, or a whole
;;        double-quoted string (quotes included)
;;   6    "-" or a run of digits
;; NOTE(review): judging by the sample output kept in this file these look
;; like host, two placeholder fields, timestamp, request line and status
;; code of an access.log entry -- confirm against the real log format.
(def re-expn #"([^ ]*) ([^ ]*) ([^ ]*) (?:-|\[([^\]]*)\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*)")
(defn write-csv
  "Print one row vector per `re-expn` match found in a response body.

  `resp` is a derefable (it is read with `@`) that yields a map holding the
  request url under [:opts :url] and the response text under :body.

  Every line of the body is scanned with `re-expn`. For each match a vector
  is printed whose first element is the url, followed by the elements of
  the match vector (the whole match, then the capture groups).

  A response whose :body is nil is skipped rather than failing inside
  `string/split-lines`. Returns nil."
  [resp]
  (let [response @resp                       ; deref once, reuse for both lookups
        url      (-> response :opts :url)]
    (when-let [body (:body response)]
      (doseq [line  (string/split-lines body)
              match (re-seq re-expn line)]
        ;; `re-seq` yields a seq of match vectors; `conj` onto that seq only
        ;; prepended the url to the outer list. Building the row with `into`
        ;; puts the url inside the vector itself.
        ;(csv/write-csv *out* (re-seq re-expn line))
        (println (into [url] match))))))
(defn grab-urls
  "Call `http/get` on every url up front, then pass each result, in the
  original order, to `write-csv`. Returns nil."
  [urls]
  ;; mapv is eager, so every `http/get` call has been made before the first
  ;; result is handed to write-csv.
  (let [pending (mapv http/get urls)]
    (doseq [response pending]
      (write-csv response))))
(defn read-urls
  "Open `filename`, treat each of its lines as a url and hand the sequence
  of lines to `grab-urls`. The reader is closed when the call returns."
  [filename]
  (with-open [in (io/reader filename)]
    (-> in
        line-seq
        grab-urls)))
(defn -main
  "Entry point. Each argument names a file that is passed to `read-urls`;
  with no arguments a usage line is printed instead."
  [& args]
  ;; Turn off *read-eval* globally before doing anything else, so the
  ;; reader cannot evaluate code embedded in data it reads.
  (alter-var-root #'*read-eval* (constantly false))
  (if (empty? args)
    (println "usage: access-logs <file1> <file2> ... <fileN>")
    (doseq [path args]
      (read-urls path))))
; I'd like the url to be inside the vector, not a list of the url followed by the vector
(http://notebook.cowgar.com/access.log [208.115.111.72 - - [22/May/2012:00:19:43 -0400] "GET /?C=M;O=D HTTP/1.1" 200 208.115.111.72 - - 22/May/2012:00:19:43 -0400 "GET /?C=M;O=D HTTP/1.1" 200])
@noisesmith
Copy link

try this (doto (conj (re-seq re-expn line) url) (println "-- IS ARG"))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment