-
-
Save earle/0a5266fd0ebb4db5cc43 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(def re-expn #"([^ ]*) ([^ ]*) ([^ ]*) (?:-|\[([^\]]*)\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*)") | |
(defn write-csv [resp] | |
(let [url (-> @resp :opts :url)] | |
(doseq [line (-> @resp :body string/split-lines)] | |
(println (conj (re-seq re-expn line) url)) | |
;(csv/write-csv *out* (re-seq re-expn line)) | |
) | |
) | |
) | |
(defn grab-urls [urls] | |
(let [futures (doall (map http/get urls))] | |
(doseq [resp futures] | |
(write-csv resp))) | |
) | |
(defn read-urls [filename] | |
(with-open [rdr (io/reader filename)] | |
(grab-urls (line-seq rdr))) | |
) | |
(defn -main | |
"download a list of urls pointing to access.logs and generate csv" | |
[& args] | |
;; work around dangerous default behaviour in Clojure | |
(alter-var-root #'*read-eval* (constantly false)) | |
(if (< (count args) 1) | |
(println "usage: access-logs <file1> <file2> ... <fileN>") | |
(doseq [filename args] | |
(read-urls filename))) | |
) | |
; id like the url to be inside hte vector, not a list of the url then the vector | |
(http://notebook.cowgar.com/access.log [208.115.111.72 - - [22/May/2012:00:19:43 -0400] "GET /?C=M;O=D HTTP/1.1" 200 208.115.111.72 - - 22/May/2012:00:19:43 -0400 "GET /?C=M;O=D HTTP/1.1" 200]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
try this (doto (conj (re-seq re-expn line) url) (println "-- IS ARG"))