Skip to content

Instantly share code, notes, and snippets.

@djKianoosh
Created May 9, 2012 20:55
Show Gist options
  • Save djKianoosh/2648751 to your computer and use it in GitHub Desktop.
Save djKianoosh/2648751 to your computer and use it in GitHub Desktop.
Some Clojure functions to help read custom access log files into maps
(defn comment?
  "Returns true when the string `s` begins with the `#` character."
  [s]
  (. s (startsWith "#")))
(defn not-comment?
  "Logical inverse of `comment?`: true when `s` does not start with `#`."
  [s]
  (if (comment? s)
    false
    true))
(defn remove-comments
  "Returns a lazy sequence of the items in `line` for which `comment?` is
  false. Despite its singular name, `line` is treated as a collection of
  strings (it is passed straight to a sequence function)."
  [line]
  (remove comment? line))
(defn nil-if-hyphen
  "Returns nil when `s` equals the string \"-\"; otherwise returns `s`
  unchanged."
  [s]
  (when-not (= s "-")
    s))
(defn str->int
  "Returns a number if the whole string is an unsigned decimal number,
  otherwise returns the input unaltered.

  - A run of digits (one or more, e.g. \"0\", \"200\") becomes an integer.
    Leading zeros are read as decimal, so \"010\" is 10 and \"08\" is 8.
    Values too large for a long fall back to an arbitrary-precision integer.
  - Digits, a dot, then digits (e.g. \"1.5\") becomes a double.
  - Anything else (\"-\", \"GET\", \"10.0.0.1\", dates, ...) is returned as-is."
  [str]
  (cond
    ;; Integer field. The previous pattern required at least two digits, so
    ;; single-digit values were silently left as strings.
    (re-matches #"\d+" str)
    (try
      (Long/parseLong ^String str)
      (catch NumberFormatException _
        ;; More digits than a long can hold.
        (bigint str)))

    ;; Decimal field. Parsed directly rather than through the Clojure reader,
    ;; which is a general-purpose reader and not meant for untrusted log text.
    (re-matches #"\d+\.\d+" str)
    (Double/parseDouble ^String str)

    :else
    str))
;; #Fields: date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken
(defn is-format-line?
  "Returns true when the string `s` begins with the `#Fields:` directive,
  i.e. it is the header line that names the log columns.

  Uses a prefix check rather than a fixed-length substring so that lines
  shorter than the prefix (including the empty string) yield false instead
  of throwing."
  [s]
  (.startsWith s "#Fields:"))
(defn find-first-format-line
  "Returns the first element of `lines` for which `is-format-line?` is true,
  or nil when no element qualifies."
  [lines]
  (->> lines
       (filter is-format-line?)
       first))
(defn read-format-into-keywords
  "Turns a format line such as \"#Fields: date time s-ip\" into a sequence of
  keywords (:date :time :s-ip).

  The line is broken into runs of non-space characters, so consecutive,
  leading or trailing spaces never produce empty column names. Tokens that
  start with `#` (the `#Fields:` marker itself) are dropped."
  [s]
  (->> (re-seq #"[^ ]+" s)     ; non-empty space-delimited tokens only
       (filter not-comment?)   ; discard the leading "#Fields:" token
       (map keyword)))
(defn read-format-from-file
  "Opens the file `f`, locates its first format line (see
  `find-first-format-line`) and returns the column names on it as keywords
  (see `read-format-into-keywords`).

  The reader is closed before returning. The header line is extracted as a
  plain string while the reader is still open, so the returned sequence does
  not depend on the file remaining open."
  [f]
  (with-open [rdr (clojure.java.io/reader (clojure.java.io/file f))]
    (let [format-line (find-first-format-line (line-seq rdr))]
      (read-format-into-keywords format-line))))
(defn zipmap-line-data
  "Builds a map from `columns` to the fields of one log `line`.

  The line is tokenized into either runs of characters that are neither a
  space nor a single quote, or single-quoted spans (quotes retained). Each
  token is passed through `str->int` and paired positionally with `columns`."
  [columns line]
  (->> line
       (re-seq #"[^ ']+|'[^']*'")
       (map str->int)
       (zipmap columns)))
(defn read-data-from-file
  "Reads the log at `file` and returns a sequence of maps, one per
  non-comment line, keyed by the column keywords taken from the file's
  format line (see `read-format-from-file`).

  The file is read inside `with-open`, and the result is fully realized with
  `doall` before the reader is closed. The reader is therefore always
  released, and the returned sequence can be consumed safely afterwards."
  [file]
  (let [columns (read-format-from-file file)]
    (with-open [rdr (clojure.java.io/reader (clojure.java.io/file file))]
      (doall
        (map #(zipmap-line-data columns %1)
             (remove-comments (line-seq rdr)))))))
;; Given a log-entry map, returns a vector of its :time, :c-ip, :time-taken
;; and :cs-uri values, in that order (nil for any key that is absent).
;; NOTE(review): the sample #Fields line in this file lists cs-uri-stem and
;; cs-uri-query but not cs-uri -- confirm the key matches your log format.
(def summarize
  (apply juxt [:time :c-ip :time-taken :cs-uri]))
;;; Example usage
;; Parse every entry of the log file into a map.
(def data
  (read-data-from-file "path/to/file.log"))

;; The 200 entries with the largest :time-taken, largest first.
(def longest200
  (->> data
       (sort-by :time-taken)
       reverse
       (take 200)))

;; Print the summaries of those entries in sorted order.
(pprint
  (->> longest200
       (map summarize)
       sort))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment