Skip to content

Instantly share code, notes, and snippets.

@mattdeboard
Created April 11, 2013 01:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattdeboard/5359853 to your computer and use it in GitHub Desktop.
Save mattdeboard/5359853 to your computer and use it in GitHub Desktop.
(ns bucketlogs.core
(:use clojure.java.io))
(defn re-groupmap
  "Return a hash-map of group names and matches generated by computing regex
  matches for named groups.

  Arguments:
    ptn   - a compiled java.util.regex.Pattern that defines named groups.
    names - a sequence of group-name strings to look up.
    s     - the string to search.

  Every entry of `names` becomes a keyword key in the result. Its value is:
    - the text captured by that group on the first match of `ptn` in `s`;
    - nil when `ptn` does not match `s` at all (or the group took no part in
      the match);
    - the string \"No group found\" when `ptn` matched but has no group with
      that name."
  [ptn names s]
  (let [matcher (.matcher ptn s)
        ;; Remember whether a match exists instead of relying on
        ;; IllegalStateException from .group to signal \"no match\".
        found?  (.find matcher)]
    (into {}
          (for [group-name names]
            [(keyword group-name)
             (when found?
               (try
                 (.group matcher group-name)
                 ;; Thrown when the pattern has no group with this name.
                 (catch IllegalArgumentException _
                   "No group found")))]))))
;; Names of the named capture groups to extract from each log line, one per
;; field.
(def groups
  ["ownerid"
   "bucketname"
   "timestamp"
   "remotehost"
   "requester"
   "requestid"
   "operation"
   "key"
   "requesturi"
   "status"
   "error"
   "bytessent"
   "objectsize"
   "totaltime"
   "turnaround"
   "referrer"
   "useragent"])
;; Compiled regex for one log line. The pattern is assembled from one string
;; fragment per field; fields are separated by single spaces and each field is
;; captured in a named group.
;; NOTE(review): the field names suggest Amazon S3 server access logs -- confirm.
;; If so, several fields below (key, status, objectsize, totaltime, turnaround)
;; do not accept a bare "-" and requesturi does not accept "?", "=" or "-", so
;; such lines would not match -- verify against real log samples.
(def ptn (re-pattern
;; (?s) enables DOTALL mode; ^ anchors at the start of the input.
;; ownerid: one or more word characters.
(str "(?s)^(?<ownerid>[\\w\\d]+) "
;; bucketname: word characters and hyphens.
"(?<bucketname>[\\w\\d-]+) "
;; timestamp: dd/Mon/ followed by digits and colons, inside square brackets;
;; the +/-hhmm zone offset is matched but not captured.
"\\[(?<timestamp>\\d{2}/[A-Za-z]{3}/[\\d:]+) [\\-\\+]\\d{4}\\] "
;; remotehost: digits, dots, commas, spaces and hyphens.
"(?<remotehost>[\\d\\., -]+) "
;; requester: word characters, colons and slashes.
"(?<requester>[\\w\\d\\:/]+) "
;; requestid: word characters only.
"(?<requestid>\\w+) "
;; operation: upper-case letters and dots.
"(?<operation>[A-Z\\.]+) "
;; key: word characters, slashes and dots.
"(?<key>[\\w/\\.]+) "
;; requesturi: double-quoted; word characters, slashes, dots and spaces.
"\\\"(?<requesturi>[\\w/\\. ]+)\\\" "
;; status: two or three digits.
"(?<status>\\d{2,3}) "
;; error: word characters and hyphens.
"(?<error>[\\w\\-]+) "
;; bytessent: digits, or a single hyphen.
"(?<bytessent>\\d+|\\-) "
;; objectsize, totaltime, turnaround: digits only.
"(?<objectsize>\\d+) "
"(?<totaltime>\\d+) "
"(?<turnaround>\\d+) "
;; referrer: double-quoted; anything except quotes and whitespace.
"\\\"(?<referrer>[^\\\"\\s]+|\\-)\\\" "
;; useragent: double-quoted; anything except quotes. Trailing spaces allowed.
"\\\"(?<useragent>[^\\\"]+)\\\" *")))
;; Parse every line of the sample log into a map of named-group matches.
;; The sequence is forced with doall while the reader is still open, because
;; line-seq is lazy and the reader is closed when with-open exits.
(with-open [log-reader (reader "resources/sample.log")]
  (doall
   (for [log-line (line-seq log-reader)]
     (re-groupmap ptn groups log-line))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment