Skip to content

Instantly share code, notes, and snippets.

@jskripsky
Created February 22, 2011 13:48
Show Gist options
  • Save jskripsky/838678 to your computer and use it in GitHub Desktop.
Save jskripsky/838678 to your computer and use it in GitHub Desktop.
Extract columns from Apache access logs
open System
open System.Text.RegularExpressions
/// Matches a single space that is followed by an even number of double
/// quotes up to the end of the input, i.e. a space outside any quoted field.
let separator =
    Regex(@" (?=(?:[^\""]*\""[^\""]*\"")*(?![^\""]*\""))")

/// Splits a line at every space that is not inside double quotes and returns
/// the pieces as an array; quoted pieces keep their surrounding quotes.
// NOTE: only double quotes are taken into account, so a space inside a
// bracketed field such as a timestamp still splits that field in two.
// NOTE(review): a second `split` is defined further down in this file and
// takes over the name from that point on.
let split (s: string) = separator.Split(s)
/// Finds every field token in a line. A token is one of:
///   - a run of non-whitespace that does not start with a double quote or `[`
///   - a double-quoted section (shortest match, quotes included)
///   - a bracketed section (shortest match, brackets included)
/// The regex is constructed once and captured by the returned function.
let matches: string -> MatchCollection =
    let fieldPattern = Regex(@"([^\s""\[]\S*|"".*?""|\[.*?\])")
    fun line -> fieldPattern.Matches(line)
/// Returns the text captured by a regex match.
let value (m: Match) : string =
    m.Value
/// Removes every leading and trailing occurrence of `separator` from `s`.
// The trailing `;;` is kept on purpose: it ends the current F# Interactive
// submission. NOTE(review): presumably this is what allows `split` to be
// defined a second time right after this line - confirm before removing.
let trim (separator: char) (s: string) =
    s.Trim([| separator |]);;
/// Breaks a log line into its field tokens, in order of appearance, and
/// strips double quotes from both ends of each token.
/// Bracketed tokens keep their brackets, since only `"` is trimmed.
let split s =
    [ for token in Seq.cast<Match> (matches s) -> trim '"' (value token) ]
/// Pairs each key with the value at the same position and builds a map
/// from the resulting pairs.
/// `List.zip` raises an exception when the two lists differ in length.
let toMap (keys: string list) (vals: string list) =
    (keys, vals)
    ||> List.zip
    |> Map.ofList
/// Names given to the fields of one access-log line, in field order.
// Per the log-format comment at the end of this file the positions correspond
// to: %v, %h, %l, %u, %t, %r, %>s, %b, Referer header, User-Agent header.
let apacheLogKeys =
    [ "vhost"; "client"; "u1"; "u2"; "time";
      "req"; "code"; "bytes"; "ref"; "agent" ]
/// Builds a map from `apacheLogKeys` to the given list of field values.
/// The value list must have one entry per key, in the same order.
let apacheLogToMap =
    apacheLogKeys |> toMap
/// Sample access-log line used to try out the parser.
let test = "www.text.ch 217.69.134.177 - - [13/Feb/2011:02:08:03 +0100] \"GET / HTTP/1.0\" 200 26682 \"-\" \"Mail.Ru/1.0\""
// `apacheLogToMap` expects the list of field values, not the raw line, so the
// line is tokenized with `split` first (passing `test` directly does not
// type-check: string vs. string list).
test |> split |> apacheLogToMap
// Apache LogFormat that produces lines like the sample above:
// "%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment